; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Tests of code generation for scalable-vector masked scatter (indexed-ordered
; stores, vsoxei*) on RV32 and RV64. Do not hand-edit the CHECK lines below;
; regenerate them with utils/update_llc_test_checks.py.
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+experimental-zfh,+experimental-v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64

declare void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8>, <vscale x 1 x i8*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x i8*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8> %val, <vscale x 1 x i8*> %ptrs, i32 1, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %val, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vnsrl.wi v25, v8, 0
; RV32-NEXT:    vsoxei32.v v25, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vnsrl.wi v25, v8, 0
; RV64-NEXT:    vsoxei64.v v25, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i16> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vnsrl.wi v25, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV32-NEXT:    vnsrl.wi v25, v25, 0
; RV32-NEXT:    vsoxei32.v v25, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vnsrl.wi v25, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV64-NEXT:    vnsrl.wi v25, v25, 0
; RV64-NEXT:    vsoxei64.v v25, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vnsrl.wi v25, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV32-NEXT:    vnsrl.wi v25, v25, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV32-NEXT:    vnsrl.wi v25, v25, 0
; RV32-NEXT:    vsoxei32.v v25, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vnsrl.wi v25, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV64-NEXT:    vnsrl.wi v25, v25, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV64-NEXT:    vnsrl.wi v25, v25, 0
; RV64-NEXT:    vsoxei64.v v25, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8>, <vscale x 4 x i8*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs) {
; RV32-LABEL: mscatter_falsemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_falsemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8>, <vscale x 8 x i8*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, i32 1, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, i8* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v28, v9
; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v9
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, i8* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, i32 1, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16>, <vscale x 1 x i16*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i16(<vscale x 1 x i16> %val, <vscale x 1 x i16*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> %val, <vscale x 1 x i16*> %ptrs, i32 2, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %val, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vnsrl.wi v25, v8, 0
; RV32-NEXT:    vsoxei32.v v25, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vnsrl.wi v25, v8, 0
; RV64-NEXT:    vsoxei64.v v25, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i16>
  call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %tval, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vnsrl.wi v25, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV32-NEXT:    vnsrl.wi v25, v25, 0
; RV32-NEXT:    vsoxei32.v v25, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vnsrl.wi v25, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV64-NEXT:    vnsrl.wi v25, v25, 0
; RV64-NEXT:    vsoxei64.v v25, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
  call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %tval, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16>, <vscale x 4 x i16*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs) {
; RV32-LABEL: mscatter_falsemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_falsemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16>, <vscale x 8 x i16*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v28, v10
; RV32-NEXT:    vadd.vv v28, v28, v28
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v28, v10
; RV32-NEXT:    vadd.vv v28, v28, v28
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v28, v10
; RV32-NEXT:    vadd.vv v28, v28, v28
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v28, v10
; RV32-NEXT:    vadd.vv v28, v28, v28
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32>, <vscale x 1 x i32*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i32(<vscale x 1 x i32> %val, <vscale x 1 x i32*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32> %val, <vscale x 1 x i32*> %ptrs, i32 4, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i32(<vscale x 2 x i32> %val, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> %val, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vnsrl.wi v25, v8, 0
; RV32-NEXT:    vsoxei32.v v25, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vnsrl.wi v25, v8, 0
; RV64-NEXT:    vsoxei64.v v25, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
  call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> %tval, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs) {
; RV32-LABEL: mscatter_falsemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_falsemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32>, <vscale x 8 x i32*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v28, v12
; RV32-NEXT:    vsll.vi v28, v28, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v28, v12
; RV32-NEXT:    vsll.vi v28, v28, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v28, v12
; RV32-NEXT:    vsll.vi v28, v28, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v28, v12
; RV32-NEXT:    vsll.vi v28, v28, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v28, v12
; RV32-NEXT:    vsll.vi v28, v28, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf2 v28, v12
; RV32-NEXT:    vsll.vi v28, v28, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v28, v12, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64>, <vscale x 1 x i64*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x i64*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> %val, <vscale x 1 x i64*> %ptrs, i32 8, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64> %val, <vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64>, <vscale x 4 x i64*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, i32 8, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, i32 8, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs) {
; RV32-LABEL: mscatter_falsemask_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_falsemask_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64>, <vscale x 8 x i64*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v28, v16
; RV32-NEXT:    vsll.vi v28, v28, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v28, v16
; RV32-NEXT:    vsll.vi v28, v28, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL:
mscatter_baseidx_nxv8i16_nxv8i64: 894; RV64: # %bb.0: 895; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 896; RV64-NEXT: vsext.vf4 v24, v16 897; RV64-NEXT: vsll.vi v16, v24, 3 898; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 899; RV64-NEXT: ret 900 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i16> %idxs 901 call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m) 902 ret void 903} 904 905define void @mscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 906; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64: 907; RV32: # %bb.0: 908; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 909; RV32-NEXT: vsext.vf4 v24, v16 910; RV32-NEXT: vsll.vi v16, v24, 3 911; RV32-NEXT: vsoxei64.v v8, (a0), v16, v0.t 912; RV32-NEXT: ret 913; 914; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64: 915; RV64: # %bb.0: 916; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 917; RV64-NEXT: vsext.vf4 v24, v16 918; RV64-NEXT: vsll.vi v16, v24, 3 919; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 920; RV64-NEXT: ret 921 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64> 922 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs 923 call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m) 924 ret void 925} 926 927define void @mscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 928; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64: 929; RV32: # %bb.0: 930; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 931; RV32-NEXT: vzext.vf4 v24, v16 932; RV32-NEXT: vsll.vi v16, v24, 3 933; RV32-NEXT: vsoxei64.v v8, (a0), v16, v0.t 934; RV32-NEXT: ret 935; 936; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64: 937; RV64: # %bb.0: 938; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 939; RV64-NEXT: 
vzext.vf4 v24, v16 940; RV64-NEXT: vsll.vi v16, v24, 3 941; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 942; RV64-NEXT: ret 943 %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64> 944 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs 945 call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m) 946 ret void 947} 948 949define void @mscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) { 950; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8i64: 951; RV32: # %bb.0: 952; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 953; RV32-NEXT: vsll.vi v28, v16, 3 954; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 955; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t 956; RV32-NEXT: ret 957; 958; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8i64: 959; RV64: # %bb.0: 960; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 961; RV64-NEXT: vsext.vf2 v24, v16 962; RV64-NEXT: vsll.vi v16, v24, 3 963; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 964; RV64-NEXT: ret 965 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i32> %idxs 966 call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m) 967 ret void 968} 969 970define void @mscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) { 971; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64: 972; RV32: # %bb.0: 973; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 974; RV32-NEXT: vsext.vf2 v24, v16 975; RV32-NEXT: vsll.vi v16, v24, 3 976; RV32-NEXT: vsoxei64.v v8, (a0), v16, v0.t 977; RV32-NEXT: ret 978; 979; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64: 980; RV64: # %bb.0: 981; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 982; RV64-NEXT: vsext.vf2 v24, v16 983; RV64-NEXT: vsll.vi v16, v24, 3 984; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 985; RV64-NEXT: 
ret 986 %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64> 987 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs 988 call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m) 989 ret void 990} 991 992define void @mscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) { 993; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64: 994; RV32: # %bb.0: 995; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 996; RV32-NEXT: vzext.vf2 v24, v16 997; RV32-NEXT: vsll.vi v16, v24, 3 998; RV32-NEXT: vsoxei64.v v8, (a0), v16, v0.t 999; RV32-NEXT: ret 1000; 1001; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64: 1002; RV64: # %bb.0: 1003; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1004; RV64-NEXT: vzext.vf2 v24, v16 1005; RV64-NEXT: vsll.vi v16, v24, 3 1006; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1007; RV64-NEXT: ret 1008 %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64> 1009 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs 1010 call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m) 1011 ret void 1012} 1013 1014define void @mscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) { 1015; RV32-LABEL: mscatter_baseidx_nxv8i64: 1016; RV32: # %bb.0: 1017; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1018; RV32-NEXT: vsll.vi v16, v16, 3 1019; RV32-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1020; RV32-NEXT: ret 1021; 1022; RV64-LABEL: mscatter_baseidx_nxv8i64: 1023; RV64: # %bb.0: 1024; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1025; RV64-NEXT: vsll.vi v16, v16, 3 1026; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1027; RV64-NEXT: ret 1028 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %idxs 1029 call void 
@llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m) 1030 ret void 1031} 1032 1033declare void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half>, <vscale x 1 x half*>, i32, <vscale x 1 x i1>) 1034 1035define void @mscatter_nxv1f16(<vscale x 1 x half> %val, <vscale x 1 x half*> %ptrs, <vscale x 1 x i1> %m) { 1036; RV32-LABEL: mscatter_nxv1f16: 1037; RV32: # %bb.0: 1038; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, mu 1039; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1040; RV32-NEXT: ret 1041; 1042; RV64-LABEL: mscatter_nxv1f16: 1043; RV64: # %bb.0: 1044; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, mu 1045; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t 1046; RV64-NEXT: ret 1047 call void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half> %val, <vscale x 1 x half*> %ptrs, i32 2, <vscale x 1 x i1> %m) 1048 ret void 1049} 1050 1051declare void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>) 1052 1053define void @mscatter_nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %m) { 1054; RV32-LABEL: mscatter_nxv2f16: 1055; RV32: # %bb.0: 1056; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu 1057; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1058; RV32-NEXT: ret 1059; 1060; RV64-LABEL: mscatter_nxv2f16: 1061; RV64: # %bb.0: 1062; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, mu 1063; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t 1064; RV64-NEXT: ret 1065 call void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half> %val, <vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %m) 1066 ret void 1067} 1068 1069declare void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half>, <vscale x 4 x half*>, i32, <vscale x 4 x i1>) 1070 1071define void @mscatter_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, <vscale x 4 x i1> %m) { 1072; RV32-LABEL: mscatter_nxv4f16: 1073; RV32: # %bb.0: 1074; RV32-NEXT: vsetvli 
a0, zero, e16, m1, ta, mu 1075; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t 1076; RV32-NEXT: ret 1077; 1078; RV64-LABEL: mscatter_nxv4f16: 1079; RV64: # %bb.0: 1080; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu 1081; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t 1082; RV64-NEXT: ret 1083 call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> %m) 1084 ret void 1085} 1086 1087define void @mscatter_truemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs) { 1088; RV32-LABEL: mscatter_truemask_nxv4f16: 1089; RV32: # %bb.0: 1090; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu 1091; RV32-NEXT: vsoxei32.v v8, (zero), v10 1092; RV32-NEXT: ret 1093; 1094; RV64-LABEL: mscatter_truemask_nxv4f16: 1095; RV64: # %bb.0: 1096; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu 1097; RV64-NEXT: vsoxei64.v v8, (zero), v12 1098; RV64-NEXT: ret 1099 %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0 1100 %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer 1101 call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> %mtrue) 1102 ret void 1103} 1104 1105define void @mscatter_falsemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs) { 1106; RV32-LABEL: mscatter_falsemask_nxv4f16: 1107; RV32: # %bb.0: 1108; RV32-NEXT: ret 1109; 1110; RV64-LABEL: mscatter_falsemask_nxv4f16: 1111; RV64: # %bb.0: 1112; RV64-NEXT: ret 1113 call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer) 1114 ret void 1115} 1116 1117declare void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half>, <vscale x 8 x half*>, i32, <vscale x 8 x i1>) 1118 1119define void @mscatter_nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m) { 1120; RV32-LABEL: mscatter_nxv8f16: 1121; 
RV32: # %bb.0: 1122; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, mu 1123; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t 1124; RV32-NEXT: ret 1125; 1126; RV64-LABEL: mscatter_nxv8f16: 1127; RV64: # %bb.0: 1128; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, mu 1129; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t 1130; RV64-NEXT: ret 1131 call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m) 1132 ret void 1133} 1134 1135define void @mscatter_baseidx_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1136; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f16: 1137; RV32: # %bb.0: 1138; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1139; RV32-NEXT: vsext.vf4 v28, v10 1140; RV32-NEXT: vadd.vv v28, v28, v28 1141; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1142; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t 1143; RV32-NEXT: ret 1144; 1145; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f16: 1146; RV64: # %bb.0: 1147; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1148; RV64-NEXT: vsext.vf8 v16, v10 1149; RV64-NEXT: vadd.vv v16, v16, v16 1150; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1151; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1152; RV64-NEXT: ret 1153 %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i8> %idxs 1154 call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m) 1155 ret void 1156} 1157 1158define void @mscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1159; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16: 1160; RV32: # %bb.0: 1161; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1162; RV32-NEXT: vsext.vf4 v28, v10 1163; RV32-NEXT: vadd.vv v28, v28, v28 1164; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1165; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t 1166; RV32-NEXT: ret 1167; 1168; RV64-LABEL: 
mscatter_baseidx_sext_nxv8i8_nxv8f16: 1169; RV64: # %bb.0: 1170; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1171; RV64-NEXT: vsext.vf8 v16, v10 1172; RV64-NEXT: vadd.vv v16, v16, v16 1173; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1174; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1175; RV64-NEXT: ret 1176 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16> 1177 %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs 1178 call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m) 1179 ret void 1180} 1181 1182define void @mscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1183; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16: 1184; RV32: # %bb.0: 1185; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1186; RV32-NEXT: vzext.vf4 v28, v10 1187; RV32-NEXT: vadd.vv v28, v28, v28 1188; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1189; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t 1190; RV32-NEXT: ret 1191; 1192; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16: 1193; RV64: # %bb.0: 1194; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1195; RV64-NEXT: vzext.vf8 v16, v10 1196; RV64-NEXT: vadd.vv v16, v16, v16 1197; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1198; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1199; RV64-NEXT: ret 1200 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16> 1201 %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs 1202 call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m) 1203 ret void 1204} 1205 1206define void @mscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 1207; RV32-LABEL: mscatter_baseidx_nxv8f16: 1208; RV32: # %bb.0: 1209; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1210; RV32-NEXT: vsext.vf2 v28, 
v10 1211; RV32-NEXT: vadd.vv v28, v28, v28 1212; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1213; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t 1214; RV32-NEXT: ret 1215; 1216; RV64-LABEL: mscatter_baseidx_nxv8f16: 1217; RV64: # %bb.0: 1218; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1219; RV64-NEXT: vsext.vf4 v16, v10 1220; RV64-NEXT: vadd.vv v16, v16, v16 1221; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1222; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1223; RV64-NEXT: ret 1224 %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %idxs 1225 call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m) 1226 ret void 1227} 1228 1229declare void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float>, <vscale x 1 x float*>, i32, <vscale x 1 x i1>) 1230 1231define void @mscatter_nxv1f32(<vscale x 1 x float> %val, <vscale x 1 x float*> %ptrs, <vscale x 1 x i1> %m) { 1232; RV32-LABEL: mscatter_nxv1f32: 1233; RV32: # %bb.0: 1234; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, mu 1235; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1236; RV32-NEXT: ret 1237; 1238; RV64-LABEL: mscatter_nxv1f32: 1239; RV64: # %bb.0: 1240; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, mu 1241; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t 1242; RV64-NEXT: ret 1243 call void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float> %val, <vscale x 1 x float*> %ptrs, i32 4, <vscale x 1 x i1> %m) 1244 ret void 1245} 1246 1247declare void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>) 1248 1249define void @mscatter_nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %m) { 1250; RV32-LABEL: mscatter_nxv2f32: 1251; RV32: # %bb.0: 1252; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, mu 1253; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1254; RV32-NEXT: ret 1255; 1256; RV64-LABEL: mscatter_nxv2f32: 1257; RV64: # %bb.0: 1258; RV64-NEXT: 
vsetvli a0, zero, e32, m1, ta, mu 1259; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t 1260; RV64-NEXT: ret 1261 call void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> %val, <vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %m) 1262 ret void 1263} 1264 1265declare void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float>, <vscale x 4 x float*>, i32, <vscale x 4 x i1>) 1266 1267define void @mscatter_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, <vscale x 4 x i1> %m) { 1268; RV32-LABEL: mscatter_nxv4f32: 1269; RV32: # %bb.0: 1270; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, mu 1271; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t 1272; RV32-NEXT: ret 1273; 1274; RV64-LABEL: mscatter_nxv4f32: 1275; RV64: # %bb.0: 1276; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, mu 1277; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t 1278; RV64-NEXT: ret 1279 call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> %m) 1280 ret void 1281} 1282 1283define void @mscatter_truemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs) { 1284; RV32-LABEL: mscatter_truemask_nxv4f32: 1285; RV32: # %bb.0: 1286; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, mu 1287; RV32-NEXT: vsoxei32.v v8, (zero), v10 1288; RV32-NEXT: ret 1289; 1290; RV64-LABEL: mscatter_truemask_nxv4f32: 1291; RV64: # %bb.0: 1292; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, mu 1293; RV64-NEXT: vsoxei64.v v8, (zero), v12 1294; RV64-NEXT: ret 1295 %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0 1296 %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer 1297 call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> %mtrue) 1298 ret void 1299} 1300 1301define void @mscatter_falsemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs) { 1302; RV32-LABEL: 
mscatter_falsemask_nxv4f32: 1303; RV32: # %bb.0: 1304; RV32-NEXT: ret 1305; 1306; RV64-LABEL: mscatter_falsemask_nxv4f32: 1307; RV64: # %bb.0: 1308; RV64-NEXT: ret 1309 call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer) 1310 ret void 1311} 1312 1313declare void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float>, <vscale x 8 x float*>, i32, <vscale x 8 x i1>) 1314 1315define void @mscatter_nxv8f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m) { 1316; RV32-LABEL: mscatter_nxv8f32: 1317; RV32: # %bb.0: 1318; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, mu 1319; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t 1320; RV32-NEXT: ret 1321; 1322; RV64-LABEL: mscatter_nxv8f32: 1323; RV64: # %bb.0: 1324; RV64-NEXT: vsetvli a0, zero, e32, m4, ta, mu 1325; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t 1326; RV64-NEXT: ret 1327 call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m) 1328 ret void 1329} 1330 1331define void @mscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1332; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f32: 1333; RV32: # %bb.0: 1334; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1335; RV32-NEXT: vsext.vf4 v28, v12 1336; RV32-NEXT: vsll.vi v28, v28, 2 1337; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t 1338; RV32-NEXT: ret 1339; 1340; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f32: 1341; RV64: # %bb.0: 1342; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1343; RV64-NEXT: vsext.vf8 v16, v12 1344; RV64-NEXT: vsll.vi v16, v16, 2 1345; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1346; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1347; RV64-NEXT: ret 1348 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i8> %idxs 1349 call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> 
%val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m) 1350 ret void 1351} 1352 1353define void @mscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1354; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32: 1355; RV32: # %bb.0: 1356; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1357; RV32-NEXT: vsext.vf4 v28, v12 1358; RV32-NEXT: vsll.vi v28, v28, 2 1359; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t 1360; RV32-NEXT: ret 1361; 1362; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32: 1363; RV64: # %bb.0: 1364; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1365; RV64-NEXT: vsext.vf8 v16, v12 1366; RV64-NEXT: vsll.vi v16, v16, 2 1367; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1368; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1369; RV64-NEXT: ret 1370 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32> 1371 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs 1372 call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m) 1373 ret void 1374} 1375 1376define void @mscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1377; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32: 1378; RV32: # %bb.0: 1379; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1380; RV32-NEXT: vzext.vf4 v28, v12 1381; RV32-NEXT: vsll.vi v28, v28, 2 1382; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t 1383; RV32-NEXT: ret 1384; 1385; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32: 1386; RV64: # %bb.0: 1387; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1388; RV64-NEXT: vzext.vf8 v16, v12 1389; RV64-NEXT: vsll.vi v16, v16, 2 1390; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1391; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1392; RV64-NEXT: ret 1393 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32> 1394 %ptrs = getelementptr inbounds float, 
float* %base, <vscale x 8 x i32> %eidxs 1395 call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m) 1396 ret void 1397} 1398 1399define void @mscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 1400; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f32: 1401; RV32: # %bb.0: 1402; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1403; RV32-NEXT: vsext.vf2 v28, v12 1404; RV32-NEXT: vsll.vi v28, v28, 2 1405; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t 1406; RV32-NEXT: ret 1407; 1408; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f32: 1409; RV64: # %bb.0: 1410; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1411; RV64-NEXT: vsext.vf4 v16, v12 1412; RV64-NEXT: vsll.vi v16, v16, 2 1413; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1414; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1415; RV64-NEXT: ret 1416 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i16> %idxs 1417 call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m) 1418 ret void 1419} 1420 1421define void @mscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 1422; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32: 1423; RV32: # %bb.0: 1424; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1425; RV32-NEXT: vsext.vf2 v28, v12 1426; RV32-NEXT: vsll.vi v28, v28, 2 1427; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t 1428; RV32-NEXT: ret 1429; 1430; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32: 1431; RV64: # %bb.0: 1432; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1433; RV64-NEXT: vsext.vf4 v16, v12 1434; RV64-NEXT: vsll.vi v16, v16, 2 1435; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1436; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1437; RV64-NEXT: ret 1438 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32> 1439 
%ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs 1440 call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m) 1441 ret void 1442} 1443 1444define void @mscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 1445; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32: 1446; RV32: # %bb.0: 1447; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1448; RV32-NEXT: vzext.vf2 v28, v12 1449; RV32-NEXT: vsll.vi v28, v28, 2 1450; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t 1451; RV32-NEXT: ret 1452; 1453; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32: 1454; RV64: # %bb.0: 1455; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1456; RV64-NEXT: vzext.vf4 v16, v12 1457; RV64-NEXT: vsll.vi v16, v16, 2 1458; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1459; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1460; RV64-NEXT: ret 1461 %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32> 1462 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs 1463 call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m) 1464 ret void 1465} 1466 1467define void @mscatter_baseidx_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) { 1468; RV32-LABEL: mscatter_baseidx_nxv8f32: 1469; RV32: # %bb.0: 1470; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1471; RV32-NEXT: vsll.vi v28, v12, 2 1472; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t 1473; RV32-NEXT: ret 1474; 1475; RV64-LABEL: mscatter_baseidx_nxv8f32: 1476; RV64: # %bb.0: 1477; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1478; RV64-NEXT: vsext.vf2 v16, v12 1479; RV64-NEXT: vsll.vi v16, v16, 2 1480; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1481; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1482; RV64-NEXT: ret 1483 %ptrs = 
getelementptr inbounds float, float* %base, <vscale x 8 x i32> %idxs 1484 call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m) 1485 ret void 1486} 1487 1488declare void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double>, <vscale x 1 x double*>, i32, <vscale x 1 x i1>) 1489 1490define void @mscatter_nxv1f64(<vscale x 1 x double> %val, <vscale x 1 x double*> %ptrs, <vscale x 1 x i1> %m) { 1491; RV32-LABEL: mscatter_nxv1f64: 1492; RV32: # %bb.0: 1493; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1494; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1495; RV32-NEXT: ret 1496; 1497; RV64-LABEL: mscatter_nxv1f64: 1498; RV64: # %bb.0: 1499; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1500; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t 1501; RV64-NEXT: ret 1502 call void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double> %val, <vscale x 1 x double*> %ptrs, i32 8, <vscale x 1 x i1> %m) 1503 ret void 1504} 1505 1506declare void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>) 1507 1508define void @mscatter_nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %m) { 1509; RV32-LABEL: mscatter_nxv2f64: 1510; RV32: # %bb.0: 1511; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu 1512; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t 1513; RV32-NEXT: ret 1514; 1515; RV64-LABEL: mscatter_nxv2f64: 1516; RV64: # %bb.0: 1517; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, mu 1518; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t 1519; RV64-NEXT: ret 1520 call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> %val, <vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %m) 1521 ret void 1522} 1523 1524declare void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double>, <vscale x 4 x double*>, i32, <vscale x 4 x i1>) 1525 1526define void @mscatter_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x 
double*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> %m)
  ret void
}

; A known all-ones mask is emitted as an unmasked scatter (no v0.t operand).
define void @mscatter_truemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> %mtrue)
  ret void
}

; A known all-zeroes mask folds the scatter away entirely.
define void @mscatter_falsemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs) {
; RV32-LABEL: mscatter_falsemask_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_falsemask_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double>, <vscale x 8 x double*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

; Indexed forms: the GEP index vector is sign/zero-extended and scaled by 8
; (vsll.vi ..., 3) into byte offsets for the indexed store.
define void @mscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v28, v16
; RV32-NEXT:    vsll.vi v28, v28, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v28, v16
; RV32-NEXT:    vsll.vi v28, v28, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf4 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v28, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf2 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf2 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

; nxv16f64 exceeds the largest register group (m8), so the scatters below are
; split into two m8 halves; the upper half of the mask is extracted with
; vslidedown.vx before the second store.
declare void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double>, <vscale x 16 x double*>, i32, <vscale x 16 x i1>)

declare <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double>, <vscale x 8 x double>, i64)
declare <vscale x 16 x double*> @llvm.experimental.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*>, <vscale x 8 x double*>, i64)

define void @mscatter_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, <vscale x 8 x double*> %ptrs0, <vscale x 8 x double*> %ptrs1, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vl4re32.v v28, (a0)
; RV32-NEXT:    vl4re32.v v24, (a1)
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v28, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    srli a0, a0, 3
; RV32-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
; RV32-NEXT:    vslidedown.vx v0, v0, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v16, (zero), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    slli a2, a2, 3
; RV64-NEXT:    sub sp, sp, a2
; RV64-NEXT:    vl8re64.v v24, (a0)
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV64-NEXT:    vl8re64.v v16, (a1)
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v24, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    srli a0, a0, 3
; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v0, a0
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 3
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %p0 = call <vscale x 16 x double*> @llvm.experimental.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*> undef, <vscale x 8 x double*> %ptrs0, i64 0)
  %p1 = call <vscale x 16 x double*> @llvm.experimental.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*> %p0, <vscale x 8 x double*> %ptrs1, i64 8)
  %v0 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %v1, <vscale x 16 x double*> %p1, i32 8, <vscale x 16 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv16i8_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, double* %base, <vscale x 16 x i8> %idxs, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vl2r.v v2, (a1)
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v24, v2
; RV32-NEXT:    vsll.vi v24, v24, 3
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    srli a1, a1, 3
; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV32-NEXT:    vslidedown.vx v0, v0, a1
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vl2r.v v2, (a1)
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v2
; RV64-NEXT:    vsll.vi v24, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    srli a1, a1, 3
; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v0, a1
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v8, v3
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i8> %idxs
  %v0 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %v1, <vscale x 16 x double*> %ptrs, i32 8, <vscale x 16 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv16i16_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vl4re16.v v4, (a1)
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, mu
; RV32-NEXT:    vsext.vf2 v24, v4
; RV32-NEXT:    vsll.vi v24, v24, 3
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    srli a1, a1, 3
; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV32-NEXT:    vslidedown.vx v0, v0, a1
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vl4re16.v v4, (a1)
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v4
; RV64-NEXT:    vsll.vi v24, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    srli a1, a1, 3
; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v0, a1
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v8, v6
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i16> %idxs
  %v0 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %v1, <vscale x 16 x double*> %ptrs, i32 8, <vscale x 16 x i1> %m)
  ret void
}