1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \
3; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
4; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+experimental-zfh,+experimental-v -target-abi=lp64d \
5; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
6
7declare void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8>, <vscale x 1 x i8*>, i32, <vscale x 1 x i1>)
8
define void @mscatter_nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x i8*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  ; Masked scatter of e8 elements: lowered to an indexed-unordered store
  ; (vsoxei) off a zero base, with pointer-width (32/64-bit) indices.
  call void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8> %val, <vscale x 1 x i8*> %ptrs, i32 1, <vscale x 1 x i1> %m)
  ret void
}
24
25declare void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)
26
define void @mscatter_nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  ; Same pattern at mf4; on RV64 the 64-bit pointer vector occupies a
  ; wider register group, so the index operand moves to v10.
  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %val, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}
42
define void @mscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vnsrl.wi v25, v8, 0
; RV32-NEXT:    vsoxei32.v v25, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vnsrl.wi v25, v8, 0
; RV64-NEXT:    vsoxei64.v v25, (zero), v10, v0.t
; RV64-NEXT:    ret
  ; i16 -> i8 truncation folds into the scatter as a single narrowing
  ; shift (vnsrl by 0) feeding the e8 indexed store.
  %tval = trunc <vscale x 2 x i16> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}
61
define void @mscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vnsrl.wi v25, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV32-NEXT:    vnsrl.wi v25, v25, 0
; RV32-NEXT:    vsoxei32.v v25, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vnsrl.wi v25, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV64-NEXT:    vnsrl.wi v25, v25, 0
; RV64-NEXT:    vsoxei64.v v25, (zero), v10, v0.t
; RV64-NEXT:    ret
  ; i32 -> i8 needs two narrowing steps (e32->e16 then e16->e8), each a
  ; vnsrl under its own vtype, before the e8 scatter.
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}
84
define void @mscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vnsrl.wi v25, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV32-NEXT:    vnsrl.wi v25, v25, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV32-NEXT:    vnsrl.wi v25, v25, 0
; RV32-NEXT:    vsoxei32.v v25, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vnsrl.wi v25, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV64-NEXT:    vnsrl.wi v25, v25, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV64-NEXT:    vnsrl.wi v25, v25, 0
; RV64-NEXT:    vsoxei64.v v25, (zero), v10, v0.t
; RV64-NEXT:    ret
  ; i64 -> i8: three cascaded vnsrl narrowing steps (e32, e16, e8)
  ; before the indexed store.
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}
111
112declare void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8>, <vscale x 4 x i8*>, i32, <vscale x 4 x i1>)
113
define void @mscatter_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  ; nxv4 e8 scatter: pointer vector is m2 (v10) on RV32 and m4 (v12) on RV64.
  call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> %m)
  ret void
}
129
define void @mscatter_truemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  ; An all-true splat mask folds to the unmasked vsoxei form (no v0.t).
  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> %mtrue)
  ret void
}
147
define void @mscatter_falsemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs) {
; RV32-LABEL: mscatter_falsemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_falsemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    ret
  ; An all-false mask makes the scatter a no-op; it folds away entirely.
  call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> zeroinitializer)
  ret void
}
159
160declare void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8>, <vscale x 8 x i8*>, i32, <vscale x 8 x i1>)
161
define void @mscatter_nxv8i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  ; nxv8 e8 scatter: pointer vector is m4 (v12) on RV32 and m8 (v16) on RV64.
  call void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, i32 1, <vscale x 8 x i1> %m)
  ret void
}
177
define void @mscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, i8* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v28, v9
; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v9
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  ; base + i8 indices: indices are sign-extended to pointer width
  ; (vsext.vf4 / vf8); element size is 1 byte so no scaling is needed.
  %ptrs = getelementptr inbounds i8, i8* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, i32 1, <vscale x 8 x i1> %m)
  ret void
}
198
199declare void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16>, <vscale x 1 x i16*>, i32, <vscale x 1 x i1>)
200
define void @mscatter_nxv1i16(<vscale x 1 x i16> %val, <vscale x 1 x i16*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  ; Masked e16 scatter (align 2) via indexed store off a zero base.
  call void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> %val, <vscale x 1 x i16*> %ptrs, i32 2, <vscale x 1 x i1> %m)
  ret void
}
216
217declare void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)
218
define void @mscatter_nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  ; nxv2 e16 scatter; RV64's wider pointer vector shifts the index reg to v10.
  call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %val, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}
234
define void @mscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vnsrl.wi v25, v8, 0
; RV32-NEXT:    vsoxei32.v v25, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vnsrl.wi v25, v8, 0
; RV64-NEXT:    vsoxei64.v v25, (zero), v10, v0.t
; RV64-NEXT:    ret
  ; i32 -> i16 truncation is one vnsrl narrowing step before the e16 scatter.
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i16>
  call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %tval, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}
253
define void @mscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vnsrl.wi v25, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV32-NEXT:    vnsrl.wi v25, v25, 0
; RV32-NEXT:    vsoxei32.v v25, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vnsrl.wi v25, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV64-NEXT:    vnsrl.wi v25, v25, 0
; RV64-NEXT:    vsoxei64.v v25, (zero), v10, v0.t
; RV64-NEXT:    ret
  ; i64 -> i16: two cascaded vnsrl narrowing steps (e32, e16) before the scatter.
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
  call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %tval, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}
276
277declare void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16>, <vscale x 4 x i16*>, i32, <vscale x 4 x i1>)
278
define void @mscatter_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  ; nxv4 e16 scatter: pointers occupy m2 (v10) on RV32, m4 (v12) on RV64.
  call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %m)
  ret void
}
294
define void @mscatter_truemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  ; An all-true splat mask folds to the unmasked vsoxei form (no v0.t).
  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %mtrue)
  ret void
}
312
define void @mscatter_falsemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs) {
; RV32-LABEL: mscatter_falsemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_falsemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    ret
  ; An all-false mask makes the scatter a no-op; it folds away entirely.
  call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer)
  ret void
}
324
325declare void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16>, <vscale x 8 x i16*>, i32, <vscale x 8 x i1>)
326
define void @mscatter_nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  ; nxv8 e16 scatter: pointers occupy m4 (v12) on RV32, m8 (v16) on RV64.
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}
342
define void @mscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v28, v10
; RV32-NEXT:    vadd.vv v28, v28, v28
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  ; base + i8 indices into i16 elements: sign-extend to pointer width,
  ; then scale by 2 via vadd.vv (x + x) before the indexed store.
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}
365
define void @mscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v28, v10
; RV32-NEXT:    vadd.vv v28, v28, v28
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  ; Explicit sext of the i8 indices to i16 first: produces identical code
  ; to the implicit-extension case (single vsext to pointer width + x2 scale).
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}
389
define void @mscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v28, v10
; RV32-NEXT:    vadd.vv v28, v28, v28
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  ; zext variant: same shape as the sext test but uses vzext for the
  ; index extension before the x2 scale and indexed store.
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}
413
define void @mscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v28, v10
; RV32-NEXT:    vadd.vv v28, v28, v28
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  ; i16 indices into i16 elements: sign-extend to pointer width
  ; (vf2 on RV32, vf4 on RV64) and scale by 2 via vadd.vv.
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}
436
437declare void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32>, <vscale x 1 x i32*>, i32, <vscale x 1 x i1>)
438
define void @mscatter_nxv1i32(<vscale x 1 x i32> %val, <vscale x 1 x i32*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  ; Masked e32 scatter (align 4) via indexed store off a zero base.
  call void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32> %val, <vscale x 1 x i32*> %ptrs, i32 4, <vscale x 1 x i1> %m)
  ret void
}
454
455declare void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)
456
define void @mscatter_nxv2i32(<vscale x 2 x i32> %val, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  ; nxv2 e32 scatter; RV64's wider pointer vector shifts the index reg to v10.
  call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> %val, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}
472
define void @mscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vnsrl.wi v25, v8, 0
; RV32-NEXT:    vsoxei32.v v25, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vnsrl.wi v25, v8, 0
; RV64-NEXT:    vsoxei64.v v25, (zero), v10, v0.t
; RV64-NEXT:    ret
  ; i64 -> i32 truncation is one vnsrl narrowing step before the e32 scatter.
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
  call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> %tval, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}
491
492declare void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, i32, <vscale x 4 x i1>)
493
define void @mscatter_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  ; nxv4 e32 scatter: pointers occupy m2 (v10) on RV32, m4 (v12) on RV64.
  call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %m)
  ret void
}
509
define void @mscatter_truemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  ; An all-true splat mask folds to the unmasked vsoxei form (no v0.t).
  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %mtrue)
  ret void
}
527
define void @mscatter_falsemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs) {
; RV32-LABEL: mscatter_falsemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_falsemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    ret
  ; An all-false mask makes the scatter a no-op; it folds away entirely.
  call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer)
  ret void
}
539
540declare void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32>, <vscale x 8 x i32*>, i32, <vscale x 8 x i1>)
541
define void @mscatter_nxv8i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  ; nxv8 e32 scatter: pointers occupy m4 (v12) on RV32, m8 (v16) on RV64.
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}
557
define void @mscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v28, v12
; RV32-NEXT:    vsll.vi v28, v28, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  ; base + i8 indices into i32 elements: sign-extend to pointer width,
  ; then scale by 4 with vsll.vi 2 before the indexed store.
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}
579
define void @mscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v28, v12
; RV32-NEXT:    vsll.vi v28, v28, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  ; Explicit sext i8 -> i32 indices: same code as the implicit case
  ; (one vsext to pointer width, then a shift-by-2 scale).
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}
602
define void @mscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v28, v12
; RV32-NEXT:    vsll.vi v28, v28, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  ; zext variant: vzext instead of vsext for the index extension,
  ; then the same shift-by-2 scale and indexed store.
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}
625
define void @mscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v28, v12
; RV32-NEXT:    vsll.vi v28, v28, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  ; base + i16 indices into i32 elements: sign-extend (vf2 / vf4) to
  ; pointer width, then shift-by-2 scale before the indexed store.
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}
647
define void @mscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v28, v12
; RV32-NEXT:    vsll.vi v28, v28, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  ; Explicit sext i16 -> i32 indices: same code as the implicit case.
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}
670
define void @mscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf2 v28, v12
; RV32-NEXT:    vsll.vi v28, v28, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  ; zext i16 -> i32 indices: vzext extension, then shift-by-2 scale.
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}
693
define void @mscatter_baseidx_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v28, v12, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  ; i32 indices: on RV32 they are already pointer-width, so only the
  ; shift-by-2 scale is emitted; RV64 still needs a vsext.vf2 first.
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}
714
715declare void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64>, <vscale x 1 x i64*>, i32, <vscale x 1 x i1>)
716
define void @mscatter_nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x i64*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  ; Masked e64 scatter (align 8); the index EEW still follows XLEN
  ; (32-bit indices on RV32, 64-bit on RV64).
  call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> %val, <vscale x 1 x i64*> %ptrs, i32 8, <vscale x 1 x i1> %m)
  ret void
}
732
733declare void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
734
735define void @mscatter_nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %m) {
736; RV32-LABEL: mscatter_nxv2i64:
737; RV32:       # %bb.0:
738; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
739; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
740; RV32-NEXT:    ret
741;
742; RV64-LABEL: mscatter_nxv2i64:
743; RV64:       # %bb.0:
744; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
745; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
746; RV64-NEXT:    ret
747  call void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64> %val, <vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %m)
748  ret void
749}
750
751declare void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64>, <vscale x 4 x i64*>, i32, <vscale x 4 x i1>)
752
753define void @mscatter_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, <vscale x 4 x i1> %m) {
754; RV32-LABEL: mscatter_nxv4i64:
755; RV32:       # %bb.0:
756; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
757; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
758; RV32-NEXT:    ret
759;
760; RV64-LABEL: mscatter_nxv4i64:
761; RV64:       # %bb.0:
762; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
763; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
764; RV64-NEXT:    ret
765  call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, i32 8, <vscale x 4 x i1> %m)
766  ret void
767}
768
769define void @mscatter_truemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs) {
770; RV32-LABEL: mscatter_truemask_nxv4i64:
771; RV32:       # %bb.0:
772; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
773; RV32-NEXT:    vsoxei32.v v8, (zero), v12
774; RV32-NEXT:    ret
775;
776; RV64-LABEL: mscatter_truemask_nxv4i64:
777; RV64:       # %bb.0:
778; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
779; RV64-NEXT:    vsoxei64.v v8, (zero), v12
780; RV64-NEXT:    ret
781  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
782  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
783  call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, i32 8, <vscale x 4 x i1> %mtrue)
784  ret void
785}
786
787define void @mscatter_falsemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs) {
788; RV32-LABEL: mscatter_falsemask_nxv4i64:
789; RV32:       # %bb.0:
790; RV32-NEXT:    ret
791;
792; RV64-LABEL: mscatter_falsemask_nxv4i64:
793; RV64:       # %bb.0:
794; RV64-NEXT:    ret
795  call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer)
796  ret void
797}
798
799declare void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64>, <vscale x 8 x i64*>, i32, <vscale x 8 x i1>)
800
801define void @mscatter_nxv8i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m) {
802; RV32-LABEL: mscatter_nxv8i64:
803; RV32:       # %bb.0:
804; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
805; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
806; RV32-NEXT:    ret
807;
808; RV64-LABEL: mscatter_nxv8i64:
809; RV64:       # %bb.0:
810; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
811; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
812; RV64-NEXT:    ret
813  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
814  ret void
815}
816
817define void @mscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
818; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i64:
819; RV32:       # %bb.0:
820; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
821; RV32-NEXT:    vsext.vf4 v28, v16
822; RV32-NEXT:    vsll.vi v28, v28, 3
823; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
824; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
825; RV32-NEXT:    ret
826;
827; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i64:
828; RV64:       # %bb.0:
829; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
830; RV64-NEXT:    vsext.vf8 v24, v16
831; RV64-NEXT:    vsll.vi v16, v24, 3
832; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
833; RV64-NEXT:    ret
834  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i8> %idxs
835  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
836  ret void
837}
838
839define void @mscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
840; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64:
841; RV32:       # %bb.0:
842; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
843; RV32-NEXT:    vsext.vf8 v24, v16
844; RV32-NEXT:    vsll.vi v16, v24, 3
845; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
846; RV32-NEXT:    ret
847;
848; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64:
849; RV64:       # %bb.0:
850; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
851; RV64-NEXT:    vsext.vf8 v24, v16
852; RV64-NEXT:    vsll.vi v16, v24, 3
853; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
854; RV64-NEXT:    ret
855  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
856  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
857  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
858  ret void
859}
860
861define void @mscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
862; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64:
863; RV32:       # %bb.0:
864; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
865; RV32-NEXT:    vzext.vf8 v24, v16
866; RV32-NEXT:    vsll.vi v16, v24, 3
867; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
868; RV32-NEXT:    ret
869;
870; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64:
871; RV64:       # %bb.0:
872; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
873; RV64-NEXT:    vzext.vf8 v24, v16
874; RV64-NEXT:    vsll.vi v16, v24, 3
875; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
876; RV64-NEXT:    ret
877  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
878  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
879  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
880  ret void
881}
882
883define void @mscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
884; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i64:
885; RV32:       # %bb.0:
886; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
887; RV32-NEXT:    vsext.vf2 v28, v16
888; RV32-NEXT:    vsll.vi v28, v28, 3
889; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
890; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
891; RV32-NEXT:    ret
892;
893; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i64:
894; RV64:       # %bb.0:
895; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
896; RV64-NEXT:    vsext.vf4 v24, v16
897; RV64-NEXT:    vsll.vi v16, v24, 3
898; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
899; RV64-NEXT:    ret
900  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i16> %idxs
901  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
902  ret void
903}
904
905define void @mscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
906; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64:
907; RV32:       # %bb.0:
908; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
909; RV32-NEXT:    vsext.vf4 v24, v16
910; RV32-NEXT:    vsll.vi v16, v24, 3
911; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
912; RV32-NEXT:    ret
913;
914; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64:
915; RV64:       # %bb.0:
916; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
917; RV64-NEXT:    vsext.vf4 v24, v16
918; RV64-NEXT:    vsll.vi v16, v24, 3
919; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
920; RV64-NEXT:    ret
921  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
922  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
923  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
924  ret void
925}
926
927define void @mscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
928; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64:
929; RV32:       # %bb.0:
930; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
931; RV32-NEXT:    vzext.vf4 v24, v16
932; RV32-NEXT:    vsll.vi v16, v24, 3
933; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
934; RV32-NEXT:    ret
935;
936; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64:
937; RV64:       # %bb.0:
938; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
939; RV64-NEXT:    vzext.vf4 v24, v16
940; RV64-NEXT:    vsll.vi v16, v24, 3
941; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
942; RV64-NEXT:    ret
943  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
944  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
945  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
946  ret void
947}
948
949define void @mscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
950; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8i64:
951; RV32:       # %bb.0:
952; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
953; RV32-NEXT:    vsll.vi v28, v16, 3
954; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
955; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
956; RV32-NEXT:    ret
957;
958; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8i64:
959; RV64:       # %bb.0:
960; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
961; RV64-NEXT:    vsext.vf2 v24, v16
962; RV64-NEXT:    vsll.vi v16, v24, 3
963; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
964; RV64-NEXT:    ret
965  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i32> %idxs
966  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
967  ret void
968}
969
970define void @mscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
971; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64:
972; RV32:       # %bb.0:
973; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
974; RV32-NEXT:    vsext.vf2 v24, v16
975; RV32-NEXT:    vsll.vi v16, v24, 3
976; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
977; RV32-NEXT:    ret
978;
979; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64:
980; RV64:       # %bb.0:
981; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
982; RV64-NEXT:    vsext.vf2 v24, v16
983; RV64-NEXT:    vsll.vi v16, v24, 3
984; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
985; RV64-NEXT:    ret
986  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
987  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
988  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
989  ret void
990}
991
992define void @mscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
993; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64:
994; RV32:       # %bb.0:
995; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
996; RV32-NEXT:    vzext.vf2 v24, v16
997; RV32-NEXT:    vsll.vi v16, v24, 3
998; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
999; RV32-NEXT:    ret
1000;
1001; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64:
1002; RV64:       # %bb.0:
1003; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
1004; RV64-NEXT:    vzext.vf2 v24, v16
1005; RV64-NEXT:    vsll.vi v16, v24, 3
1006; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1007; RV64-NEXT:    ret
1008  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
1009  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
1010  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
1011  ret void
1012}
1013
1014define void @mscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) {
1015; RV32-LABEL: mscatter_baseidx_nxv8i64:
1016; RV32:       # %bb.0:
1017; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
1018; RV32-NEXT:    vsll.vi v16, v16, 3
1019; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1020; RV32-NEXT:    ret
1021;
1022; RV64-LABEL: mscatter_baseidx_nxv8i64:
1023; RV64:       # %bb.0:
1024; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
1025; RV64-NEXT:    vsll.vi v16, v16, 3
1026; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1027; RV64-NEXT:    ret
1028  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %idxs
1029  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
1030  ret void
1031}
1032
declare void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half>, <vscale x 1 x half*>, i32, <vscale x 1 x i1>)

; f16-element scatter through an explicit pointer vector (e16, mf4).
define void @mscatter_nxv1f16(<vscale x 1 x half> %val, <vscale x 1 x half*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half> %val, <vscale x 1 x half*> %ptrs, i32 2, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)

; Same at mf2; on RV64 the 64-bit pointer vector occupies v10.
define void @mscatter_nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half> %val, <vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half>, <vscale x 4 x half*>, i32, <vscale x 4 x i1>)

; Same at m1 (pointer vector v10 on RV32, v12 on RV64).
define void @mscatter_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> %m)
  ret void
}

; A splatted all-true mask lowers to the unmasked store form (no v0.t).
define void @mscatter_truemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> %mtrue)
  ret void
}

; All-false mask: the scatter folds away entirely.
define void @mscatter_falsemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs) {
; RV32-LABEL: mscatter_falsemask_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_falsemask_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half>, <vscale x 8 x half*>, i32, <vscale x 8 x i1>)

; Same at m2 (pointer vector v12 on RV32, v16 on RV64).
define void @mscatter_nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

; Base + i8 indices into half elements: indices are extended, then doubled
; with vadd.vv (x+x == x<<1) to scale by sizeof(half).
define void @mscatter_baseidx_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v28, v10
; RV32-NEXT:    vadd.vv v28, v28, v28
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

; Explicit sext i8 -> i16 index form; codegen matches the implicit case above.
define void @mscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v28, v10
; RV32-NEXT:    vadd.vv v28, v28, v28
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

; Explicit zext i8 -> i16 index form; uses vzext instead of vsext.
define void @mscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v28, v10
; RV32-NEXT:    vadd.vv v28, v28, v28
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

; Native i16 indices into half elements; extended to the offset width then
; doubled with vadd.vv.
define void @mscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v28, v10
; RV32-NEXT:    vadd.vv v28, v28, v28
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}
1228
declare void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float>, <vscale x 1 x float*>, i32, <vscale x 1 x i1>)

; f32-element scatter through an explicit pointer vector (e32, mf2).
define void @mscatter_nxv1f32(<vscale x 1 x float> %val, <vscale x 1 x float*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float> %val, <vscale x 1 x float*> %ptrs, i32 4, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)

; Same at m1; RV64's wider pointer vector lands in v10.
define void @mscatter_nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> %val, <vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float>, <vscale x 4 x float*>, i32, <vscale x 4 x i1>)

; Same at m2 (pointer vector v10 on RV32, v12 on RV64).
define void @mscatter_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> %m)
  ret void
}

; A splatted all-true mask lowers to the unmasked store form (no v0.t).
define void @mscatter_truemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> %mtrue)
  ret void
}

; All-false mask: the scatter folds away entirely.
define void @mscatter_falsemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs) {
; RV32-LABEL: mscatter_falsemask_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_falsemask_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer)
  ret void
}
1312
1313declare void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float>, <vscale x 8 x float*>, i32, <vscale x 8 x i1>)
1314
1315define void @mscatter_nxv8f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m) {
1316; RV32-LABEL: mscatter_nxv8f32:
1317; RV32:       # %bb.0:
1318; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
1319; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
1320; RV32-NEXT:    ret
1321;
1322; RV64-LABEL: mscatter_nxv8f32:
1323; RV64:       # %bb.0:
1324; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
1325; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
1326; RV64-NEXT:    ret
1327  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
1328  ret void
1329}
1330
1331define void @mscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
1332; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f32:
1333; RV32:       # %bb.0:
1334; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
1335; RV32-NEXT:    vsext.vf4 v28, v12
1336; RV32-NEXT:    vsll.vi v28, v28, 2
1337; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
1338; RV32-NEXT:    ret
1339;
1340; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f32:
1341; RV64:       # %bb.0:
1342; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
1343; RV64-NEXT:    vsext.vf8 v16, v12
1344; RV64-NEXT:    vsll.vi v16, v16, 2
1345; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
1346; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1347; RV64-NEXT:    ret
1348  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i8> %idxs
1349  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
1350  ret void
1351}
1352
1353define void @mscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
1354; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32:
1355; RV32:       # %bb.0:
1356; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
1357; RV32-NEXT:    vsext.vf4 v28, v12
1358; RV32-NEXT:    vsll.vi v28, v28, 2
1359; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
1360; RV32-NEXT:    ret
1361;
1362; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32:
1363; RV64:       # %bb.0:
1364; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
1365; RV64-NEXT:    vsext.vf8 v16, v12
1366; RV64-NEXT:    vsll.vi v16, v16, 2
1367; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
1368; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1369; RV64-NEXT:    ret
1370  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
1371  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
1372  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
1373  ret void
1374}
1375
1376define void @mscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
1377; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32:
1378; RV32:       # %bb.0:
1379; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
1380; RV32-NEXT:    vzext.vf4 v28, v12
1381; RV32-NEXT:    vsll.vi v28, v28, 2
1382; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
1383; RV32-NEXT:    ret
1384;
1385; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32:
1386; RV64:       # %bb.0:
1387; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
1388; RV64-NEXT:    vzext.vf8 v16, v12
1389; RV64-NEXT:    vsll.vi v16, v16, 2
1390; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
1391; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1392; RV64-NEXT:    ret
1393  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
1394  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
1395  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
1396  ret void
1397}
1398
1399define void @mscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
1400; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f32:
1401; RV32:       # %bb.0:
1402; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
1403; RV32-NEXT:    vsext.vf2 v28, v12
1404; RV32-NEXT:    vsll.vi v28, v28, 2
1405; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
1406; RV32-NEXT:    ret
1407;
1408; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f32:
1409; RV64:       # %bb.0:
1410; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
1411; RV64-NEXT:    vsext.vf4 v16, v12
1412; RV64-NEXT:    vsll.vi v16, v16, 2
1413; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
1414; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1415; RV64-NEXT:    ret
1416  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i16> %idxs
1417  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
1418  ret void
1419}
1420
; Same as the raw-i16 case but with an explicit sext to i32 in the IR;
; codegen folds the extend into the index widening (vsext.vf2 / vsext.vf4).
define void @mscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v28, v12
; RV32-NEXT:    vsll.vi v28, v28, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}
1443
; Zero-extended i16 indices: identical shape to the sext case except the
; widening uses vzext.vf2 (RV32) / vzext.vf4 (RV64).
define void @mscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf2 v28, v12
; RV32-NEXT:    vsll.vi v28, v28, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}
1466
; i32 indices are already XLEN-sized on RV32 (no extend, just vsll.vi);
; RV64 still needs vsext.vf2 to reach the 64-bit index EEW.
define void @mscatter_baseidx_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v28, v12, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}
1487
1488declare void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double>, <vscale x 1 x double*>, i32, <vscale x 1 x i1>)
1489
; Direct f64 scatter through a pointer vector: pointers are the index
; operand with a zero base, so index EEW tracks pointer width (ei32/ei64).
define void @mscatter_nxv1f64(<vscale x 1 x double> %val, <vscale x 1 x double*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double> %val, <vscale x 1 x double*> %ptrs, i32 8, <vscale x 1 x i1> %m)
  ret void
}
1505
1506declare void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)
1507
; nxv2f64 scatter: value in v8 (m2), pointer vector in v10 on both targets.
define void @mscatter_nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> %val, <vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %m)
  ret void
}
1523
1524declare void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double>, <vscale x 4 x double*>, i32, <vscale x 4 x i1>)
1525
; nxv4f64 scatter: value in v8 (m4), pointer vector in v12.
define void @mscatter_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> %m)
  ret void
}
1541
; All-true mask (splat of i1 1): the scatter is emitted unmasked, with no
; v0.t suffix on the store.
define void @mscatter_truemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> %mtrue)
  ret void
}
1559
; All-false mask: the whole scatter is dead and folds to a bare ret.
define void @mscatter_falsemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs) {
; RV32-LABEL: mscatter_falsemask_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_falsemask_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer)
  ret void
}
1571
1572declare void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double>, <vscale x 8 x double*>, i32, <vscale x 8 x i1>)
1573
; nxv8f64 scatter: value occupies v8-v15 (m8), pointer vector starts at v16.
define void @mscatter_nxv8f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}
1589
; f64 scatter with raw i8 indices: RV32 widens to e32 (vsext.vf4) and keeps
; a 32-bit index EEW; RV64 widens to e64 (vsext.vf8). Shift-by-3 scales by
; sizeof(double).
define void @mscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v28, v16
; RV32-NEXT:    vsll.vi v28, v28, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}
1611
; Explicit i8->i64 sext in the IR: both targets materialize full 64-bit
; indices (vsext.vf8 + vsoxei64); RV32 does not shrink them to ei32 here.
define void @mscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}
1633
; Explicit i8->i64 zext: same shape as the sext case with vzext.vf8.
define void @mscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}
1655
; f64 scatter with raw i16 indices: RV32 widens to e32 (vsext.vf2, ei32);
; RV64 widens to e64 (vsext.vf4, ei64).
define void @mscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v28, v16
; RV32-NEXT:    vsll.vi v28, v28, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}
1677
; Explicit i16->i64 sext: both targets build 64-bit indices (vsext.vf4,
; vsoxei64), even on RV32.
define void @mscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}
1699
; Explicit i16->i64 zext: same as the sext case with vzext.vf4.
define void @mscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf4 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}
1721
; f64 scatter with raw i32 indices: RV32 uses them directly (ei32);
; RV64 widens with vsext.vf2 to ei64.
define void @mscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v28, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}
1742
; Explicit i32->i64 sext: both targets build 64-bit indices (vsext.vf2,
; vsoxei64).
define void @mscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf2 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}
1764
; Explicit i32->i64 zext: same as the sext case with vzext.vf2.
define void @mscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf2 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}
1786
; i64 indices need no extension on either target: just scale (vsll.vi 3)
; and scatter with a 64-bit index EEW.
define void @mscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}
1805
1806declare void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double>, <vscale x 16 x double*>, i32, <vscale x 16 x i1>)
1807
1808declare <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double>, <vscale x 8 x double>, i64)
1809declare <vscale x 16 x double*> @llvm.experimental.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*>, <vscale x 8 x double*>, i64)
1810
; nxv16f64 exceeds the register budget, so the scatter is split into two
; m8 halves. The mask for the second half is produced by vslidedown.vx by
; vlenb/8 elements. On RV64 the second value half must be spilled to the
; stack (vs8r.v/vl8re8.v) because both pointer vectors also need m8 groups.
define void @mscatter_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, <vscale x 8 x double*> %ptrs0, <vscale x 8 x double*> %ptrs1, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vl4re32.v v28, (a0)
; RV32-NEXT:    vl4re32.v v24, (a1)
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v28, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    srli a0, a0, 3
; RV32-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
; RV32-NEXT:    vslidedown.vx v0, v0, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v16, (zero), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    slli a2, a2, 3
; RV64-NEXT:    sub sp, sp, a2
; RV64-NEXT:    vl8re64.v v24, (a0)
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV64-NEXT:    vl8re64.v v16, (a1)
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v24, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    srli a0, a0, 3
; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v0, a0
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 3
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %p0 = call <vscale x 16 x double*> @llvm.experimental.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*> undef, <vscale x 8 x double*> %ptrs0, i64 0)
  %p1 = call <vscale x 16 x double*> @llvm.experimental.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*> %p0, <vscale x 8 x double*> %ptrs1, i64 8)
  %v0 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %v1, <vscale x 16 x double*> %p1, i32 8, <vscale x 16 x i1> %m)
  ret void
}
1859
; Split nxv16 scatter with i8 indices loaded from memory (vl2r.v). Each
; half widens its index sub-register (v2 then v3 on RV64) and stores with
; the mask slid down for the second half.
define void @mscatter_baseidx_nxv16i8_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, double* %base, <vscale x 16 x i8> %idxs, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vl2r.v v2, (a1)
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v24, v2
; RV32-NEXT:    vsll.vi v24, v24, 3
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    srli a1, a1, 3
; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV32-NEXT:    vslidedown.vx v0, v0, a1
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vl2r.v v2, (a1)
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v2
; RV64-NEXT:    vsll.vi v24, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    srli a1, a1, 3
; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v0, a1
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v8, v3
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i8> %idxs
  %v0 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %v1, <vscale x 16 x double*> %ptrs, i32 8, <vscale x 16 x i1> %m)
  ret void
}
1899
; Split nxv16 scatter with i16 indices loaded from memory (vl4re16.v).
; Same two-half structure as the i8-index case, widening with vsext.vf2
; (RV32, to e32) or vsext.vf4 (RV64, to e64, halves v4 then v6).
define void @mscatter_baseidx_nxv16i16_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vl4re16.v v4, (a1)
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, mu
; RV32-NEXT:    vsext.vf2 v24, v4
; RV32-NEXT:    vsll.vi v24, v24, 3
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    srli a1, a1, 3
; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV32-NEXT:    vslidedown.vx v0, v0, a1
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vl4re16.v v4, (a1)
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v4
; RV64-NEXT:    vsll.vi v24, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    srli a1, a1, 3
; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v0, a1
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v8, v6
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i16> %idxs
  %v0 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %v1, <vscale x 16 x double*> %ptrs, i32 8, <vscale x 16 x i1> %m)
  ret void
}
1939