1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
3
; Test: two pointer induction variables feeding a v4i32 gather and scatter.
; Each iteration gathers from %A at element offsets <0, 4, 8, 12>, adds a
; splat of %y, and scatters to %B at the same offsets; both pointers then
; advance by 16 elements. The loop exits once %index.next reaches 996 in steps
; of 4, i.e. after 249 iterations (the expected `mov.w lr, #249`).
; The expected output addresses memory as scalar base + offset vector
; (`[r0, q0, uxtw #2]`) and bumps r0/r1 by 64 bytes per iteration.
4define void @ptr_iv_v4i32(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i32 %y) {
5; CHECK-LABEL: ptr_iv_v4i32:
6; CHECK:       @ %bb.0: @ %vector.ph
7; CHECK-NEXT:    .save {r7, lr}
8; CHECK-NEXT:    push {r7, lr}
9; CHECK-NEXT:    mov.w lr, #249
10; CHECK-NEXT:    adr r3, .LCPI0_0
11; CHECK-NEXT:    vldrw.u32 q0, [r3]
12; CHECK-NEXT:  .LBB0_1: @ %vector.body
13; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
14; CHECK-NEXT:    vldrw.u32 q1, [r0, q0, uxtw #2]
15; CHECK-NEXT:    adds r0, #64
16; CHECK-NEXT:    vadd.i32 q1, q1, r2
17; CHECK-NEXT:    vstrw.32 q1, [r1, q0, uxtw #2]
18; CHECK-NEXT:    adds r1, #64
19; CHECK-NEXT:    le lr, .LBB0_1
20; CHECK-NEXT:  @ %bb.2: @ %end
21; CHECK-NEXT:    pop {r7, pc}
22; CHECK-NEXT:    .p2align 4
23; CHECK-NEXT:  @ %bb.3:
24; CHECK-NEXT:  .LCPI0_0:
25; CHECK-NEXT:    .long 0 @ 0x0
26; CHECK-NEXT:    .long 4 @ 0x4
27; CHECK-NEXT:    .long 8 @ 0x8
28; CHECK-NEXT:    .long 12 @ 0xc
29vector.ph:
  ; Broadcast %y into every lane of a <4 x i32>.
30  %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %y, i32 0
31  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
32  br label %vector.body
33
34vector.body:
35  %pointer.phi = phi i32* [ %A, %vector.ph ], [ %0, %vector.body ]
36  %pointer.phi13 = phi i32* [ %B, %vector.ph ], [ %2, %vector.body ]
37  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  ; %0 / %2 are the scalar pointers for the next iteration; %1 / %3 are the
  ; per-lane address vectors used by the gather and scatter below.
38  %0 = getelementptr i32, i32* %pointer.phi, i32 16
39  %1 = getelementptr i32, i32* %pointer.phi, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
40  %2 = getelementptr i32, i32* %pointer.phi13, i32 16
41  %3 = getelementptr i32, i32* %pointer.phi13, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
42  %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
43  %4 = add nsw <4 x i32> %wide.masked.gather, %broadcast.splat
44  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %4, <4 x i32*> %3, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ; %index only controls the trip count; it is not used to form addresses.
45  %index.next = add i32 %index, 4
46  %5 = icmp eq i32 %index.next, 996
47  br i1 %5, label %end, label %vector.body
48
49end:
50  ret void
51}
52
; Test: like ptr_iv_v4i32, but a further constant GEP is applied to the vector
; of pointers before use (+3 elements for the gather, +5 for the scatter).
; The expected output folds those constants into the offset vectors, giving
; the constant pools <3, 7, 11, 15> and <5, 9, 13, 17>.
; NOTE(review): %scatter.address is derived from %1 (the %A-based vector), not
; from %3, so %3 is unused and the expected store goes through r0 even though
; %A is marked readonly. ptr_iv_v8i16_mult and ptr_iv_v16i8_mult use %3 here;
; confirm whether this difference is intentional before changing it, since the
; autogenerated assertions would need regenerating.
53define void @ptr_iv_v4i32_mult(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i32 %y) {
54; CHECK-LABEL: ptr_iv_v4i32_mult:
55; CHECK:       @ %bb.0: @ %vector.ph
56; CHECK-NEXT:    .save {r7, lr}
57; CHECK-NEXT:    push {r7, lr}
58; CHECK-NEXT:    mov.w lr, #249
59; CHECK-NEXT:    adr r1, .LCPI1_0
60; CHECK-NEXT:    adr r3, .LCPI1_1
61; CHECK-NEXT:    vldrw.u32 q0, [r3]
62; CHECK-NEXT:    vldrw.u32 q1, [r1]
63; CHECK-NEXT:  .LBB1_1: @ %vector.body
64; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
65; CHECK-NEXT:    vldrw.u32 q2, [r0, q0, uxtw #2]
66; CHECK-NEXT:    vadd.i32 q2, q2, r2
67; CHECK-NEXT:    vstrw.32 q2, [r0, q1, uxtw #2]
68; CHECK-NEXT:    adds r0, #64
69; CHECK-NEXT:    le lr, .LBB1_1
70; CHECK-NEXT:  @ %bb.2: @ %end
71; CHECK-NEXT:    pop {r7, pc}
72; CHECK-NEXT:    .p2align 4
73; CHECK-NEXT:  @ %bb.3:
74; CHECK-NEXT:  .LCPI1_0:
75; CHECK-NEXT:    .long 5 @ 0x5
76; CHECK-NEXT:    .long 9 @ 0x9
77; CHECK-NEXT:    .long 13 @ 0xd
78; CHECK-NEXT:    .long 17 @ 0x11
79; CHECK-NEXT:  .LCPI1_1:
80; CHECK-NEXT:    .long 3 @ 0x3
81; CHECK-NEXT:    .long 7 @ 0x7
82; CHECK-NEXT:    .long 11 @ 0xb
83; CHECK-NEXT:    .long 15 @ 0xf
84vector.ph:
  ; Broadcast %y into every lane of a <4 x i32>.
85  %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %y, i32 0
86  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
87  br label %vector.body
88
89vector.body:
90  %pointer.phi = phi i32* [ %A, %vector.ph ], [ %0, %vector.body ]
91  %pointer.phi13 = phi i32* [ %B, %vector.ph ], [ %2, %vector.body ]
92  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
93  %0 = getelementptr i32, i32* %pointer.phi, i32 16
94  %1 = getelementptr i32, i32* %pointer.phi, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  ; Second-level GEP on the pointer vector: every lane moves by +3 elements.
95  %gather.address = getelementptr i32, <4 x i32*> %1, i32 3
96  %2 = getelementptr i32, i32* %pointer.phi13, i32 16
97  %3 = getelementptr i32, i32* %pointer.phi13, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  ; Built from %1 (not %3), so the scatter targets %A-based addresses.
98  %scatter.address = getelementptr i32, <4 x i32*> %1, i32 5
99  %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gather.address, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
100  %4 = add nsw <4 x i32> %wide.masked.gather, %broadcast.splat
101  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %4, <4 x i32*> %scatter.address, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ; %index only controls the trip count (996 / 4 = 249 iterations).
102  %index.next = add i32 %index, 4
103  %5 = icmp eq i32 %index.next, 996
104  br i1 %5, label %end, label %vector.body
105
106end:
107  ret void
108}
109
; Test: two pointer induction variables feeding a v8i16 gather and scatter.
; Each iteration gathers from %A at element offsets <0, 4, ..., 28> (given as
; <8 x i16>), adds a splat of %y, and scatters to %B at the same offsets; both
; pointers advance by 32 i16 elements, which the expected output shows as a
; 64-byte bump of r0/r1. The expected accesses are halfword gathers/scatters
; of the form `[r0, q0, uxtw #1]`.
; %index still steps by 4 up to 996, so the loop runs 249 times.
110define void @ptr_iv_v8i16(i16* noalias nocapture readonly %A, i16* noalias nocapture %B, i16 %y) {
111; CHECK-LABEL: ptr_iv_v8i16:
112; CHECK:       @ %bb.0: @ %vector.ph
113; CHECK-NEXT:    .save {r7, lr}
114; CHECK-NEXT:    push {r7, lr}
115; CHECK-NEXT:    mov.w lr, #249
116; CHECK-NEXT:    adr r3, .LCPI2_0
117; CHECK-NEXT:    vldrw.u32 q0, [r3]
118; CHECK-NEXT:  .LBB2_1: @ %vector.body
119; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
120; CHECK-NEXT:    vldrh.u16 q1, [r0, q0, uxtw #1]
121; CHECK-NEXT:    adds r0, #64
122; CHECK-NEXT:    vadd.i16 q1, q1, r2
123; CHECK-NEXT:    vstrh.16 q1, [r1, q0, uxtw #1]
124; CHECK-NEXT:    adds r1, #64
125; CHECK-NEXT:    le lr, .LBB2_1
126; CHECK-NEXT:  @ %bb.2: @ %end
127; CHECK-NEXT:    pop {r7, pc}
128; CHECK-NEXT:    .p2align 4
129; CHECK-NEXT:  @ %bb.3:
130; CHECK-NEXT:  .LCPI2_0:
131; CHECK-NEXT:    .short 0 @ 0x0
132; CHECK-NEXT:    .short 4 @ 0x4
133; CHECK-NEXT:    .short 8 @ 0x8
134; CHECK-NEXT:    .short 12 @ 0xc
135; CHECK-NEXT:    .short 16 @ 0x10
136; CHECK-NEXT:    .short 20 @ 0x14
137; CHECK-NEXT:    .short 24 @ 0x18
138; CHECK-NEXT:    .short 28 @ 0x1c
139vector.ph:
  ; Broadcast %y into every lane of a <8 x i16>.
140  %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %y, i32 0
141  %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
142  br label %vector.body
143
144vector.body:
145  %pointer.phi = phi i16* [ %A, %vector.ph ], [ %0, %vector.body ]
146  %pointer.phi13 = phi i16* [ %B, %vector.ph ], [ %2, %vector.body ]
147  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  ; %0 / %2 are the scalar pointers for the next iteration; %1 / %3 are the
  ; per-lane address vectors used by the gather and scatter below.
148  %0 = getelementptr i16, i16* %pointer.phi, i32 32
149  %1 = getelementptr i16, i16* %pointer.phi, <8 x i16> <i16 0, i16 4, i16 8, i16 12, i16 16, i16 20, i16 24, i16 28>
150  %2 = getelementptr i16, i16* %pointer.phi13, i32 32
151  %3 = getelementptr i16, i16* %pointer.phi13, <8 x i16> <i16 0, i16 4, i16 8, i16 12, i16 16, i16 20, i16 24, i16 28>
152  %wide.masked.gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %1, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
153  %4 = add nsw <8 x i16> %wide.masked.gather, %broadcast.splat
154  call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %4, <8 x i16*> %3, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ; %index only controls the trip count; it is not used to form addresses.
155  %index.next = add i32 %index, 4
156  %5 = icmp eq i32 %index.next, 996
157  br i1 %5, label %end, label %vector.body
158
159end:
160  ret void
161}
162
163
; Test: like ptr_iv_v8i16, but a further constant GEP is applied to each
; vector of pointers before use: +3 elements on the %A-based vector for the
; gather and +5 elements on the %B-based vector for the scatter.
; The expected output folds those constants into two offset vectors
; (<3, 7, ..., 31> and <5, 9, ..., 33>) and keeps separate base registers
; r0 and r1, each bumped by 64 bytes per iteration.
164define void @ptr_iv_v8i16_mult(i16* noalias nocapture readonly %A, i16* noalias nocapture %B, i16 %y) {
165; CHECK-LABEL: ptr_iv_v8i16_mult:
166; CHECK:       @ %bb.0: @ %vector.ph
167; CHECK-NEXT:    .save {r7, lr}
168; CHECK-NEXT:    push {r7, lr}
169; CHECK-NEXT:    mov.w lr, #249
170; CHECK-NEXT:    adr.w r12, .LCPI3_0
171; CHECK-NEXT:    adr r3, .LCPI3_1
172; CHECK-NEXT:    vldrw.u32 q0, [r3]
173; CHECK-NEXT:    vldrw.u32 q1, [r12]
174; CHECK-NEXT:  .LBB3_1: @ %vector.body
175; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
176; CHECK-NEXT:    vldrh.u16 q2, [r0, q0, uxtw #1]
177; CHECK-NEXT:    adds r0, #64
178; CHECK-NEXT:    vadd.i16 q2, q2, r2
179; CHECK-NEXT:    vstrh.16 q2, [r1, q1, uxtw #1]
180; CHECK-NEXT:    adds r1, #64
181; CHECK-NEXT:    le lr, .LBB3_1
182; CHECK-NEXT:  @ %bb.2: @ %end
183; CHECK-NEXT:    pop {r7, pc}
184; CHECK-NEXT:    .p2align 4
185; CHECK-NEXT:  @ %bb.3:
186; CHECK-NEXT:  .LCPI3_0:
187; CHECK-NEXT:    .short 5 @ 0x5
188; CHECK-NEXT:    .short 9 @ 0x9
189; CHECK-NEXT:    .short 13 @ 0xd
190; CHECK-NEXT:    .short 17 @ 0x11
191; CHECK-NEXT:    .short 21 @ 0x15
192; CHECK-NEXT:    .short 25 @ 0x19
193; CHECK-NEXT:    .short 29 @ 0x1d
194; CHECK-NEXT:    .short 33 @ 0x21
195; CHECK-NEXT:  .LCPI3_1:
196; CHECK-NEXT:    .short 3 @ 0x3
197; CHECK-NEXT:    .short 7 @ 0x7
198; CHECK-NEXT:    .short 11 @ 0xb
199; CHECK-NEXT:    .short 15 @ 0xf
200; CHECK-NEXT:    .short 19 @ 0x13
201; CHECK-NEXT:    .short 23 @ 0x17
202; CHECK-NEXT:    .short 27 @ 0x1b
203; CHECK-NEXT:    .short 31 @ 0x1f
204vector.ph:
  ; Broadcast %y into every lane of a <8 x i16>.
205  %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %y, i32 0
206  %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
207  br label %vector.body
208
209vector.body:
210  %pointer.phi = phi i16* [ %A, %vector.ph ], [ %0, %vector.body ]
211  %pointer.phi13 = phi i16* [ %B, %vector.ph ], [ %2, %vector.body ]
212  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
213  %0 = getelementptr i16, i16* %pointer.phi, i32 32
214  %1 = getelementptr i16, i16* %pointer.phi, <8 x i16> <i16 0, i16 4, i16 8, i16 12, i16 16, i16 20, i16 24, i16 28>
  ; Second-level GEP on the %A-based pointer vector: every lane moves by +3.
215  %gather.address = getelementptr i16, <8 x i16*> %1, i16 3
216  %2 = getelementptr i16, i16* %pointer.phi13, i32 32
217  %3 = getelementptr i16, i16* %pointer.phi13, <8 x i16> <i16 0, i16 4, i16 8, i16 12, i16 16, i16 20, i16 24, i16 28>
  ; Second-level GEP on the %B-based pointer vector: every lane moves by +5.
218  %scatter.address = getelementptr i16, <8 x i16*> %3, i16 5
219  %wide.masked.gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gather.address, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
220  %4 = add nsw <8 x i16> %wide.masked.gather, %broadcast.splat
221  call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %4, <8 x i16*> %scatter.address, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ; %index only controls the trip count (996 / 4 = 249 iterations).
222  %index.next = add i32 %index, 4
223  %5 = icmp eq i32 %index.next, 996
224  br i1 %5, label %end, label %vector.body
225
226end:
227  ret void
228}
229
; Test: two pointer induction variables feeding a v16i8 gather and scatter.
; Each iteration gathers from %A at byte offsets <0, 4, ..., 60> (given as
; <16 x i8>), adds a splat of %y, and scatters to %B at the same offsets; both
; pointers advance by 64 bytes. The expected accesses are byte
; gathers/scatters of the form `[r0, q0]` with no offset scaling.
; %index still steps by 4 up to 996, so the loop runs 249 times.
230define void @ptr_iv_v16i8(i8* noalias nocapture readonly %A, i8* noalias nocapture %B, i8 %y) {
231; CHECK-LABEL: ptr_iv_v16i8:
232; CHECK:       @ %bb.0: @ %vector.ph
233; CHECK-NEXT:    .save {r7, lr}
234; CHECK-NEXT:    push {r7, lr}
235; CHECK-NEXT:    mov.w lr, #249
236; CHECK-NEXT:    adr r3, .LCPI4_0
237; CHECK-NEXT:    vldrw.u32 q0, [r3]
238; CHECK-NEXT:  .LBB4_1: @ %vector.body
239; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
240; CHECK-NEXT:    vldrb.u8 q1, [r0, q0]
241; CHECK-NEXT:    adds r0, #64
242; CHECK-NEXT:    vadd.i8 q1, q1, r2
243; CHECK-NEXT:    vstrb.8 q1, [r1, q0]
244; CHECK-NEXT:    adds r1, #64
245; CHECK-NEXT:    le lr, .LBB4_1
246; CHECK-NEXT:  @ %bb.2: @ %end
247; CHECK-NEXT:    pop {r7, pc}
248; CHECK-NEXT:    .p2align 4
249; CHECK-NEXT:  @ %bb.3:
250; CHECK-NEXT:  .LCPI4_0:
251; CHECK-NEXT:    .byte 0 @ 0x0
252; CHECK-NEXT:    .byte 4 @ 0x4
253; CHECK-NEXT:    .byte 8 @ 0x8
254; CHECK-NEXT:    .byte 12 @ 0xc
255; CHECK-NEXT:    .byte 16 @ 0x10
256; CHECK-NEXT:    .byte 20 @ 0x14
257; CHECK-NEXT:    .byte 24 @ 0x18
258; CHECK-NEXT:    .byte 28 @ 0x1c
259; CHECK-NEXT:    .byte 32 @ 0x20
260; CHECK-NEXT:    .byte 36 @ 0x24
261; CHECK-NEXT:    .byte 40 @ 0x28
262; CHECK-NEXT:    .byte 44 @ 0x2c
263; CHECK-NEXT:    .byte 48 @ 0x30
264; CHECK-NEXT:    .byte 52 @ 0x34
265; CHECK-NEXT:    .byte 56 @ 0x38
266; CHECK-NEXT:    .byte 60 @ 0x3c
267vector.ph:                                        ; entry block (no predecessors)
  ; Broadcast %y into every lane of a <16 x i8>.
268  %broadcast.splatinsert = insertelement <16 x i8> undef, i8 %y, i32 0
269  %broadcast.splat = shufflevector <16 x i8> %broadcast.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
270  br label %vector.body
271
272vector.body:
273  %pointer.phi = phi i8* [ %A, %vector.ph ], [ %0, %vector.body ]
274  %pointer.phi13 = phi i8* [ %B, %vector.ph ], [ %2, %vector.body ]
275  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  ; %0 / %2 are the scalar pointers for the next iteration; %1 / %3 are the
  ; per-lane address vectors used by the gather and scatter below.
276  %0 = getelementptr i8, i8* %pointer.phi, i32 64
277  %1 = getelementptr i8, i8* %pointer.phi, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 32, i8 36, i8 40, i8 44, i8 48, i8 52, i8 56, i8 60>
278  %2 = getelementptr i8, i8* %pointer.phi13, i32 64
279  %3 = getelementptr i8, i8* %pointer.phi13, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 32, i8 36, i8 40, i8 44, i8 48, i8 52, i8 56, i8 60>
280  %wide.masked.gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %1, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
281  %4 = add nsw <16 x i8> %wide.masked.gather, %broadcast.splat
282  call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %4, <16 x i8*> %3, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ; %index only controls the trip count; it is not used to form addresses.
283  %index.next = add i32 %index, 4
284  %5 = icmp eq i32 %index.next, 996
285  br i1 %5, label %end, label %vector.body
286
287end:
288  ret void
289}
290
291
; Test: like ptr_iv_v16i8, but a further constant GEP is applied to each
; vector of pointers before use: +3 bytes on the %A-based vector for the
; gather and +5 bytes on the %B-based vector for the scatter.
; The expected output folds those constants into two offset vectors
; (<3, 7, ..., 63> and <5, 9, ..., 65>) and keeps separate base registers
; r0 and r1, each bumped by 64 bytes per iteration.
292define void @ptr_iv_v16i8_mult(i8* noalias nocapture readonly %A, i8* noalias nocapture %B, i8 %y) {
293; CHECK-LABEL: ptr_iv_v16i8_mult:
294; CHECK:       @ %bb.0: @ %vector.ph
295; CHECK-NEXT:    .save {r7, lr}
296; CHECK-NEXT:    push {r7, lr}
297; CHECK-NEXT:    mov.w lr, #249
298; CHECK-NEXT:    adr.w r12, .LCPI5_0
299; CHECK-NEXT:    adr r3, .LCPI5_1
300; CHECK-NEXT:    vldrw.u32 q0, [r3]
301; CHECK-NEXT:    vldrw.u32 q1, [r12]
302; CHECK-NEXT:  .LBB5_1: @ %vector.body
303; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
304; CHECK-NEXT:    vldrb.u8 q2, [r0, q0]
305; CHECK-NEXT:    adds r0, #64
306; CHECK-NEXT:    vadd.i8 q2, q2, r2
307; CHECK-NEXT:    vstrb.8 q2, [r1, q1]
308; CHECK-NEXT:    adds r1, #64
309; CHECK-NEXT:    le lr, .LBB5_1
310; CHECK-NEXT:  @ %bb.2: @ %end
311; CHECK-NEXT:    pop {r7, pc}
312; CHECK-NEXT:    .p2align 4
313; CHECK-NEXT:  @ %bb.3:
314; CHECK-NEXT:  .LCPI5_0:
315; CHECK-NEXT:    .byte 5 @ 0x5
316; CHECK-NEXT:    .byte 9 @ 0x9
317; CHECK-NEXT:    .byte 13 @ 0xd
318; CHECK-NEXT:    .byte 17 @ 0x11
319; CHECK-NEXT:    .byte 21 @ 0x15
320; CHECK-NEXT:    .byte 25 @ 0x19
321; CHECK-NEXT:    .byte 29 @ 0x1d
322; CHECK-NEXT:    .byte 33 @ 0x21
323; CHECK-NEXT:    .byte 37 @ 0x25
324; CHECK-NEXT:    .byte 41 @ 0x29
325; CHECK-NEXT:    .byte 45 @ 0x2d
326; CHECK-NEXT:    .byte 49 @ 0x31
327; CHECK-NEXT:    .byte 53 @ 0x35
328; CHECK-NEXT:    .byte 57 @ 0x39
329; CHECK-NEXT:    .byte 61 @ 0x3d
330; CHECK-NEXT:    .byte 65 @ 0x41
331; CHECK-NEXT:  .LCPI5_1:
332; CHECK-NEXT:    .byte 3 @ 0x3
333; CHECK-NEXT:    .byte 7 @ 0x7
334; CHECK-NEXT:    .byte 11 @ 0xb
335; CHECK-NEXT:    .byte 15 @ 0xf
336; CHECK-NEXT:    .byte 19 @ 0x13
337; CHECK-NEXT:    .byte 23 @ 0x17
338; CHECK-NEXT:    .byte 27 @ 0x1b
339; CHECK-NEXT:    .byte 31 @ 0x1f
340; CHECK-NEXT:    .byte 35 @ 0x23
341; CHECK-NEXT:    .byte 39 @ 0x27
342; CHECK-NEXT:    .byte 43 @ 0x2b
343; CHECK-NEXT:    .byte 47 @ 0x2f
344; CHECK-NEXT:    .byte 51 @ 0x33
345; CHECK-NEXT:    .byte 55 @ 0x37
346; CHECK-NEXT:    .byte 59 @ 0x3b
347; CHECK-NEXT:    .byte 63 @ 0x3f
348vector.ph:                                        ; entry block (no predecessors)
  ; Broadcast %y into every lane of a <16 x i8>.
349  %broadcast.splatinsert = insertelement <16 x i8> undef, i8 %y, i32 0
350  %broadcast.splat = shufflevector <16 x i8> %broadcast.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
351  br label %vector.body
352
353vector.body:
354  %pointer.phi = phi i8* [ %A, %vector.ph ], [ %0, %vector.body ]
355  %pointer.phi13 = phi i8* [ %B, %vector.ph ], [ %2, %vector.body ]
356  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
357  %0 = getelementptr i8, i8* %pointer.phi, i32 64
358  %1 = getelementptr i8, i8* %pointer.phi, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 32, i8 36, i8 40, i8 44, i8 48, i8 52, i8 56, i8 60>
  ; Second-level GEP on the %A-based pointer vector: every lane moves by +3.
359  %gather.address = getelementptr i8, <16 x i8*> %1, i8 3
360  %2 = getelementptr i8, i8* %pointer.phi13, i32 64
361  %3 = getelementptr i8, i8* %pointer.phi13, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 32, i8 36, i8 40, i8 44, i8 48, i8 52, i8 56, i8 60>
  ; Second-level GEP on the %B-based pointer vector: every lane moves by +5.
362  %scatter.address = getelementptr i8, <16 x i8*> %3, i8 5
363  %wide.masked.gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gather.address, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
364  %4 = add nsw <16 x i8> %wide.masked.gather, %broadcast.splat
365  call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %4, <16 x i8*> %scatter.address, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ; %index only controls the trip count (996 / 4 = 249 iterations).
366  %index.next = add i32 %index, 4
367  %5 = icmp eq i32 %index.next, 996
368  br i1 %5, label %end, label %vector.body
369
370end:
371  ret void
372}
373
; Test: floating-point counterpart of the v4i32 pointer-IV test. Each
; iteration gathers <4 x float> from %A at element offsets <0, 4, 8, 12>,
; fadds a splat of %y, and scatters to %B at the same offsets; both pointers
; advance by 16 elements (a 64-byte bump of r0/r1 in the expected output).
; The expected add is `vadd.f32 q1, q1, r2`, taking the scalar operand
; directly from r2.
374define void @ptr_iv_v4f32(float* noalias nocapture readonly %A, float* noalias nocapture %B, float %y) {
375; CHECK-LABEL: ptr_iv_v4f32:
376; CHECK:       @ %bb.0: @ %vector.ph
377; CHECK-NEXT:    .save {r7, lr}
378; CHECK-NEXT:    push {r7, lr}
379; CHECK-NEXT:    mov.w lr, #249
380; CHECK-NEXT:    adr r3, .LCPI6_0
381; CHECK-NEXT:    vldrw.u32 q0, [r3]
382; CHECK-NEXT:  .LBB6_1: @ %vector.body
383; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
384; CHECK-NEXT:    vldrw.u32 q1, [r0, q0, uxtw #2]
385; CHECK-NEXT:    adds r0, #64
386; CHECK-NEXT:    vadd.f32 q1, q1, r2
387; CHECK-NEXT:    vstrw.32 q1, [r1, q0, uxtw #2]
388; CHECK-NEXT:    adds r1, #64
389; CHECK-NEXT:    le lr, .LBB6_1
390; CHECK-NEXT:  @ %bb.2: @ %end
391; CHECK-NEXT:    pop {r7, pc}
392; CHECK-NEXT:    .p2align 4
393; CHECK-NEXT:  @ %bb.3:
394; CHECK-NEXT:  .LCPI6_0:
395; CHECK-NEXT:    .long 0 @ 0x0
396; CHECK-NEXT:    .long 4 @ 0x4
397; CHECK-NEXT:    .long 8 @ 0x8
398; CHECK-NEXT:    .long 12 @ 0xc
399vector.ph:                                        ; entry block (no predecessors)
  ; Broadcast %y into every lane of a <4 x float>.
400  %broadcast.splatinsert = insertelement <4 x float> undef, float %y, i32 0
401  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
402  br label %vector.body
403
404vector.body:
405  %pointer.phi = phi float* [ %A, %vector.ph ], [ %0, %vector.body ]
406  %pointer.phi13 = phi float* [ %B, %vector.ph ], [ %2, %vector.body ]
407  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  ; %0 / %2 are the scalar pointers for the next iteration; %1 / %3 are the
  ; per-lane address vectors used by the gather and scatter below.
408  %0 = getelementptr float, float* %pointer.phi, i32 16
409  %1 = getelementptr float, float* %pointer.phi, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
410  %2 = getelementptr float, float* %pointer.phi13, i32 16
411  %3 = getelementptr float, float* %pointer.phi13, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
412  %wide.masked.gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
413  %4 = fadd <4 x float> %wide.masked.gather, %broadcast.splat
414  call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %4, <4 x float*> %3, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ; %index only controls the trip count (996 / 4 = 249 iterations).
415  %index.next = add i32 %index, 4
416  %5 = icmp eq i32 %index.next, 996
417  br i1 %5, label %end, label %vector.body
418
419end:
420  ret void
421}
422
; Test: like ptr_iv_v4f32, but a further constant GEP is applied to the vector
; of pointers before use (+3 elements for the gather, +5 for the scatter).
; The expected output folds those constants into the offset vectors, giving
; the constant pools <3, 7, 11, 15> and <5, 9, 13, 17>.
; NOTE(review): %scatter.address is derived from %1 (the %A-based vector), not
; from %3, so %3 is unused and the expected store goes through r0 even though
; %A is marked readonly. ptr_iv_v8i16_mult and ptr_iv_v16i8_mult use %3 here;
; confirm whether this difference is intentional before changing it, since the
; autogenerated assertions would need regenerating.
423define void @ptr_iv_v4f32_mult(float* noalias nocapture readonly %A, float* noalias nocapture %B, float %y) {
424; CHECK-LABEL: ptr_iv_v4f32_mult:
425; CHECK:       @ %bb.0: @ %vector.ph
426; CHECK-NEXT:    .save {r7, lr}
427; CHECK-NEXT:    push {r7, lr}
428; CHECK-NEXT:    mov.w lr, #249
429; CHECK-NEXT:    adr r1, .LCPI7_0
430; CHECK-NEXT:    adr r3, .LCPI7_1
431; CHECK-NEXT:    vldrw.u32 q0, [r3]
432; CHECK-NEXT:    vldrw.u32 q1, [r1]
433; CHECK-NEXT:  .LBB7_1: @ %vector.body
434; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
435; CHECK-NEXT:    vldrw.u32 q2, [r0, q0, uxtw #2]
436; CHECK-NEXT:    vadd.f32 q2, q2, r2
437; CHECK-NEXT:    vstrw.32 q2, [r0, q1, uxtw #2]
438; CHECK-NEXT:    adds r0, #64
439; CHECK-NEXT:    le lr, .LBB7_1
440; CHECK-NEXT:  @ %bb.2: @ %end
441; CHECK-NEXT:    pop {r7, pc}
442; CHECK-NEXT:    .p2align 4
443; CHECK-NEXT:  @ %bb.3:
444; CHECK-NEXT:  .LCPI7_0:
445; CHECK-NEXT:    .long 5 @ 0x5
446; CHECK-NEXT:    .long 9 @ 0x9
447; CHECK-NEXT:    .long 13 @ 0xd
448; CHECK-NEXT:    .long 17 @ 0x11
449; CHECK-NEXT:  .LCPI7_1:
450; CHECK-NEXT:    .long 3 @ 0x3
451; CHECK-NEXT:    .long 7 @ 0x7
452; CHECK-NEXT:    .long 11 @ 0xb
453; CHECK-NEXT:    .long 15 @ 0xf
454vector.ph:                                        ; entry block (no predecessors)
  ; Broadcast %y into every lane of a <4 x float>.
455  %broadcast.splatinsert = insertelement <4 x float> undef, float %y, i32 0
456  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
457  br label %vector.body
458
459vector.body:
460  %pointer.phi = phi float* [ %A, %vector.ph ], [ %0, %vector.body ]
461  %pointer.phi13 = phi float* [ %B, %vector.ph ], [ %2, %vector.body ]
462  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
463  %0 = getelementptr float, float* %pointer.phi, i32 16
464  %1 = getelementptr float, float* %pointer.phi, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  ; Second-level GEP on the pointer vector: every lane moves by +3 elements.
465  %gather.address = getelementptr float, <4 x float*> %1, i32 3
466  %2 = getelementptr float, float* %pointer.phi13, i32 16
467  %3 = getelementptr float, float* %pointer.phi13, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  ; Built from %1 (not %3), so the scatter targets %A-based addresses.
468  %scatter.address = getelementptr float, <4 x float*> %1, i32 5
469  %wide.masked.gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gather.address, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
470  %4 = fadd <4 x float> %wide.masked.gather, %broadcast.splat
471  call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %4, <4 x float*> %scatter.address, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ; %index only controls the trip count (996 / 4 = 249 iterations).
472  %index.next = add i32 %index, 4
473  %5 = icmp eq i32 %index.next, 996
474  br i1 %5, label %end, label %vector.body
475
476end:
477  ret void
478}
479
; Test: half-precision counterpart of the v8i16 pointer-IV test. %y arrives as
; a float and is truncated to half before being splatted; the expected output
; shows this as `vcvtb.f16.f32` followed by `vmov.f16 r2, s0` ahead of the loop.
; Each iteration gathers <8 x half> from %A at element offsets
; <0, 4, ..., 28>, fadds the splat, and scatters to %B at the same offsets;
; both pointers advance by 32 elements (a 64-byte bump of r0/r1).
480define void @ptr_iv_v8f16(half* noalias nocapture readonly %A, half* noalias nocapture %B, float %y) {
481; CHECK-LABEL: ptr_iv_v8f16:
482; CHECK:       @ %bb.0: @ %vector.ph
483; CHECK-NEXT:    .save {r7, lr}
484; CHECK-NEXT:    push {r7, lr}
485; CHECK-NEXT:    vmov s0, r2
486; CHECK-NEXT:    mov.w lr, #249
487; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
488; CHECK-NEXT:    adr r3, .LCPI8_0
489; CHECK-NEXT:    vmov.f16 r2, s0
490; CHECK-NEXT:    vldrw.u32 q0, [r3]
491; CHECK-NEXT:  .LBB8_1: @ %vector.body
492; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
493; CHECK-NEXT:    vldrh.u16 q1, [r0, q0, uxtw #1]
494; CHECK-NEXT:    adds r0, #64
495; CHECK-NEXT:    vadd.f16 q1, q1, r2
496; CHECK-NEXT:    vstrh.16 q1, [r1, q0, uxtw #1]
497; CHECK-NEXT:    adds r1, #64
498; CHECK-NEXT:    le lr, .LBB8_1
499; CHECK-NEXT:  @ %bb.2: @ %end
500; CHECK-NEXT:    pop {r7, pc}
501; CHECK-NEXT:    .p2align 4
502; CHECK-NEXT:  @ %bb.3:
503; CHECK-NEXT:  .LCPI8_0:
504; CHECK-NEXT:    .short 0 @ 0x0
505; CHECK-NEXT:    .short 4 @ 0x4
506; CHECK-NEXT:    .short 8 @ 0x8
507; CHECK-NEXT:    .short 12 @ 0xc
508; CHECK-NEXT:    .short 16 @ 0x10
509; CHECK-NEXT:    .short 20 @ 0x14
510; CHECK-NEXT:    .short 24 @ 0x18
511; CHECK-NEXT:    .short 28 @ 0x1c
512vector.ph:
  ; Narrow the float argument to half, then broadcast it to all 8 lanes.
513  %y.trunc = fptrunc float %y to half
514  %broadcast.splatinsert = insertelement <8 x half> undef, half %y.trunc, i32 0
515  %broadcast.splat = shufflevector <8 x half> %broadcast.splatinsert, <8 x half> undef, <8 x i32> zeroinitializer
516  br label %vector.body
517
518vector.body:
519  %pointer.phi = phi half* [ %A, %vector.ph ], [ %0, %vector.body ]
520  %pointer.phi13 = phi half* [ %B, %vector.ph ], [ %2, %vector.body ]
521  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  ; %0 / %2 are the scalar pointers for the next iteration; %1 / %3 are the
  ; per-lane address vectors used by the gather and scatter below.
522  %0 = getelementptr half, half* %pointer.phi, i32 32
523  %1 = getelementptr half, half* %pointer.phi, <8 x i16> <i16 0, i16 4, i16 8, i16 12, i16 16, i16 20, i16 24, i16 28>
524  %2 = getelementptr half, half* %pointer.phi13, i32 32
525  %3 = getelementptr half, half* %pointer.phi13, <8 x i16> <i16 0, i16 4, i16 8, i16 12, i16 16, i16 20, i16 24, i16 28>
526  %wide.masked.gather = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %1, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x half> undef)
527  %4 = fadd <8 x half> %wide.masked.gather, %broadcast.splat
528  call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %4, <8 x half*> %3, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ; %index only controls the trip count (996 / 4 = 249 iterations).
529  %index.next = add i32 %index, 4
530  %5 = icmp eq i32 %index.next, 996
531  br i1 %5, label %end, label %vector.body
532
533end:
534  ret void
535}
536
; Test: like ptr_iv_v8f16, but a further constant GEP is applied to the vector
; of pointers before use (+3 elements for the gather, +5 for the scatter).
; The expected output folds those constants into the offset vectors
; <3, 7, ..., 31> and <5, 9, ..., 33>.
; NOTE(review): %scatter.address is derived from %1 (the %A-based vector), not
; from %3, so %3 is unused and the expected store goes through r0 even though
; %A is marked readonly. ptr_iv_v8i16_mult and ptr_iv_v16i8_mult use %3 here;
; confirm whether this difference is intentional before changing it, since the
; autogenerated assertions would need regenerating.
537define void @ptr_iv_v8f16_mult(half* noalias nocapture readonly %A, half* noalias nocapture %B, float %y) {
538; CHECK-LABEL: ptr_iv_v8f16_mult:
539; CHECK:       @ %bb.0: @ %vector.ph
540; CHECK-NEXT:    .save {r7, lr}
541; CHECK-NEXT:    push {r7, lr}
542; CHECK-NEXT:    vmov s0, r2
543; CHECK-NEXT:    adr r2, .LCPI9_0
544; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
545; CHECK-NEXT:    mov.w lr, #249
546; CHECK-NEXT:    vmov.f16 r1, s0
547; CHECK-NEXT:    vldrw.u32 q0, [r2]
548; CHECK-NEXT:    adr r2, .LCPI9_1
549; CHECK-NEXT:    vldrw.u32 q1, [r2]
550; CHECK-NEXT:  .LBB9_1: @ %vector.body
551; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
552; CHECK-NEXT:    vldrh.u16 q2, [r0, q0, uxtw #1]
553; CHECK-NEXT:    vadd.f16 q2, q2, r1
554; CHECK-NEXT:    vstrh.16 q2, [r0, q1, uxtw #1]
555; CHECK-NEXT:    adds r0, #64
556; CHECK-NEXT:    le lr, .LBB9_1
557; CHECK-NEXT:  @ %bb.2: @ %end
558; CHECK-NEXT:    pop {r7, pc}
559; CHECK-NEXT:    .p2align 4
560; CHECK-NEXT:  @ %bb.3:
561; CHECK-NEXT:  .LCPI9_0:
562; CHECK-NEXT:    .short 3 @ 0x3
563; CHECK-NEXT:    .short 7 @ 0x7
564; CHECK-NEXT:    .short 11 @ 0xb
565; CHECK-NEXT:    .short 15 @ 0xf
566; CHECK-NEXT:    .short 19 @ 0x13
567; CHECK-NEXT:    .short 23 @ 0x17
568; CHECK-NEXT:    .short 27 @ 0x1b
569; CHECK-NEXT:    .short 31 @ 0x1f
570; CHECK-NEXT:  .LCPI9_1:
571; CHECK-NEXT:    .short 5 @ 0x5
572; CHECK-NEXT:    .short 9 @ 0x9
573; CHECK-NEXT:    .short 13 @ 0xd
574; CHECK-NEXT:    .short 17 @ 0x11
575; CHECK-NEXT:    .short 21 @ 0x15
576; CHECK-NEXT:    .short 25 @ 0x19
577; CHECK-NEXT:    .short 29 @ 0x1d
578; CHECK-NEXT:    .short 33 @ 0x21
579vector.ph:
  ; Narrow the float argument to half, then broadcast it to all 8 lanes.
580  %y.trunc = fptrunc float %y to half
581  %broadcast.splatinsert = insertelement <8 x half> undef, half %y.trunc, i32 0
582  %broadcast.splat = shufflevector <8 x half> %broadcast.splatinsert, <8 x half> undef, <8 x i32> zeroinitializer
583  br label %vector.body
584
585vector.body:
586  %pointer.phi = phi half* [ %A, %vector.ph ], [ %0, %vector.body ]
587  %pointer.phi13 = phi half* [ %B, %vector.ph ], [ %2, %vector.body ]
588  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
589  %0 = getelementptr half, half* %pointer.phi, i32 32
590  %1 = getelementptr half, half* %pointer.phi, <8 x i16> <i16 0, i16 4, i16 8, i16 12, i16 16, i16 20, i16 24, i16 28>
  ; Second-level GEP on the pointer vector: every lane moves by +3 elements.
591  %gather.address = getelementptr half, <8 x half*> %1, i32 3
592  %2 = getelementptr half, half* %pointer.phi13, i32 32
593  %3 = getelementptr half, half* %pointer.phi13, <8 x i16> <i16 0, i16 4, i16 8, i16 12, i16 16, i16 20, i16 24, i16 28>
  ; Built from %1 (not %3), so the scatter targets %A-based addresses.
594  %scatter.address = getelementptr half, <8 x half*> %1, i32 5
595  %wide.masked.gather = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gather.address, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x half> undef)
596  %4 = fadd <8 x half> %wide.masked.gather, %broadcast.splat
597  call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %4, <8 x half*> %scatter.address, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ; %index only controls the trip count (996 / 4 = 249 iterations).
598  %index.next = add i32 %index, 4
599  %5 = icmp eq i32 %index.next, 996
600  br i1 %5, label %end, label %vector.body
601
602end:
603  ret void
604}
605
606
; Test: stride-3 access pattern built from one pointer IV per buffer.
; Per iteration, three gathers read %x at element offsets <0, 3, 6, 9>,
; <1, 4, 7, 10> and <2, 5, 8, 11>; the first result is multiplied by 10, by the
; second result and by the third result; the three products are scattered to
; %z at the matching offsets. Both pointers then advance by 12 elements
; (48 bytes in the expected output).
; Unlike the ptr_iv_* functions, the trip count depends on %n (the loop exits
; when %index.next == %n, stepping by 4), and the expected output uses a
; `subs r2, #4` / `bne` loop. The function uses the arm_aapcs_vfpcc calling
; convention and its pointer arguments carry no noalias attribute.
607define arm_aapcs_vfpcc void @three_pointer_iv_v4i32(i32* nocapture readonly %x, i32* nocapture %z, i32 %n) {
608; CHECK-LABEL: three_pointer_iv_v4i32:
609; CHECK:       @ %bb.0: @ %vector.ph
610; CHECK-NEXT:    .save {r7, lr}
611; CHECK-NEXT:    push {r7, lr}
612; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
613; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
614; CHECK-NEXT:    adr.w r12, .LCPI10_0
615; CHECK-NEXT:    adr.w lr, .LCPI10_1
616; CHECK-NEXT:    adr r3, .LCPI10_2
617; CHECK-NEXT:    vldrw.u32 q2, [lr]
618; CHECK-NEXT:    vldrw.u32 q1, [r3]
619; CHECK-NEXT:    vldrw.u32 q3, [r12]
620; CHECK-NEXT:    vmov.i32 q0, #0xa
621; CHECK-NEXT:  .LBB10_1: @ %vector.body
622; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
623; CHECK-NEXT:    vldrw.u32 q4, [r0, q1, uxtw #2]
624; CHECK-NEXT:    vldrw.u32 q5, [r0, q2, uxtw #2]
625; CHECK-NEXT:    vldrw.u32 q6, [r0, q3, uxtw #2]
626; CHECK-NEXT:    subs r2, #4
627; CHECK-NEXT:    vmul.i32 q4, q5, q4
628; CHECK-NEXT:    add.w r0, r0, #48
629; CHECK-NEXT:    vmul.i32 q6, q5, q6
630; CHECK-NEXT:    vmul.i32 q5, q5, q0
631; CHECK-NEXT:    vstrw.32 q5, [r1, q2, uxtw #2]
632; CHECK-NEXT:    vstrw.32 q6, [r1, q3, uxtw #2]
633; CHECK-NEXT:    vstrw.32 q4, [r1, q1, uxtw #2]
634; CHECK-NEXT:    add.w r1, r1, #48
635; CHECK-NEXT:    bne .LBB10_1
636; CHECK-NEXT:  @ %bb.2: @ %end
637; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
638; CHECK-NEXT:    pop {r7, pc}
639; CHECK-NEXT:    .p2align 4
640; CHECK-NEXT:  @ %bb.3:
641; CHECK-NEXT:  .LCPI10_0:
642; CHECK-NEXT:    .long 1 @ 0x1
643; CHECK-NEXT:    .long 4 @ 0x4
644; CHECK-NEXT:    .long 7 @ 0x7
645; CHECK-NEXT:    .long 10 @ 0xa
646; CHECK-NEXT:  .LCPI10_1:
647; CHECK-NEXT:    .long 0 @ 0x0
648; CHECK-NEXT:    .long 3 @ 0x3
649; CHECK-NEXT:    .long 6 @ 0x6
650; CHECK-NEXT:    .long 9 @ 0x9
651; CHECK-NEXT:  .LCPI10_2:
652; CHECK-NEXT:    .long 2 @ 0x2
653; CHECK-NEXT:    .long 5 @ 0x5
654; CHECK-NEXT:    .long 8 @ 0x8
655; CHECK-NEXT:    .long 11 @ 0xb
656vector.ph:
657  br label %vector.body
658
659vector.body:
660  %pointer.phi = phi i32* [ %x, %vector.ph ], [ %v3, %vector.body ]
661  %pointer.phi55 = phi i32* [ %z, %vector.ph ], [ %v4, %vector.body ]
662  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  ; Base address vectors (stride 3) and the scalar pointers for the next
  ; iteration, for the source (%x) and destination (%z) buffers.
663  %vector.gep = getelementptr i32, i32* %pointer.phi, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
664  %v3 = getelementptr i32, i32* %pointer.phi, i32 12
665  %vector.gep56 = getelementptr i32, i32* %pointer.phi55, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
666  %v4 = getelementptr i32, i32* %pointer.phi55, i32 12
  ; %v5 is not used by any later instruction in this function.
667  %v5 = add i32 %index, 0
  ; Gathers at base +0, +1 and +2 elements.
668  %v6 = getelementptr inbounds i32, <4 x i32*> %vector.gep, i32 1
669  %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %vector.gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
670  %v7 = getelementptr inbounds i32, <4 x i32*> %vector.gep, i32 2
671  %wide.masked.gather57 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %v6, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
672  %wide.masked.gather58 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %v7, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ; All three products share the first gather as one operand.
673  %v11 = mul nuw nsw <4 x i32> %wide.masked.gather, <i32 10, i32 10, i32 10, i32 10>
674  %v13 = mul nuw nsw <4 x i32> %wide.masked.gather, %wide.masked.gather57
675  %v15 = mul nuw nsw <4 x i32> %wide.masked.gather, %wide.masked.gather58
  ; Scatters at destination base +0, +1 and +2 elements.
676  %v17 = getelementptr inbounds i32, <4 x i32*> %vector.gep56, i32 1
677  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %v11, <4 x i32*> %vector.gep56, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
678  %v18 = getelementptr inbounds i32, <4 x i32*> %vector.gep56, i32 2
679  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %v13, <4 x i32*> %v17, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
680  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %v15, <4 x i32*> %v18, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ; Exit test compares against the runtime value %n, not a constant.
681  %index.next = add i32 %index, 4
682  %v37 = icmp eq i32 %index.next, %n
683  br i1 %v37, label %end, label %vector.body
684
685end:
686  ret void;
687}
688
; three_pointer_iv_v4i8: a single loop driven by two i8 pointer induction
; phis (%x for the loads, %z for the stores), each advanced by 12 bytes per
; iteration. Three <4 x i8> gathers read byte offsets {0,3,6,9}, then the
; same offsets +1 and +2, from the load pointer. The gathered bytes are
; zero-extended to i32, multiplied, truncated back to i8 and scattered to
; the matching offsets from the store pointer.
; The expected output below loads the three offset vectors from constant
; pools ahead of the loop and reuses them for every gather and scatter,
; bumping the two scalar base registers by 12 each iteration.
689define arm_aapcs_vfpcc void @three_pointer_iv_v4i8(i8* nocapture readonly %x, i8* nocapture %z, i32 %n) {
690; CHECK-LABEL: three_pointer_iv_v4i8:
691; CHECK:       @ %bb.0: @ %vector.ph
692; CHECK-NEXT:    .save {r7, lr}
693; CHECK-NEXT:    push {r7, lr}
694; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
695; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
696; CHECK-NEXT:    adr.w r12, .LCPI11_0
697; CHECK-NEXT:    adr.w lr, .LCPI11_1
698; CHECK-NEXT:    adr r3, .LCPI11_2
699; CHECK-NEXT:    vldrw.u32 q2, [lr]
700; CHECK-NEXT:    vldrw.u32 q1, [r3]
701; CHECK-NEXT:    vldrw.u32 q3, [r12]
702; CHECK-NEXT:    vmov.i32 q0, #0xa
703; CHECK-NEXT:  .LBB11_1: @ %vector.body
704; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
705; CHECK-NEXT:    vldrb.u32 q4, [r0, q1]
706; CHECK-NEXT:    vldrb.u32 q5, [r0, q2]
707; CHECK-NEXT:    vldrb.u32 q6, [r0, q3]
708; CHECK-NEXT:    subs r2, #4
709; CHECK-NEXT:    vmul.i32 q4, q5, q4
710; CHECK-NEXT:    add.w r0, r0, #12
711; CHECK-NEXT:    vmul.i32 q6, q5, q6
712; CHECK-NEXT:    vmul.i32 q5, q5, q0
713; CHECK-NEXT:    vstrb.32 q5, [r1, q2]
714; CHECK-NEXT:    vstrb.32 q6, [r1, q3]
715; CHECK-NEXT:    vstrb.32 q4, [r1, q1]
716; CHECK-NEXT:    add.w r1, r1, #12
717; CHECK-NEXT:    bne .LBB11_1
718; CHECK-NEXT:  @ %bb.2: @ %end
719; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
720; CHECK-NEXT:    pop {r7, pc}
721; CHECK-NEXT:    .p2align 4
722; CHECK-NEXT:  @ %bb.3:
723; CHECK-NEXT:  .LCPI11_0:
724; CHECK-NEXT:    .long 1 @ 0x1
725; CHECK-NEXT:    .long 4 @ 0x4
726; CHECK-NEXT:    .long 7 @ 0x7
727; CHECK-NEXT:    .long 10 @ 0xa
728; CHECK-NEXT:  .LCPI11_1:
729; CHECK-NEXT:    .long 0 @ 0x0
730; CHECK-NEXT:    .long 3 @ 0x3
731; CHECK-NEXT:    .long 6 @ 0x6
732; CHECK-NEXT:    .long 9 @ 0x9
733; CHECK-NEXT:  .LCPI11_2:
734; CHECK-NEXT:    .long 2 @ 0x2
735; CHECK-NEXT:    .long 5 @ 0x5
736; CHECK-NEXT:    .long 8 @ 0x8
737; CHECK-NEXT:    .long 11 @ 0xb
738vector.ph:
739  br label %vector.body
740
741vector.body:                                      ; preds = %vector.body, %vector.ph
  ; Pointer induction variables for the source (%x) and destination (%z),
  ; plus a scalar counter that controls the loop exit.
742  %pointer.phi = phi i8* [ %x, %vector.ph ], [ %v3, %vector.body ]
743  %pointer.phi55 = phi i8* [ %z, %vector.ph ], [ %v4, %vector.body ]
744  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  ; Per-lane addresses at a stride of 3 bytes; %v3 / %v4 are the pointers
  ; fed back into the phis for the next iteration (+12 bytes).
745  %vector.gep = getelementptr i8, i8* %pointer.phi, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
746  %v3 = getelementptr i8, i8* %pointer.phi, i32 12
747  %vector.gep56 = getelementptr i8, i8* %pointer.phi55, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
748  %v4 = getelementptr i8, i8* %pointer.phi55, i32 12
  ; %v5 has no users in this function.
749  %v5 = add i32 %index, 0
  ; %v6 / %v7 shift every lane address by 1 and 2 bytes for the second and
  ; third gathers.
750  %v6 = getelementptr inbounds i8, <4 x i8*> %vector.gep, i32 1
751  %wide.masked.gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %vector.gep, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
752  %v7 = getelementptr inbounds i8, <4 x i8*> %vector.gep, i32 2
753  %wide.masked.gather57 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %v6, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
754  %wide.masked.gather58 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %v7, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  ; Widen the gathered bytes so the multiplies are performed in i32.
755  %v8 = zext <4 x i8> %wide.masked.gather to <4 x i32>
756  %v9 = zext <4 x i8> %wide.masked.gather57 to <4 x i32>
757  %v10 = zext <4 x i8> %wide.masked.gather58 to <4 x i32>
  ; Products: first stream * 10, first * second, first * third; each result
  ; is truncated back to i8 before being stored.
758  %v11 = mul nuw nsw <4 x i32> %v8, <i32 10, i32 10, i32 10, i32 10>
759  %v12 = trunc <4 x i32> %v11 to <4 x i8>
760  %v13 = mul nuw nsw <4 x i32> %v8, %v9
761  %v14 = trunc <4 x i32> %v13 to <4 x i8>
762  %v15 = mul nuw nsw <4 x i32> %v8, %v10
763  %v16 = trunc <4 x i32> %v15 to <4 x i8>
  ; Store addresses mirror the load addresses: base, base+1 and base+2.
764  %v17 = getelementptr inbounds i8, <4 x i8*> %vector.gep56, i32 1
765  call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %v12, <4 x i8*> %vector.gep56, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
766  %v18 = getelementptr inbounds i8, <4 x i8*> %vector.gep56, i32 2
767  call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %v14, <4 x i8*> %v17, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
768  call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %v16, <4 x i8*> %v18, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ; Leave the loop once the counter (stepping by 4) equals %n.
769  %index.next = add i32 %index, 4
770  %v37 = icmp eq i32 %index.next, %n
771  br i1 %v37, label %end, label %vector.body
772
773end:
774  ret void;
775}
776
; three_pointer_iv_v8i16: 8-lane i16 form of the three-stream pointer
; induction test. Two i16 pointer phis (%x for the loads, %z for the stores)
; advance by 24 elements per iteration. Three <8 x i16> gathers read element
; offsets {0,3,...,21}, then the same offsets +1 and +2; the products are
; computed directly in i16 and scattered to the matching offsets from the
; store pointer.
; The expected output below keeps the three offset vectors in q1-q3, uses
; them with a "uxtw #1" scaled addressing mode, and bumps both base
; registers by 48 bytes per iteration.
777define arm_aapcs_vfpcc void @three_pointer_iv_v8i16(i16* nocapture readonly %x, i16* nocapture %z, i32 %n) {
778; CHECK-LABEL: three_pointer_iv_v8i16:
779; CHECK:       @ %bb.0: @ %vector.ph
780; CHECK-NEXT:    .save {r7, lr}
781; CHECK-NEXT:    push {r7, lr}
782; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
783; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
784; CHECK-NEXT:    adr.w r12, .LCPI12_0
785; CHECK-NEXT:    adr.w lr, .LCPI12_1
786; CHECK-NEXT:    adr r3, .LCPI12_2
787; CHECK-NEXT:    vldrw.u32 q2, [lr]
788; CHECK-NEXT:    vldrw.u32 q1, [r3]
789; CHECK-NEXT:    vldrw.u32 q3, [r12]
790; CHECK-NEXT:    vmov.i16 q0, #0xa
791; CHECK-NEXT:  .LBB12_1: @ %vector.body
792; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
793; CHECK-NEXT:    vldrh.u16 q4, [r0, q1, uxtw #1]
794; CHECK-NEXT:    vldrh.u16 q5, [r0, q2, uxtw #1]
795; CHECK-NEXT:    vldrh.u16 q6, [r0, q3, uxtw #1]
796; CHECK-NEXT:    subs r2, #4
797; CHECK-NEXT:    vmul.i16 q4, q5, q4
798; CHECK-NEXT:    add.w r0, r0, #48
799; CHECK-NEXT:    vmul.i16 q6, q5, q6
800; CHECK-NEXT:    vmul.i16 q5, q5, q0
801; CHECK-NEXT:    vstrh.16 q5, [r1, q2, uxtw #1]
802; CHECK-NEXT:    vstrh.16 q6, [r1, q3, uxtw #1]
803; CHECK-NEXT:    vstrh.16 q4, [r1, q1, uxtw #1]
804; CHECK-NEXT:    add.w r1, r1, #48
805; CHECK-NEXT:    bne .LBB12_1
806; CHECK-NEXT:  @ %bb.2: @ %end
807; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
808; CHECK-NEXT:    pop {r7, pc}
809; CHECK-NEXT:    .p2align 4
810; CHECK-NEXT:  @ %bb.3:
811; CHECK-NEXT:  .LCPI12_0:
812; CHECK-NEXT:    .short 1 @ 0x1
813; CHECK-NEXT:    .short 4 @ 0x4
814; CHECK-NEXT:    .short 7 @ 0x7
815; CHECK-NEXT:    .short 10 @ 0xa
816; CHECK-NEXT:    .short 13 @ 0xd
817; CHECK-NEXT:    .short 16 @ 0x10
818; CHECK-NEXT:    .short 19 @ 0x13
819; CHECK-NEXT:    .short 22 @ 0x16
820; CHECK-NEXT:  .LCPI12_1:
821; CHECK-NEXT:    .short 0 @ 0x0
822; CHECK-NEXT:    .short 3 @ 0x3
823; CHECK-NEXT:    .short 6 @ 0x6
824; CHECK-NEXT:    .short 9 @ 0x9
825; CHECK-NEXT:    .short 12 @ 0xc
826; CHECK-NEXT:    .short 15 @ 0xf
827; CHECK-NEXT:    .short 18 @ 0x12
828; CHECK-NEXT:    .short 21 @ 0x15
829; CHECK-NEXT:  .LCPI12_2:
830; CHECK-NEXT:    .short 2 @ 0x2
831; CHECK-NEXT:    .short 5 @ 0x5
832; CHECK-NEXT:    .short 8 @ 0x8
833; CHECK-NEXT:    .short 11 @ 0xb
834; CHECK-NEXT:    .short 14 @ 0xe
835; CHECK-NEXT:    .short 17 @ 0x11
836; CHECK-NEXT:    .short 20 @ 0x14
837; CHECK-NEXT:    .short 23 @ 0x17
838vector.ph:
839  br label %vector.body
840
841vector.body:
  ; Pointer induction variables for the source (%x) and destination (%z),
  ; plus a scalar counter that controls the loop exit.
842  %pointer.phi = phi i16* [ %x, %vector.ph ], [ %v3, %vector.body ]
843  %pointer.phi55 = phi i16* [ %z, %vector.ph ], [ %v4, %vector.body ]
844  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  ; Per-lane addresses at a stride of 3 elements, using i16 lane offsets;
  ; %v3 / %v4 are the next-iteration pointers (+24 elements).
845  %vector.gep = getelementptr i16, i16* %pointer.phi, <8 x i16> <i16 0, i16 3, i16 6, i16 9, i16 12, i16 15, i16 18, i16 21>
846  %v3 = getelementptr i16, i16* %pointer.phi, i32 24
847  %vector.gep56 = getelementptr i16, i16* %pointer.phi55, <8 x i16> <i16 0, i16 3, i16 6, i16 9, i16 12, i16 15, i16 18, i16 21>
848  %v4 = getelementptr i16, i16* %pointer.phi55, i32 24
  ; %v5 has no users in this function.
849  %v5 = add i32 %index, 0
  ; %v6 / %v7 shift every lane address by 1 and 2 elements for the second
  ; and third gathers (index operand is i16 here, i32 on the store side).
850  %v6 = getelementptr inbounds i16, <8 x i16*> %vector.gep, i16 1
851  %wide.masked.gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %vector.gep, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
852  %v7 = getelementptr inbounds i16, <8 x i16*> %vector.gep, i16 2
853  %wide.masked.gather57 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %v6, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
854  %wide.masked.gather58 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %v7, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ; Products: first stream * 10, first * second, first * third, all in i16.
855  %v11 = mul nuw nsw <8 x i16> %wide.masked.gather, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
856  %v13 = mul nuw nsw <8 x i16> %wide.masked.gather, %wide.masked.gather57
857  %v15 = mul nuw nsw <8 x i16> %wide.masked.gather, %wide.masked.gather58
  ; Store addresses mirror the load addresses: base, base+1 and base+2.
858  %v17 = getelementptr inbounds i16, <8 x i16*> %vector.gep56, i32 1
859  call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %v11, <8 x i16*> %vector.gep56, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
860  %v18 = getelementptr inbounds i16, <8 x i16*> %vector.gep56, i32 2
861  call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %v13, <8 x i16*> %v17, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
862  call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %v15, <8 x i16*> %v18, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ; NOTE(review): the counter steps by 4 although each iteration handles
  ; 8 lanes and the pointers advance by 24 elements. It only affects the
  ; trip count; presumably carried over from a 4-lane variant - confirm
  ; before changing, since the "subs r2, #4" expectation depends on it.
863  %index.next = add i32 %index, 4
864  %v37 = icmp eq i32 %index.next, %n
865  br i1 %v37, label %end, label %vector.body
866
867end:
868  ret void;
869}
870
; three_pointer_iv_v16i8: 16-lane i8 form of the three-stream pointer
; induction test. Two i8 pointer phis (%x for the loads, %z for the stores)
; advance by 48 bytes per iteration. Three <16 x i8> gathers read byte
; offsets {0,3,...,45}, then the same offsets +1 and +2; the products are
; computed directly in i8 and scattered to the matching offsets from the
; store pointer.
; The expected output below keeps the three byte-offset vectors in q1-q3
; (loaded from constant pools ahead of the loop) and bumps both base
; registers by 48 per iteration.
871define arm_aapcs_vfpcc void @three_pointer_iv_v16i8(i8* nocapture readonly %x, i8* nocapture %z, i32 %n) {
872; CHECK-LABEL: three_pointer_iv_v16i8:
873; CHECK:       @ %bb.0: @ %vector.ph
874; CHECK-NEXT:    .save {r7, lr}
875; CHECK-NEXT:    push {r7, lr}
876; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
877; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
878; CHECK-NEXT:    adr.w r12, .LCPI13_0
879; CHECK-NEXT:    adr.w lr, .LCPI13_1
880; CHECK-NEXT:    adr r3, .LCPI13_2
881; CHECK-NEXT:    vldrw.u32 q2, [lr]
882; CHECK-NEXT:    vldrw.u32 q1, [r3]
883; CHECK-NEXT:    vldrw.u32 q3, [r12]
884; CHECK-NEXT:    vmov.i8 q0, #0xa
885; CHECK-NEXT:  .LBB13_1: @ %vector.body
886; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
887; CHECK-NEXT:    vldrb.u8 q4, [r0, q1]
888; CHECK-NEXT:    vldrb.u8 q5, [r0, q2]
889; CHECK-NEXT:    vldrb.u8 q6, [r0, q3]
890; CHECK-NEXT:    subs r2, #4
891; CHECK-NEXT:    vmul.i8 q4, q5, q4
892; CHECK-NEXT:    add.w r0, r0, #48
893; CHECK-NEXT:    vmul.i8 q6, q5, q6
894; CHECK-NEXT:    vmul.i8 q5, q5, q0
895; CHECK-NEXT:    vstrb.8 q5, [r1, q2]
896; CHECK-NEXT:    vstrb.8 q6, [r1, q3]
897; CHECK-NEXT:    vstrb.8 q4, [r1, q1]
898; CHECK-NEXT:    add.w r1, r1, #48
899; CHECK-NEXT:    bne .LBB13_1
900; CHECK-NEXT:  @ %bb.2: @ %end
901; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
902; CHECK-NEXT:    pop {r7, pc}
903; CHECK-NEXT:    .p2align 4
904; CHECK-NEXT:  @ %bb.3:
905; CHECK-NEXT:  .LCPI13_0:
906; CHECK-NEXT:    .byte 1 @ 0x1
907; CHECK-NEXT:    .byte 4 @ 0x4
908; CHECK-NEXT:    .byte 7 @ 0x7
909; CHECK-NEXT:    .byte 10 @ 0xa
910; CHECK-NEXT:    .byte 13 @ 0xd
911; CHECK-NEXT:    .byte 16 @ 0x10
912; CHECK-NEXT:    .byte 19 @ 0x13
913; CHECK-NEXT:    .byte 22 @ 0x16
914; CHECK-NEXT:    .byte 25 @ 0x19
915; CHECK-NEXT:    .byte 28 @ 0x1c
916; CHECK-NEXT:    .byte 31 @ 0x1f
917; CHECK-NEXT:    .byte 34 @ 0x22
918; CHECK-NEXT:    .byte 37 @ 0x25
919; CHECK-NEXT:    .byte 40 @ 0x28
920; CHECK-NEXT:    .byte 43 @ 0x2b
921; CHECK-NEXT:    .byte 46 @ 0x2e
922; CHECK-NEXT:  .LCPI13_1:
923; CHECK-NEXT:    .byte 0 @ 0x0
924; CHECK-NEXT:    .byte 3 @ 0x3
925; CHECK-NEXT:    .byte 6 @ 0x6
926; CHECK-NEXT:    .byte 9 @ 0x9
927; CHECK-NEXT:    .byte 12 @ 0xc
928; CHECK-NEXT:    .byte 15 @ 0xf
929; CHECK-NEXT:    .byte 18 @ 0x12
930; CHECK-NEXT:    .byte 21 @ 0x15
931; CHECK-NEXT:    .byte 24 @ 0x18
932; CHECK-NEXT:    .byte 27 @ 0x1b
933; CHECK-NEXT:    .byte 30 @ 0x1e
934; CHECK-NEXT:    .byte 33 @ 0x21
935; CHECK-NEXT:    .byte 36 @ 0x24
936; CHECK-NEXT:    .byte 39 @ 0x27
937; CHECK-NEXT:    .byte 42 @ 0x2a
938; CHECK-NEXT:    .byte 45 @ 0x2d
939; CHECK-NEXT:  .LCPI13_2:
940; CHECK-NEXT:    .byte 2 @ 0x2
941; CHECK-NEXT:    .byte 5 @ 0x5
942; CHECK-NEXT:    .byte 8 @ 0x8
943; CHECK-NEXT:    .byte 11 @ 0xb
944; CHECK-NEXT:    .byte 14 @ 0xe
945; CHECK-NEXT:    .byte 17 @ 0x11
946; CHECK-NEXT:    .byte 20 @ 0x14
947; CHECK-NEXT:    .byte 23 @ 0x17
948; CHECK-NEXT:    .byte 26 @ 0x1a
949; CHECK-NEXT:    .byte 29 @ 0x1d
950; CHECK-NEXT:    .byte 32 @ 0x20
951; CHECK-NEXT:    .byte 35 @ 0x23
952; CHECK-NEXT:    .byte 38 @ 0x26
953; CHECK-NEXT:    .byte 41 @ 0x29
954; CHECK-NEXT:    .byte 44 @ 0x2c
955; CHECK-NEXT:    .byte 47 @ 0x2f
956vector.ph:
957  br label %vector.body
958
959vector.body:
  ; Pointer induction variables for the source (%x) and destination (%z),
  ; plus a scalar counter that controls the loop exit.
960  %pointer.phi = phi i8* [ %x, %vector.ph ], [ %v3, %vector.body ]
961  %pointer.phi55 = phi i8* [ %z, %vector.ph ], [ %v4, %vector.body ]
962  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  ; Per-lane addresses at a stride of 3 bytes, using i8 lane offsets;
  ; %v3 / %v4 are the next-iteration pointers (+48 bytes).
963  %vector.gep = getelementptr i8, i8* %pointer.phi, <16 x i8> <i8 0, i8 3, i8 6, i8 9, i8 12, i8 15, i8 18, i8 21, i8 24, i8 27, i8 30, i8 33, i8 36, i8 39, i8 42, i8 45>
964  %v3 = getelementptr i8, i8* %pointer.phi, i32 48
965  %vector.gep56 = getelementptr i8, i8* %pointer.phi55, <16 x i8> <i8 0, i8 3, i8 6, i8 9, i8 12, i8 15, i8 18, i8 21, i8 24, i8 27, i8 30, i8 33, i8 36, i8 39, i8 42, i8 45>
966  %v4 = getelementptr i8, i8* %pointer.phi55, i32 48
  ; %v5 has no users in this function.
967  %v5 = add i32 %index, 0
  ; %v6 / %v7 shift every lane address by 1 and 2 bytes for the second and
  ; third gathers (index operand is i8 here, i32 on the store side).
968  %v6 = getelementptr inbounds i8, <16 x i8*> %vector.gep, i8 1
969  %wide.masked.gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %vector.gep, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
970  %v7 = getelementptr inbounds i8, <16 x i8*> %vector.gep, i8 2
971  %wide.masked.gather57 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %v6, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
972  %wide.masked.gather58 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %v7, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ; Products: first stream * 10, first * second, first * third, all in i8.
973  %v11 = mul nuw nsw <16 x i8> %wide.masked.gather, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
974  %v13 = mul nuw nsw <16 x i8> %wide.masked.gather, %wide.masked.gather57
975  %v15 = mul nuw nsw <16 x i8> %wide.masked.gather, %wide.masked.gather58
  ; Store addresses mirror the load addresses: base, base+1 and base+2.
976  %v17 = getelementptr inbounds i8, <16 x i8*> %vector.gep56, i32 1
977  call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %v11, <16 x i8*> %vector.gep56, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
978  %v18 = getelementptr inbounds i8, <16 x i8*> %vector.gep56, i32 2
979  call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %v13, <16 x i8*> %v17, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
980  call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %v15, <16 x i8*> %v18, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ; NOTE(review): the counter steps by 4 although each iteration handles
  ; 16 lanes and the pointers advance by 48 bytes. It only affects the
  ; trip count; presumably carried over from a 4-lane variant - confirm
  ; before changing, since the "subs r2, #4" expectation depends on it.
981  %index.next = add i32 %index, 4
982  %v37 = icmp eq i32 %index.next, %n
983  br i1 %v37, label %end, label %vector.body
984
985end:
986  ret void;
987}
988
; Masked gather intrinsic declarations, one per result vector type.
; Operands: vector of pointers, i32 alignment, <N x i1> mask, pass-through
; vector of the result type.
989declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32, <4 x i1>, <4 x i8>)
990declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
991declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>)
992declare <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*>, i32, <8 x i1>, <8 x i16>)
993declare <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*>, i32, <8 x i1>, <8 x half>)
994declare <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*>, i32, <16 x i1>, <16 x i8>)
995
; Masked scatter intrinsic declarations for the same set of vector types.
; Operands: value vector, vector of pointers, i32 alignment, <N x i1> mask.
996declare void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8>, <4 x i8*>, i32, <4 x i1>)
997declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>)
998declare void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float>, <4 x float*>, i32, <4 x i1>)
999declare void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16>, <8 x i16*>, i32, <8 x i1>)
1000declare void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half>, <8 x half*>, i32, <8 x i1>)
1001declare void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8>, <16 x i8*>, i32, <16 x i1>)
1002