; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple powerpc64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs -vec-extabi < %s | FileCheck %s

define <16 x i8> @test_l_v16i8(<16 x i8>* %p) #0 {
; CHECK-LABEL: test_l_v16i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 15
; CHECK-NEXT:    lvsl 3, 0, 3
; CHECK-NEXT:    lvx 2, 3, 4
; CHECK-NEXT:    lvx 4, 0, 3
; CHECK-NEXT:    vperm 2, 4, 2, 3
; CHECK-NEXT:    blr
entry:
  %r = load <16 x i8>, <16 x i8>* %p, align 1
  ret <16 x i8> %r

}

define <32 x i8> @test_l_v32i8(<32 x i8>* %p) #0 {
; CHECK-LABEL: test_l_v32i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 31
; CHECK-NEXT:    lvsl 5, 0, 3
; CHECK-NEXT:    lvx 2, 3, 4
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lvx 4, 3, 4
; CHECK-NEXT:    lvx 0, 0, 3
; CHECK-NEXT:    vperm 3, 4, 2, 5
; CHECK-NEXT:    vperm 2, 0, 4, 5
; CHECK-NEXT:    blr
entry:
  %r = load <32 x i8>, <32 x i8>* %p, align 1
  ret <32 x i8> %r

}

define <8 x i16> @test_l_v8i16(<8 x i16>* %p) #0 {
; CHECK-LABEL: test_l_v8i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 15
; CHECK-NEXT:    lvsl 3, 0, 3
; CHECK-NEXT:    lvx 2, 3, 4
; CHECK-NEXT:    lvx 4, 0, 3
; CHECK-NEXT:    vperm 2, 4, 2, 3
; CHECK-NEXT:    blr
entry:
  %r = load <8 x i16>, <8 x i16>* %p, align 2
  ret <8 x i16> %r

}

define <16 x i16> @test_l_v16i16(<16 x i16>* %p) #0 {
; CHECK-LABEL: test_l_v16i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 31
; CHECK-NEXT:    lvsl 5, 0, 3
; CHECK-NEXT:    lvx 2, 3, 4
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lvx 4, 3, 4
; CHECK-NEXT:    lvx 0, 0, 3
; CHECK-NEXT:    vperm 3, 4, 2, 5
; CHECK-NEXT:    vperm 2, 0, 4, 5
; CHECK-NEXT:    blr
entry:
  %r = load <16 x i16>, <16 x i16>* %p, align 2
  ret <16 x i16> %r

}

define <4 x i32> @test_l_v4i32(<4 x i32>* %p) #0 {
; CHECK-LABEL: test_l_v4i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 15
; CHECK-NEXT:    lvsl 3, 0, 3
; CHECK-NEXT:    lvx 2, 3, 4
; CHECK-NEXT:    lvx 4, 0, 3
; CHECK-NEXT:    vperm 2, 4, 2, 3
; CHECK-NEXT:    blr
entry:
  %r = load <4 x i32>, <4 x i32>* %p, align 4
  ret <4 x i32> %r

}

define <8 x i32> @test_l_v8i32(<8 x i32>* %p) #0 {
; CHECK-LABEL: test_l_v8i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 31
; CHECK-NEXT:    lvsl 5, 0, 3
; CHECK-NEXT:    lvx 2, 3, 4
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lvx 4, 3, 4
; CHECK-NEXT:    lvx 0, 0, 3
; CHECK-NEXT:    vperm 3, 4, 2, 5
; CHECK-NEXT:    vperm 2, 0, 4, 5
; CHECK-NEXT:    blr
entry:
  %r = load <8 x i32>, <8 x i32>* %p, align 4
  ret <8 x i32> %r

}

define <2 x i64> @test_l_v2i64(<2 x i64>* %p) #0 {
; CHECK-LABEL: test_l_v2i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvd2x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  %r = load <2 x i64>, <2 x i64>* %p, align 8
  ret <2 x i64> %r

}

define <4 x i64> @test_l_v4i64(<4 x i64>* %p) #0 {
; CHECK-LABEL: test_l_v4i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lxvd2x 34, 0, 3
; CHECK-NEXT:    lxvd2x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  %r = load <4 x i64>, <4 x i64>* %p, align 8
  ret <4 x i64> %r

}

define <4 x float> @test_l_v4float(<4 x float>* %p) #0 {
; CHECK-LABEL: test_l_v4float:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 15
; CHECK-NEXT:    lvsl 3, 0, 3
; CHECK-NEXT:    lvx 2, 3, 4
; CHECK-NEXT:    lvx 4, 0, 3
; CHECK-NEXT:    vperm 2, 4, 2, 3
; CHECK-NEXT:    blr
entry:
  %r = load <4 x float>, <4 x float>* %p, align 4
  ret <4 x float> %r

}

define <8 x float> @test_l_v8float(<8 x float>* %p) #0 {
; CHECK-LABEL: test_l_v8float:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 31
; CHECK-NEXT:    lvsl 5, 0, 3
; CHECK-NEXT:    lvx 2, 3, 4
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lvx 4, 3, 4
; CHECK-NEXT:    lvx 0, 0, 3
; CHECK-NEXT:    vperm 3, 4, 2, 5
; CHECK-NEXT:    vperm 2, 0, 4, 5
; CHECK-NEXT:    blr
entry:
  %r = load <8 x float>, <8 x float>* %p, align 4
  ret <8 x float> %r

}

define <2 x double> @test_l_v2double(<2 x double>* %p) #0 {
; CHECK-LABEL: test_l_v2double:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvd2x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  %r = load <2 x double>, <2 x double>* %p, align 8
  ret <2 x double> %r

}

define <4 x double> @test_l_v4double(<4 x double>* %p) #0 {
; CHECK-LABEL: test_l_v4double:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lxvd2x 34, 0, 3
; CHECK-NEXT:    lxvd2x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  %r = load <4 x double>, <4 x double>* %p, align 8
  ret <4 x double> %r

}

define <16 x i8> @test_l_p8v16i8(<16 x i8>* %p) #2 {
; CHECK-LABEL: test_l_p8v16i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvw4x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  %r = load <16 x i8>, <16 x i8>* %p, align 1
  ret <16 x i8> %r

}

define <32 x i8> @test_l_p8v32i8(<32 x i8>* %p) #2 {
; CHECK-LABEL: test_l_p8v32i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lxvw4x 34, 0, 3
; CHECK-NEXT:    lxvw4x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  %r = load <32 x i8>, <32 x i8>* %p, align 1
  ret <32 x i8> %r

}

define <8 x i16> @test_l_p8v8i16(<8 x i16>* %p) #2 {
; CHECK-LABEL: test_l_p8v8i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvw4x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  %r = load <8 x i16>, <8 x i16>* %p, align 2
  ret <8 x i16> %r

}

define <16 x i16> @test_l_p8v16i16(<16 x i16>* %p) #2 {
; CHECK-LABEL: test_l_p8v16i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lxvw4x 34, 0, 3
; CHECK-NEXT:    lxvw4x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  %r = load <16 x i16>, <16 x i16>* %p, align 2
  ret <16 x i16> %r

}

define <4 x i32> @test_l_p8v4i32(<4 x i32>* %p) #2 {
; CHECK-LABEL: test_l_p8v4i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvw4x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  %r = load <4 x i32>, <4 x i32>* %p, align 4
  ret <4 x i32> %r

}

define <8 x i32> @test_l_p8v8i32(<8 x i32>* %p) #2 {
; CHECK-LABEL: test_l_p8v8i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lxvw4x 34, 0, 3
; CHECK-NEXT:    lxvw4x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  %r = load <8 x i32>, <8 x i32>* %p, align 4
  ret <8 x i32> %r

}

define <2 x i64> @test_l_p8v2i64(<2 x i64>* %p) #2 {
; CHECK-LABEL: test_l_p8v2i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvd2x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  %r = load <2 x i64>, <2 x i64>* %p, align 8
  ret <2 x i64> %r

}

define <4 x i64> @test_l_p8v4i64(<4 x i64>* %p) #2 {
; CHECK-LABEL: test_l_p8v4i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lxvd2x 34, 0, 3
; CHECK-NEXT:    lxvd2x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  %r = load <4 x i64>, <4 x i64>* %p, align 8
  ret <4 x i64> %r

}

define <4 x float> @test_l_p8v4float(<4 x float>* %p) #2 {
; CHECK-LABEL: test_l_p8v4float:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvw4x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  %r = load <4 x float>, <4 x float>* %p, align 4
  ret <4 x float> %r

}

define <8 x float> @test_l_p8v8float(<8 x float>* %p) #2 {
; CHECK-LABEL: test_l_p8v8float:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lxvw4x 34, 0, 3
; CHECK-NEXT:    lxvw4x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  %r = load <8 x float>, <8 x float>* %p, align 4
  ret <8 x float> %r

}

define <2 x double> @test_l_p8v2double(<2 x double>* %p) #2 {
; CHECK-LABEL: test_l_p8v2double:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvd2x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  %r = load <2 x double>, <2 x double>* %p, align 8
  ret <2 x double> %r

}

define <4 x double> @test_l_p8v4double(<4 x double>* %p) #2 {
; CHECK-LABEL: test_l_p8v4double:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    lxvd2x 34, 0, 3
; CHECK-NEXT:    lxvd2x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  %r = load <4 x double>, <4 x double>* %p, align 8
  ret <4 x double> %r

}

define void @test_s_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 {
; CHECK-LABEL: test_s_v16i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stxvw4x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  store <16 x i8> %v, <16 x i8>* %p, align 1
  ret void

}

define void @test_s_v32i8(<32 x i8>* %p, <32 x i8> %v) #0 {
; CHECK-LABEL: test_s_v32i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    stxvw4x 34, 0, 3
; CHECK-NEXT:    stxvw4x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  store <32 x i8> %v, <32 x i8>* %p, align 1
  ret void

}

define void @test_s_v8i16(<8 x i16>* %p, <8 x i16> %v) #0 {
; CHECK-LABEL: test_s_v8i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stxvw4x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  store <8 x i16> %v, <8 x i16>* %p, align 2
  ret void

}

define void @test_s_v16i16(<16 x i16>* %p, <16 x i16> %v) #0 {
; CHECK-LABEL: test_s_v16i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    stxvw4x 34, 0, 3
; CHECK-NEXT:    stxvw4x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  store <16 x i16> %v, <16 x i16>* %p, align 2
  ret void

}

define void @test_s_v4i32(<4 x i32>* %p, <4 x i32> %v) #0 {
; CHECK-LABEL: test_s_v4i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stxvw4x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  store <4 x i32> %v, <4 x i32>* %p, align 4
  ret void

}

define void @test_s_v8i32(<8 x i32>* %p, <8 x i32> %v) #0 {
; CHECK-LABEL: test_s_v8i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    stxvw4x 34, 0, 3
; CHECK-NEXT:    stxvw4x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  store <8 x i32> %v, <8 x i32>* %p, align 4
  ret void

}

define void @test_s_v2i64(<2 x i64>* %p, <2 x i64> %v) #0 {
; CHECK-LABEL: test_s_v2i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stxvd2x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  store <2 x i64> %v, <2 x i64>* %p, align 8
  ret void

}

define void @test_s_v4i64(<4 x i64>* %p, <4 x i64> %v) #0 {
; CHECK-LABEL: test_s_v4i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    stxvd2x 34, 0, 3
; CHECK-NEXT:    stxvd2x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  store <4 x i64> %v, <4 x i64>* %p, align 8
  ret void

}

define void @test_s_v4float(<4 x float>* %p, <4 x float> %v) #0 {
; CHECK-LABEL: test_s_v4float:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stxvw4x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  store <4 x float> %v, <4 x float>* %p, align 4
  ret void

}

define void @test_s_v8float(<8 x float>* %p, <8 x float> %v) #0 {
; CHECK-LABEL: test_s_v8float:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    stxvw4x 34, 0, 3
; CHECK-NEXT:    stxvw4x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  store <8 x float> %v, <8 x float>* %p, align 4
  ret void

}

define void @test_s_v2double(<2 x double>* %p, <2 x double> %v) #0 {
; CHECK-LABEL: test_s_v2double:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stxvd2x 34, 0, 3
; CHECK-NEXT:    blr
entry:
  store <2 x double> %v, <2 x double>* %p, align 8
  ret void

}

define void @test_s_v4double(<4 x double>* %p, <4 x double> %v) #0 {
; CHECK-LABEL: test_s_v4double:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li 4, 16
; CHECK-NEXT:    stxvd2x 34, 0, 3
; CHECK-NEXT:    stxvd2x 35, 3, 4
; CHECK-NEXT:    blr
entry:
  store <4 x double> %v, <4 x double>* %p, align 8
  ret void

}

attributes #0 = { nounwind "target-cpu"="pwr7" }
attributes #2 = { nounwind "target-cpu"="pwr8" }
