1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \
3; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P8
4
5; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr9 -mattr=+vsx \
6; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P9
7
8; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \
9; RUN:   -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P8-BE
10
11; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr9 -mattr=+vsx \
12; RUN:   -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P9-BE
13
; Loads a <2 x i64> through each of %vp1 and %vp2 and returns the lanes of
; %v1 in reverse order: shuffle mask <1, 0> indexes only the first shuffle
; operand, so no lane of %v2 reaches the result.
14define <2 x i64> @load_swap00(<2 x i64>* %vp1, <2 x i64>* %vp2) {
15; CHECK-P8-LABEL: load_swap00:
16; CHECK-P8:       # %bb.0:
17; CHECK-P8-NEXT:    lxvd2x v2, 0, r3
18; CHECK-P8-NEXT:    blr
19;
20; CHECK-P9-LABEL: load_swap00:
21; CHECK-P9:       # %bb.0:
22; CHECK-P9-NEXT:    lxvd2x v2, 0, r3
23; CHECK-P9-NEXT:    blr
24;
25; CHECK-P8-BE-LABEL: load_swap00:
26; CHECK-P8-BE:       # %bb.0:
27; CHECK-P8-BE-NEXT:    lxvd2x v2, 0, r3
28; CHECK-P8-BE-NEXT:    xxswapd v2, v2
29; CHECK-P8-BE-NEXT:    blr
30;
31; CHECK-P9-BE-LABEL: load_swap00:
32; CHECK-P9-BE:       # %bb.0:
33; CHECK-P9-BE-NEXT:    lxv v2, 0(r3)
34; CHECK-P9-BE-NEXT:    xxswapd v2, v2
35; CHECK-P9-BE-NEXT:    blr
36  %v1 = load <2 x i64>, <2 x i64>* %vp1
37  %v2 = load <2 x i64>, <2 x i64>* %vp2
38  %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 1, i32 0>
39  ret <2 x i64> %v3
40}
41
; Loads a <2 x i64> through each of %vp1 and %vp2 and returns the lanes of
; %v2 in reverse order: shuffle mask <3, 2> indexes only the second shuffle
; operand, so no lane of %v1 reaches the result.
42define <2 x i64> @load_swap01(<2 x i64>* %vp1, <2 x i64>* %vp2) {
43; CHECK-P8-LABEL: load_swap01:
44; CHECK-P8:       # %bb.0:
45; CHECK-P8-NEXT:    lxvd2x v2, 0, r4
46; CHECK-P8-NEXT:    blr
47;
48; CHECK-P9-LABEL: load_swap01:
49; CHECK-P9:       # %bb.0:
50; CHECK-P9-NEXT:    lxvd2x v2, 0, r4
51; CHECK-P9-NEXT:    blr
52;
53; CHECK-P8-BE-LABEL: load_swap01:
54; CHECK-P8-BE:       # %bb.0:
55; CHECK-P8-BE-NEXT:    lxvd2x v2, 0, r4
56; CHECK-P8-BE-NEXT:    xxswapd v2, v2
57; CHECK-P8-BE-NEXT:    blr
58;
59; CHECK-P9-BE-LABEL: load_swap01:
60; CHECK-P9-BE:       # %bb.0:
61; CHECK-P9-BE-NEXT:    lxv v2, 0(r4)
62; CHECK-P9-BE-NEXT:    xxswapd v2, v2
63; CHECK-P9-BE-NEXT:    blr
64  %v1 = load <2 x i64>, <2 x i64>* %vp1
65  %v2 = load <2 x i64>, <2 x i64>* %vp2
66  %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 3, i32 2>
67  ret <2 x i64> %v3
68}
69
; Loads a <4 x i32> through each of %vp1 and %vp2 and returns the lanes of
; %v1 in reverse order: shuffle mask <3, 2, 1, 0> indexes only the first
; shuffle operand, so no lane of %v2 reaches the result.
70define <4 x i32> @load_swap10(<4 x i32>* %vp1, <4 x i32>* %vp2) {
71; CHECK-P8-LABEL: load_swap10:
72; CHECK-P8:       # %bb.0:
73; CHECK-P8-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
74; CHECK-P8-NEXT:    lvx v3, 0, r3
75; CHECK-P8-NEXT:    addi r4, r4, .LCPI2_0@toc@l
76; CHECK-P8-NEXT:    lvx v2, 0, r4
77; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
78; CHECK-P8-NEXT:    blr
79;
80; CHECK-P9-LABEL: load_swap10:
81; CHECK-P9:       # %bb.0:
82; CHECK-P9-NEXT:    lxvw4x v2, 0, r3
83; CHECK-P9-NEXT:    blr
84;
85; CHECK-P8-BE-LABEL: load_swap10:
86; CHECK-P8-BE:       # %bb.0:
87; CHECK-P8-BE-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
88; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
89; CHECK-P8-BE-NEXT:    addi r4, r4, .LCPI2_0@toc@l
90; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r4
91; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
92; CHECK-P8-BE-NEXT:    blr
93;
94; CHECK-P9-BE-LABEL: load_swap10:
95; CHECK-P9-BE:       # %bb.0:
96; CHECK-P9-BE-NEXT:    lxv v2, 0(r3)
97; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
98; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
99; CHECK-P9-BE-NEXT:    lxvx v3, 0, r3
100; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
101; CHECK-P9-BE-NEXT:    blr
102  %v1 = load <4 x i32>, <4 x i32>* %vp1
103  %v2 = load <4 x i32>, <4 x i32>* %vp2
104  %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
105  ret <4 x i32> %v3
106}
107
; Loads a <4 x i32> through each of %vp1 and %vp2 and returns the lanes of
; %v2 in reverse order: shuffle mask <7, 6, 5, 4> indexes only the second
; shuffle operand, so no lane of %v1 reaches the result.
108define <4 x i32> @load_swap11(<4 x i32>* %vp1, <4 x i32>* %vp2) {
109; CHECK-P8-LABEL: load_swap11:
110; CHECK-P8:       # %bb.0:
111; CHECK-P8-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
112; CHECK-P8-NEXT:    lvx v3, 0, r4
113; CHECK-P8-NEXT:    addi r3, r3, .LCPI3_0@toc@l
114; CHECK-P8-NEXT:    lvx v2, 0, r3
115; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
116; CHECK-P8-NEXT:    blr
117;
118; CHECK-P9-LABEL: load_swap11:
119; CHECK-P9:       # %bb.0:
120; CHECK-P9-NEXT:    lxvw4x v2, 0, r4
121; CHECK-P9-NEXT:    blr
122;
123; CHECK-P8-BE-LABEL: load_swap11:
124; CHECK-P8-BE:       # %bb.0:
125; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
126; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r4
127; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
128; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
129; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
130; CHECK-P8-BE-NEXT:    blr
131;
132; CHECK-P9-BE-LABEL: load_swap11:
133; CHECK-P9-BE:       # %bb.0:
134; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
135; CHECK-P9-BE-NEXT:    lxv v2, 0(r4)
136; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
137; CHECK-P9-BE-NEXT:    lxvx v3, 0, r3
138; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
139; CHECK-P9-BE-NEXT:    blr
140  %v1 = load <4 x i32>, <4 x i32>* %vp1
141  %v2 = load <4 x i32>, <4 x i32>* %vp2
142  %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
143  ret <4 x i32> %v3
144}
145
; Loads an <8 x i16> through each of %vp1 and %vp2 and returns the lanes of
; %v1 in reverse order: shuffle mask <7, ..., 0> indexes only the first
; shuffle operand, so no lane of %v2 reaches the result.
146define <8 x i16> @load_swap20(<8 x i16>* %vp1, <8 x i16>* %vp2){
147; CHECK-P8-LABEL: load_swap20:
148; CHECK-P8:       # %bb.0:
149; CHECK-P8-NEXT:    addis r4, r2, .LCPI4_0@toc@ha
150; CHECK-P8-NEXT:    lvx v3, 0, r3
151; CHECK-P8-NEXT:    addi r4, r4, .LCPI4_0@toc@l
152; CHECK-P8-NEXT:    lvx v2, 0, r4
153; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
154; CHECK-P8-NEXT:    blr
155;
156; CHECK-P9-LABEL: load_swap20:
157; CHECK-P9:       # %bb.0:
158; CHECK-P9-NEXT:    lxvh8x v2, 0, r3
159; CHECK-P9-NEXT:    blr
160;
161; CHECK-P8-BE-LABEL: load_swap20:
162; CHECK-P8-BE:       # %bb.0:
163; CHECK-P8-BE-NEXT:    addis r4, r2, .LCPI4_0@toc@ha
164; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
165; CHECK-P8-BE-NEXT:    addi r4, r4, .LCPI4_0@toc@l
166; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r4
167; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
168; CHECK-P8-BE-NEXT:    blr
169;
170; CHECK-P9-BE-LABEL: load_swap20:
171; CHECK-P9-BE:       # %bb.0:
172; CHECK-P9-BE-NEXT:    lxv v2, 0(r3)
173; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
174; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
175; CHECK-P9-BE-NEXT:    lxvx v3, 0, r3
176; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
177; CHECK-P9-BE-NEXT:    blr
178  %v1 = load <8 x i16>, <8 x i16>* %vp1
179  %v2 = load <8 x i16>, <8 x i16>* %vp2
180  %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
181  ret <8 x i16> %v3
182}
183
; Loads an <8 x i16> through each of %vp1 and %vp2 and returns the lanes of
; %v2 in reverse order: shuffle mask <15, ..., 8> indexes only the second
; shuffle operand, so no lane of %v1 reaches the result.
184define <8 x i16> @load_swap21(<8 x i16>* %vp1, <8 x i16>* %vp2){
185; CHECK-P8-LABEL: load_swap21:
186; CHECK-P8:       # %bb.0:
187; CHECK-P8-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
188; CHECK-P8-NEXT:    lvx v3, 0, r4
189; CHECK-P8-NEXT:    addi r3, r3, .LCPI5_0@toc@l
190; CHECK-P8-NEXT:    lvx v2, 0, r3
191; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
192; CHECK-P8-NEXT:    blr
193;
194; CHECK-P9-LABEL: load_swap21:
195; CHECK-P9:       # %bb.0:
196; CHECK-P9-NEXT:    lxvh8x v2, 0, r4
197; CHECK-P9-NEXT:    blr
198;
199; CHECK-P8-BE-LABEL: load_swap21:
200; CHECK-P8-BE:       # %bb.0:
201; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
202; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r4
203; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
204; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
205; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
206; CHECK-P8-BE-NEXT:    blr
207;
208; CHECK-P9-BE-LABEL: load_swap21:
209; CHECK-P9-BE:       # %bb.0:
210; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
211; CHECK-P9-BE-NEXT:    lxv v2, 0(r4)
212; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
213; CHECK-P9-BE-NEXT:    lxvx v3, 0, r3
214; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
215; CHECK-P9-BE-NEXT:    blr
216  %v1 = load <8 x i16>, <8 x i16>* %vp1
217  %v2 = load <8 x i16>, <8 x i16>* %vp2
218  %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
219  ret <8 x i16> %v3
220}
221
; Loads a <16 x i8> through each of %vp1 and %vp2 and returns the lanes of
; %v1 in reverse order: shuffle mask <15, ..., 0> indexes only the first
; shuffle operand, so no lane of %v2 reaches the result.
222define <16 x i8> @load_swap30(<16 x i8>* %vp1, <16 x i8>* %vp2){
223; CHECK-P8-LABEL: load_swap30:
224; CHECK-P8:       # %bb.0:
225; CHECK-P8-NEXT:    addis r4, r2, .LCPI6_0@toc@ha
226; CHECK-P8-NEXT:    lvx v3, 0, r3
227; CHECK-P8-NEXT:    addi r4, r4, .LCPI6_0@toc@l
228; CHECK-P8-NEXT:    lvx v2, 0, r4
229; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
230; CHECK-P8-NEXT:    blr
231;
232; CHECK-P9-LABEL: load_swap30:
233; CHECK-P9:       # %bb.0:
234; CHECK-P9-NEXT:    lxvb16x v2, 0, r3
235; CHECK-P9-NEXT:    blr
236;
237; CHECK-P8-BE-LABEL: load_swap30:
238; CHECK-P8-BE:       # %bb.0:
239; CHECK-P8-BE-NEXT:    addis r4, r2, .LCPI6_0@toc@ha
240; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
241; CHECK-P8-BE-NEXT:    addi r4, r4, .LCPI6_0@toc@l
242; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r4
243; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
244; CHECK-P8-BE-NEXT:    blr
245;
246; CHECK-P9-BE-LABEL: load_swap30:
247; CHECK-P9-BE:       # %bb.0:
248; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
249; CHECK-P9-BE-NEXT:    xxbrq v2, vs0
250; CHECK-P9-BE-NEXT:    blr
251  %v1 = load <16 x i8>, <16 x i8>* %vp1
252  %v2 = load <16 x i8>, <16 x i8>* %vp2
253  %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
254  ret <16 x i8> %v3
255}
256
; Loads a <16 x i8> through each of %vp1 and %vp2 and returns the lanes of
; %v2 in reverse order: shuffle mask <31, ..., 16> indexes only the second
; shuffle operand, so no lane of %v1 reaches the result.
257define <16 x i8> @load_swap31(<16 x i8>* %vp1, <16 x i8>* %vp2){
258; CHECK-P8-LABEL: load_swap31:
259; CHECK-P8:       # %bb.0:
260; CHECK-P8-NEXT:    addis r3, r2, .LCPI7_0@toc@ha
261; CHECK-P8-NEXT:    lvx v3, 0, r4
262; CHECK-P8-NEXT:    addi r3, r3, .LCPI7_0@toc@l
263; CHECK-P8-NEXT:    lvx v2, 0, r3
264; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
265; CHECK-P8-NEXT:    blr
266;
267; CHECK-P9-LABEL: load_swap31:
268; CHECK-P9:       # %bb.0:
269; CHECK-P9-NEXT:    lxvb16x v2, 0, r4
270; CHECK-P9-NEXT:    blr
271;
272; CHECK-P8-BE-LABEL: load_swap31:
273; CHECK-P8-BE:       # %bb.0:
274; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI7_0@toc@ha
275; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r4
276; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI7_0@toc@l
277; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
278; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
279; CHECK-P8-BE-NEXT:    blr
280;
281; CHECK-P9-BE-LABEL: load_swap31:
282; CHECK-P9-BE:       # %bb.0:
283; CHECK-P9-BE-NEXT:    lxv vs0, 0(r4)
284; CHECK-P9-BE-NEXT:    xxbrq v2, vs0
285; CHECK-P9-BE-NEXT:    blr
286  %v1 = load <16 x i8>, <16 x i8>* %vp1
287  %v2 = load <16 x i8>, <16 x i8>* %vp2
288  %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
289  ret <16 x i8> %v3
290}
291
; Loads a <2 x double> through each of %vp1 and %vp2 and returns the lanes of
; %v2 in reverse order: shuffle mask <3, 2> indexes only the second shuffle
; operand, so no lane of %v1 reaches the result.
; NOTE(review): the other load tests visible in this file whose names end in
; 0 reverse %v1 rather than %v2, and no load_swap41 is visible here - confirm
; the naming/mask combination is intentional before regenerating checks.
292define <2 x double> @load_swap40(<2 x double>* %vp1, <2 x double>* %vp2) {
293; CHECK-P8-LABEL: load_swap40:
294; CHECK-P8:       # %bb.0:
295; CHECK-P8-NEXT:    lxvd2x v2, 0, r4
296; CHECK-P8-NEXT:    blr
297;
298; CHECK-P9-LABEL: load_swap40:
299; CHECK-P9:       # %bb.0:
300; CHECK-P9-NEXT:    lxvd2x v2, 0, r4
301; CHECK-P9-NEXT:    blr
302;
303; CHECK-P8-BE-LABEL: load_swap40:
304; CHECK-P8-BE:       # %bb.0:
305; CHECK-P8-BE-NEXT:    lxvd2x vs0, 0, r4
306; CHECK-P8-BE-NEXT:    xxswapd v2, vs0
307; CHECK-P8-BE-NEXT:    blr
308;
309; CHECK-P9-BE-LABEL: load_swap40:
310; CHECK-P9-BE:       # %bb.0:
311; CHECK-P9-BE-NEXT:    lxv vs0, 0(r4)
312; CHECK-P9-BE-NEXT:    xxswapd v2, vs0
313; CHECK-P9-BE-NEXT:    blr
314  %v1 = load <2 x double>, <2 x double>* %vp1
315  %v2 = load <2 x double>, <2 x double>* %vp2
316  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> <i32 3, i32 2>
317  ret <2 x double> %v3
318}
319
; Loads a <4 x float> through each of %vp1 and %vp2 and returns the lanes of
; %v1 in reverse order: shuffle mask <3, 2, 1, 0> indexes only the first
; shuffle operand, so no lane of %v2 reaches the result.
320define <4 x float> @load_swap50(<4 x float>* %vp1, <4 x float>* %vp2) {
321; CHECK-P8-LABEL: load_swap50:
322; CHECK-P8:       # %bb.0:
323; CHECK-P8-NEXT:    addis r4, r2, .LCPI9_0@toc@ha
324; CHECK-P8-NEXT:    lvx v3, 0, r3
325; CHECK-P8-NEXT:    addi r4, r4, .LCPI9_0@toc@l
326; CHECK-P8-NEXT:    lvx v2, 0, r4
327; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
328; CHECK-P8-NEXT:    blr
329;
330; CHECK-P9-LABEL: load_swap50:
331; CHECK-P9:       # %bb.0:
332; CHECK-P9-NEXT:    lxvw4x v2, 0, r3
333; CHECK-P9-NEXT:    blr
334;
335; CHECK-P8-BE-LABEL: load_swap50:
336; CHECK-P8-BE:       # %bb.0:
337; CHECK-P8-BE-NEXT:    addis r4, r2, .LCPI9_0@toc@ha
338; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
339; CHECK-P8-BE-NEXT:    addi r4, r4, .LCPI9_0@toc@l
340; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r4
341; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
342; CHECK-P8-BE-NEXT:    blr
343;
344; CHECK-P9-BE-LABEL: load_swap50:
345; CHECK-P9-BE:       # %bb.0:
346; CHECK-P9-BE-NEXT:    lxv v2, 0(r3)
347; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
348; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI9_0@toc@l
349; CHECK-P9-BE-NEXT:    lxvx v3, 0, r3
350; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
351; CHECK-P9-BE-NEXT:    blr
352  %v1 = load <4 x float>, <4 x float>* %vp1
353  %v2 = load <4 x float>, <4 x float>* %vp2
354  %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
355  ret <4 x float> %v3
356}
357
; Loads a <4 x float> through each of %vp1 and %vp2 and returns the lanes of
; %v2 in reverse order: shuffle mask <7, 6, 5, 4> indexes only the second
; shuffle operand, so no lane of %v1 reaches the result.
358define <4 x float> @load_swap51(<4 x float>* %vp1, <4 x float>* %vp2) {
359; CHECK-P8-LABEL: load_swap51:
360; CHECK-P8:       # %bb.0:
361; CHECK-P8-NEXT:    addis r3, r2, .LCPI10_0@toc@ha
362; CHECK-P8-NEXT:    lvx v3, 0, r4
363; CHECK-P8-NEXT:    addi r3, r3, .LCPI10_0@toc@l
364; CHECK-P8-NEXT:    lvx v2, 0, r3
365; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
366; CHECK-P8-NEXT:    blr
367;
368; CHECK-P9-LABEL: load_swap51:
369; CHECK-P9:       # %bb.0:
370; CHECK-P9-NEXT:    lxvw4x v2, 0, r4
371; CHECK-P9-NEXT:    blr
372;
373; CHECK-P8-BE-LABEL: load_swap51:
374; CHECK-P8-BE:       # %bb.0:
375; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI10_0@toc@ha
376; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r4
377; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI10_0@toc@l
378; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
379; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
380; CHECK-P8-BE-NEXT:    blr
381;
382; CHECK-P9-BE-LABEL: load_swap51:
383; CHECK-P9-BE:       # %bb.0:
384; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI10_0@toc@ha
385; CHECK-P9-BE-NEXT:    lxv v2, 0(r4)
386; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI10_0@toc@l
387; CHECK-P9-BE-NEXT:    lxvx v3, 0, r3
388; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
389; CHECK-P9-BE-NEXT:    blr
390  %v1 = load <4 x float>, <4 x float>* %vp1
391  %v2 = load <4 x float>, <4 x float>* %vp2
392  %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
393  ret <4 x float> %v3
394}
395
; Reverses the lanes of the <2 x i64> argument %v1 (shuffle mask <1, 0>
; indexes only the first shuffle operand; %v2 is not selected) and stores the
; result through %vp.
396define void @swap_store00(<2 x i64> %v1, <2 x i64> %v2, <2 x i64>* %vp) {
397; CHECK-P8-LABEL: swap_store00:
398; CHECK-P8:       # %bb.0:
399; CHECK-P8-NEXT:    stxvd2x v2, 0, r7
400; CHECK-P8-NEXT:    blr
401;
402; CHECK-P9-LABEL: swap_store00:
403; CHECK-P9:       # %bb.0:
404; CHECK-P9-NEXT:    stxvd2x v2, 0, r7
405; CHECK-P9-NEXT:    blr
406;
407; CHECK-P8-BE-LABEL: swap_store00:
408; CHECK-P8-BE:       # %bb.0:
409; CHECK-P8-BE-NEXT:    xxswapd vs0, v2
410; CHECK-P8-BE-NEXT:    stxvd2x vs0, 0, r7
411; CHECK-P8-BE-NEXT:    blr
412;
413; CHECK-P9-BE-LABEL: swap_store00:
414; CHECK-P9-BE:       # %bb.0:
415; CHECK-P9-BE-NEXT:    xxswapd vs0, v2
416; CHECK-P9-BE-NEXT:    stxv vs0, 0(r7)
417; CHECK-P9-BE-NEXT:    blr
418  %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 1, i32 0>
419  store <2 x i64> %v3, <2 x i64>* %vp
420  ret void
421}
422
; Reverses the lanes of the <2 x i64> argument %v2 (shuffle mask <3, 2>
; indexes only the second shuffle operand; %v1 is not selected) and stores the
; result through %vp.
423define void @swap_store01(<2 x i64> %v1, <2 x i64> %v2, <2 x i64>* %vp) {
424; CHECK-P8-LABEL: swap_store01:
425; CHECK-P8:       # %bb.0:
426; CHECK-P8-NEXT:    stxvd2x v3, 0, r7
427; CHECK-P8-NEXT:    blr
428;
429; CHECK-P9-LABEL: swap_store01:
430; CHECK-P9:       # %bb.0:
431; CHECK-P9-NEXT:    stxvd2x v3, 0, r7
432; CHECK-P9-NEXT:    blr
433;
434; CHECK-P8-BE-LABEL: swap_store01:
435; CHECK-P8-BE:       # %bb.0:
436; CHECK-P8-BE-NEXT:    xxswapd vs0, v3
437; CHECK-P8-BE-NEXT:    stxvd2x vs0, 0, r7
438; CHECK-P8-BE-NEXT:    blr
439;
440; CHECK-P9-BE-LABEL: swap_store01:
441; CHECK-P9-BE:       # %bb.0:
442; CHECK-P9-BE-NEXT:    xxswapd vs0, v3
443; CHECK-P9-BE-NEXT:    stxv vs0, 0(r7)
444; CHECK-P9-BE-NEXT:    blr
445  %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 3, i32 2>
446  store <2 x i64> %v3, <2 x i64>* %vp
447  ret void
448}
449
; Reverses the lanes of the <4 x i32> argument %v1 (shuffle mask <3, 2, 1, 0>
; indexes only the first shuffle operand; %v2 is not selected) and stores the
; result through %vp.
450define void @swap_store10(<4 x i32> %v1, <4 x i32> %v2, <4 x i32>* %vp) {
451; CHECK-P8-LABEL: swap_store10:
452; CHECK-P8:       # %bb.0:
453; CHECK-P8-NEXT:    addis r3, r2, .LCPI13_0@toc@ha
454; CHECK-P8-NEXT:    addi r3, r3, .LCPI13_0@toc@l
455; CHECK-P8-NEXT:    lvx v3, 0, r3
456; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
457; CHECK-P8-NEXT:    stvx v2, 0, r7
458; CHECK-P8-NEXT:    blr
459;
460; CHECK-P9-LABEL: swap_store10:
461; CHECK-P9:       # %bb.0:
462; CHECK-P9-NEXT:    stxvw4x v2, 0, r7
463; CHECK-P9-NEXT:    blr
464;
465; CHECK-P8-BE-LABEL: swap_store10:
466; CHECK-P8-BE:       # %bb.0:
467; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI13_0@toc@ha
468; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI13_0@toc@l
469; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
470; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
471; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
472; CHECK-P8-BE-NEXT:    blr
473;
474; CHECK-P9-BE-LABEL: swap_store10:
475; CHECK-P9-BE:       # %bb.0:
476; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI13_0@toc@ha
477; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI13_0@toc@l
478; CHECK-P9-BE-NEXT:    lxvx v3, 0, r3
479; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
480; CHECK-P9-BE-NEXT:    stxv v2, 0(r7)
481; CHECK-P9-BE-NEXT:    blr
482  %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
483  store <4 x i32> %v3, <4 x i32>* %vp
484  ret void
485}
486
; Reverses the lanes of the <4 x i32> argument %v2 (shuffle mask <7, 6, 5, 4>
; indexes only the second shuffle operand; %v1 is not selected) and stores the
; result through %vp.
487define void @swap_store11(<4 x i32> %v1, <4 x i32> %v2, <4 x i32>* %vp) {
488; CHECK-P8-LABEL: swap_store11:
489; CHECK-P8:       # %bb.0:
490; CHECK-P8-NEXT:    addis r3, r2, .LCPI14_0@toc@ha
491; CHECK-P8-NEXT:    addi r3, r3, .LCPI14_0@toc@l
492; CHECK-P8-NEXT:    lvx v2, 0, r3
493; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
494; CHECK-P8-NEXT:    stvx v2, 0, r7
495; CHECK-P8-NEXT:    blr
496;
497; CHECK-P9-LABEL: swap_store11:
498; CHECK-P9:       # %bb.0:
499; CHECK-P9-NEXT:    stxvw4x v3, 0, r7
500; CHECK-P9-NEXT:    blr
501;
502; CHECK-P8-BE-LABEL: swap_store11:
503; CHECK-P8-BE:       # %bb.0:
504; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI14_0@toc@ha
505; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI14_0@toc@l
506; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
507; CHECK-P8-BE-NEXT:    vperm v2, v3, v3, v2
508; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
509; CHECK-P8-BE-NEXT:    blr
510;
511; CHECK-P9-BE-LABEL: swap_store11:
512; CHECK-P9-BE:       # %bb.0:
513; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI14_0@toc@ha
514; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI14_0@toc@l
515; CHECK-P9-BE-NEXT:    lxvx v2, 0, r3
516; CHECK-P9-BE-NEXT:    vperm v2, v3, v3, v2
517; CHECK-P9-BE-NEXT:    stxv v2, 0(r7)
518; CHECK-P9-BE-NEXT:    blr
519  %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
520  store <4 x i32> %v3, <4 x i32>* %vp
521  ret void
522}
523
; Reverses the lanes of the <8 x i16> argument %v1 (shuffle mask <7, ..., 0>
; indexes only the first shuffle operand; %v2 is not selected) and stores the
; result through %vp.
524define void @swap_store20(<8 x i16> %v1, <8 x i16> %v2, <8 x i16>* %vp) {
525; CHECK-P8-LABEL: swap_store20:
526; CHECK-P8:       # %bb.0:
527; CHECK-P8-NEXT:    addis r3, r2, .LCPI15_0@toc@ha
528; CHECK-P8-NEXT:    addi r3, r3, .LCPI15_0@toc@l
529; CHECK-P8-NEXT:    lvx v3, 0, r3
530; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
531; CHECK-P8-NEXT:    stvx v2, 0, r7
532; CHECK-P8-NEXT:    blr
533;
534; CHECK-P9-LABEL: swap_store20:
535; CHECK-P9:       # %bb.0:
536; CHECK-P9-NEXT:    stxvh8x v2, 0, r7
537; CHECK-P9-NEXT:    blr
538;
539; CHECK-P8-BE-LABEL: swap_store20:
540; CHECK-P8-BE:       # %bb.0:
541; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI15_0@toc@ha
542; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI15_0@toc@l
543; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
544; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
545; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
546; CHECK-P8-BE-NEXT:    blr
547;
548; CHECK-P9-BE-LABEL: swap_store20:
549; CHECK-P9-BE:       # %bb.0:
550; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI15_0@toc@ha
551; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI15_0@toc@l
552; CHECK-P9-BE-NEXT:    lxvx v3, 0, r3
553; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
554; CHECK-P9-BE-NEXT:    stxv v2, 0(r7)
555; CHECK-P9-BE-NEXT:    blr
556  %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
557  store <8 x i16> %v3, <8 x i16>* %vp
558  ret void
559}
560
; Reverses the lanes of the <8 x i16> argument %v2 (shuffle mask <15, ..., 8>
; indexes only the second shuffle operand; %v1 is not selected) and stores the
; result through %vp.
561define void @swap_store21(<8 x i16> %v1, <8 x i16> %v2, <8 x i16>* %vp) {
562; CHECK-P8-LABEL: swap_store21:
563; CHECK-P8:       # %bb.0:
564; CHECK-P8-NEXT:    addis r3, r2, .LCPI16_0@toc@ha
565; CHECK-P8-NEXT:    addi r3, r3, .LCPI16_0@toc@l
566; CHECK-P8-NEXT:    lvx v2, 0, r3
567; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
568; CHECK-P8-NEXT:    stvx v2, 0, r7
569; CHECK-P8-NEXT:    blr
570;
571; CHECK-P9-LABEL: swap_store21:
572; CHECK-P9:       # %bb.0:
573; CHECK-P9-NEXT:    stxvh8x v3, 0, r7
574; CHECK-P9-NEXT:    blr
575;
576; CHECK-P8-BE-LABEL: swap_store21:
577; CHECK-P8-BE:       # %bb.0:
578; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI16_0@toc@ha
579; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI16_0@toc@l
580; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
581; CHECK-P8-BE-NEXT:    vperm v2, v3, v3, v2
582; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
583; CHECK-P8-BE-NEXT:    blr
584;
585; CHECK-P9-BE-LABEL: swap_store21:
586; CHECK-P9-BE:       # %bb.0:
587; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI16_0@toc@ha
588; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI16_0@toc@l
589; CHECK-P9-BE-NEXT:    lxvx v2, 0, r3
590; CHECK-P9-BE-NEXT:    vperm v2, v3, v3, v2
591; CHECK-P9-BE-NEXT:    stxv v2, 0(r7)
592; CHECK-P9-BE-NEXT:    blr
593  %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
594  store <8 x i16> %v3, <8 x i16>* %vp
595  ret void
596}
597
; Reverses the lanes of the <16 x i8> argument %v1 (shuffle mask <15, ..., 0>
; indexes only the first shuffle operand; %v2 is not selected) and stores the
; result through %vp.
598define void @swap_store30(<16 x i8> %v1, <16 x i8> %v2, <16 x i8>* %vp) {
599; CHECK-P8-LABEL: swap_store30:
600; CHECK-P8:       # %bb.0:
601; CHECK-P8-NEXT:    addis r3, r2, .LCPI17_0@toc@ha
602; CHECK-P8-NEXT:    addi r3, r3, .LCPI17_0@toc@l
603; CHECK-P8-NEXT:    lvx v3, 0, r3
604; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
605; CHECK-P8-NEXT:    stvx v2, 0, r7
606; CHECK-P8-NEXT:    blr
607;
608; CHECK-P9-LABEL: swap_store30:
609; CHECK-P9:       # %bb.0:
610; CHECK-P9-NEXT:    stxvb16x v2, 0, r7
611; CHECK-P9-NEXT:    blr
612;
613; CHECK-P8-BE-LABEL: swap_store30:
614; CHECK-P8-BE:       # %bb.0:
615; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI17_0@toc@ha
616; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI17_0@toc@l
617; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
618; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
619; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
620; CHECK-P8-BE-NEXT:    blr
621;
622; CHECK-P9-BE-LABEL: swap_store30:
623; CHECK-P9-BE:       # %bb.0:
624; CHECK-P9-BE-NEXT:    xxbrq vs0, v2
625; CHECK-P9-BE-NEXT:    stxv vs0, 0(r7)
626; CHECK-P9-BE-NEXT:    blr
627  %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
628  store <16 x i8> %v3, <16 x i8>* %vp
629  ret void
630}
631
; Reverses the lanes of the <16 x i8> argument %v2 (shuffle mask
; <31, ..., 16> indexes only the second shuffle operand; %v1 is not selected)
; and stores the result through %vp.
632define void @swap_store31(<16 x i8> %v1, <16 x i8> %v2, <16 x i8>* %vp) {
633; CHECK-P8-LABEL: swap_store31:
634; CHECK-P8:       # %bb.0:
635; CHECK-P8-NEXT:    addis r3, r2, .LCPI18_0@toc@ha
636; CHECK-P8-NEXT:    addi r3, r3, .LCPI18_0@toc@l
637; CHECK-P8-NEXT:    lvx v2, 0, r3
638; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
639; CHECK-P8-NEXT:    stvx v2, 0, r7
640; CHECK-P8-NEXT:    blr
641;
642; CHECK-P9-LABEL: swap_store31:
643; CHECK-P9:       # %bb.0:
644; CHECK-P9-NEXT:    stxvb16x v3, 0, r7
645; CHECK-P9-NEXT:    blr
646;
647; CHECK-P8-BE-LABEL: swap_store31:
648; CHECK-P8-BE:       # %bb.0:
649; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI18_0@toc@ha
650; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI18_0@toc@l
651; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
652; CHECK-P8-BE-NEXT:    vperm v2, v3, v3, v2
653; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
654; CHECK-P8-BE-NEXT:    blr
655;
656; CHECK-P9-BE-LABEL: swap_store31:
657; CHECK-P9-BE:       # %bb.0:
658; CHECK-P9-BE-NEXT:    xxbrq vs0, v3
659; CHECK-P9-BE-NEXT:    stxv vs0, 0(r7)
660; CHECK-P9-BE-NEXT:    blr
661  %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
662  store <16 x i8> %v3, <16 x i8>* %vp
663  ret void
664}
665
; Reverses the lanes of the <2 x double> argument %v1 (shuffle mask <1, 0>
; indexes only the first shuffle operand; %v2 is not selected) and stores the
; result through %vp.
666define void @swap_store40(<2 x double> %v1, <2 x double> %v2, <2 x double>* %vp) {
667; CHECK-P8-LABEL: swap_store40:
668; CHECK-P8:       # %bb.0:
669; CHECK-P8-NEXT:    stxvd2x v2, 0, r7
670; CHECK-P8-NEXT:    blr
671;
672; CHECK-P9-LABEL: swap_store40:
673; CHECK-P9:       # %bb.0:
674; CHECK-P9-NEXT:    stxvd2x v2, 0, r7
675; CHECK-P9-NEXT:    blr
676;
677; CHECK-P8-BE-LABEL: swap_store40:
678; CHECK-P8-BE:       # %bb.0:
679; CHECK-P8-BE-NEXT:    xxswapd vs0, v2
680; CHECK-P8-BE-NEXT:    stxvd2x vs0, 0, r7
681; CHECK-P8-BE-NEXT:    blr
682;
683; CHECK-P9-BE-LABEL: swap_store40:
684; CHECK-P9-BE:       # %bb.0:
685; CHECK-P9-BE-NEXT:    xxswapd vs0, v2
686; CHECK-P9-BE-NEXT:    stxv vs0, 0(r7)
687; CHECK-P9-BE-NEXT:    blr
688  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> <i32 1, i32 0>
689  store <2 x double> %v3, <2 x double>* %vp
690  ret void
691}
692
; Reverses the lanes of the <2 x double> argument %v2 (shuffle mask <3, 2>
; indexes only the second shuffle operand; %v1 is not selected) and stores the
; result through %vp.
693define void @swap_store41(<2 x double> %v1, <2 x double> %v2, <2 x double>* %vp) {
694; CHECK-P8-LABEL: swap_store41:
695; CHECK-P8:       # %bb.0:
696; CHECK-P8-NEXT:    stxvd2x v3, 0, r7
697; CHECK-P8-NEXT:    blr
698;
699; CHECK-P9-LABEL: swap_store41:
700; CHECK-P9:       # %bb.0:
701; CHECK-P9-NEXT:    stxvd2x v3, 0, r7
702; CHECK-P9-NEXT:    blr
703;
704; CHECK-P8-BE-LABEL: swap_store41:
705; CHECK-P8-BE:       # %bb.0:
706; CHECK-P8-BE-NEXT:    xxswapd vs0, v3
707; CHECK-P8-BE-NEXT:    stxvd2x vs0, 0, r7
708; CHECK-P8-BE-NEXT:    blr
709;
710; CHECK-P9-BE-LABEL: swap_store41:
711; CHECK-P9-BE:       # %bb.0:
712; CHECK-P9-BE-NEXT:    xxswapd vs0, v3
713; CHECK-P9-BE-NEXT:    stxv vs0, 0(r7)
714; CHECK-P9-BE-NEXT:    blr
715  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> <i32 3, i32 2>
716  store <2 x double> %v3, <2 x double>* %vp
717  ret void
718}
719
; Reverses the lanes of the <4 x float> argument %v1 (shuffle mask
; <3, 2, 1, 0> indexes only the first shuffle operand; %v2 is not selected)
; and stores the result through %vp.
720define void @swap_store50(<4 x float> %v1, <4 x float> %v2, <4 x float>* %vp) {
721; CHECK-P8-LABEL: swap_store50:
722; CHECK-P8:       # %bb.0:
723; CHECK-P8-NEXT:    addis r3, r2, .LCPI21_0@toc@ha
724; CHECK-P8-NEXT:    addi r3, r3, .LCPI21_0@toc@l
725; CHECK-P8-NEXT:    lvx v3, 0, r3
726; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
727; CHECK-P8-NEXT:    stvx v2, 0, r7
728; CHECK-P8-NEXT:    blr
729;
730; CHECK-P9-LABEL: swap_store50:
731; CHECK-P9:       # %bb.0:
732; CHECK-P9-NEXT:    stxvw4x v2, 0, r7
733; CHECK-P9-NEXT:    blr
734;
735; CHECK-P8-BE-LABEL: swap_store50:
736; CHECK-P8-BE:       # %bb.0:
737; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI21_0@toc@ha
738; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI21_0@toc@l
739; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
740; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
741; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
742; CHECK-P8-BE-NEXT:    blr
743;
744; CHECK-P9-BE-LABEL: swap_store50:
745; CHECK-P9-BE:       # %bb.0:
746; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI21_0@toc@ha
747; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI21_0@toc@l
748; CHECK-P9-BE-NEXT:    lxvx v3, 0, r3
749; CHECK-P9-BE-NEXT:    vperm v2, v2, v2, v3
750; CHECK-P9-BE-NEXT:    stxv v2, 0(r7)
751; CHECK-P9-BE-NEXT:    blr
752  %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
753  store <4 x float> %v3, <4 x float>* %vp
754  ret void
755}
756
; Reverses the lanes of the <4 x float> argument %v2 (shuffle mask
; <7, 6, 5, 4> indexes only the second shuffle operand; %v1 is not selected)
; and stores the result through %vp.
757define void @swap_store51(<4 x float> %v1, <4 x float> %v2, <4 x float>* %vp) {
758; CHECK-P8-LABEL: swap_store51:
759; CHECK-P8:       # %bb.0:
760; CHECK-P8-NEXT:    addis r3, r2, .LCPI22_0@toc@ha
761; CHECK-P8-NEXT:    addi r3, r3, .LCPI22_0@toc@l
762; CHECK-P8-NEXT:    lvx v2, 0, r3
763; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
764; CHECK-P8-NEXT:    stvx v2, 0, r7
765; CHECK-P8-NEXT:    blr
766;
767; CHECK-P9-LABEL: swap_store51:
768; CHECK-P9:       # %bb.0:
769; CHECK-P9-NEXT:    stxvw4x v3, 0, r7
770; CHECK-P9-NEXT:    blr
771;
772; CHECK-P8-BE-LABEL: swap_store51:
773; CHECK-P8-BE:       # %bb.0:
774; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI22_0@toc@ha
775; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI22_0@toc@l
776; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
777; CHECK-P8-BE-NEXT:    vperm v2, v3, v3, v2
778; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
779; CHECK-P8-BE-NEXT:    blr
780;
781; CHECK-P9-BE-LABEL: swap_store51:
782; CHECK-P9-BE:       # %bb.0:
783; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI22_0@toc@ha
784; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI22_0@toc@l
785; CHECK-P9-BE-NEXT:    lxvx v2, 0, r3
786; CHECK-P9-BE-NEXT:    vperm v2, v3, v3, v2
787; CHECK-P9-BE-NEXT:    stxv v2, 0(r7)
788; CHECK-P9-BE-NEXT:    blr
789  %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
790  store <4 x float> %v3, <4 x float>* %vp
791  ret void
792}
793