1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: opt -mtriple=powerpc-unknown-linux-gnu < %s -instcombine | \
3; RUN:   llc -mtriple=ppc32-- -mcpu=g5 | not grep vperm
4; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g5 | FileCheck %s
5
; Two-input byte shift: the result is bytes 5..15 of the vector loaded from %A
; followed by bytes 0..4 of the vector loaded from %B (both viewed as
; <16 x i8>), stored back to %A. The assertions below expect the whole
; extract/insert chain to be emitted as one vsldoi with shift amount 5.
6define void @VSLDOI_xy(<8 x i16>* %A, <8 x i16>* %B) {
7; CHECK-LABEL: VSLDOI_xy:
8; CHECK:       # %bb.0: # %entry
9; CHECK-NEXT:    lvx 2, 0, 3
10; CHECK-NEXT:    lvx 3, 0, 4
11; CHECK-NEXT:    vsldoi 2, 2, 3, 5
12; CHECK-NEXT:    stvx 2, 0, 3
13; CHECK-NEXT:    blr
14entry:
15	%tmp = load <8 x i16>, <8 x i16>* %A		; <<8 x i16>> [#uses=1]
16	%tmp2 = load <8 x i16>, <8 x i16>* %B		; <<8 x i16>> [#uses=1]
17	%tmp.upgrd.1 = bitcast <8 x i16> %tmp to <16 x i8>		; <<16 x i8>> [#uses=11]
18	%tmp2.upgrd.2 = bitcast <8 x i16> %tmp2 to <16 x i8>		; <<16 x i8>> [#uses=5]
19	%tmp.upgrd.3 = extractelement <16 x i8> %tmp.upgrd.1, i32 5		; <i8> [#uses=1]
20	%tmp3 = extractelement <16 x i8> %tmp.upgrd.1, i32 6		; <i8> [#uses=1]
21	%tmp4 = extractelement <16 x i8> %tmp.upgrd.1, i32 7		; <i8> [#uses=1]
22	%tmp5 = extractelement <16 x i8> %tmp.upgrd.1, i32 8		; <i8> [#uses=1]
23	%tmp6 = extractelement <16 x i8> %tmp.upgrd.1, i32 9		; <i8> [#uses=1]
24	%tmp7 = extractelement <16 x i8> %tmp.upgrd.1, i32 10		; <i8> [#uses=1]
25	%tmp8 = extractelement <16 x i8> %tmp.upgrd.1, i32 11		; <i8> [#uses=1]
26	%tmp9 = extractelement <16 x i8> %tmp.upgrd.1, i32 12		; <i8> [#uses=1]
27	%tmp10 = extractelement <16 x i8> %tmp.upgrd.1, i32 13		; <i8> [#uses=1]
28	%tmp11 = extractelement <16 x i8> %tmp.upgrd.1, i32 14		; <i8> [#uses=1]
29	%tmp12 = extractelement <16 x i8> %tmp.upgrd.1, i32 15		; <i8> [#uses=1]
30	%tmp13 = extractelement <16 x i8> %tmp2.upgrd.2, i32 0		; <i8> [#uses=1]
31	%tmp14 = extractelement <16 x i8> %tmp2.upgrd.2, i32 1		; <i8> [#uses=1]
32	%tmp15 = extractelement <16 x i8> %tmp2.upgrd.2, i32 2		; <i8> [#uses=1]
33	%tmp16 = extractelement <16 x i8> %tmp2.upgrd.2, i32 3		; <i8> [#uses=1]
34	%tmp17 = extractelement <16 x i8> %tmp2.upgrd.2, i32 4		; <i8> [#uses=1]
35	%tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.3, i32 0		; <<16 x i8>> [#uses=1]
36	%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1		; <<16 x i8>> [#uses=1]
37	%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2		; <<16 x i8>> [#uses=1]
38	%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3		; <<16 x i8>> [#uses=1]
39	%tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4		; <<16 x i8>> [#uses=1]
40	%tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5		; <<16 x i8>> [#uses=1]
41	%tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6		; <<16 x i8>> [#uses=1]
42	%tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7		; <<16 x i8>> [#uses=1]
43	%tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8		; <<16 x i8>> [#uses=1]
44	%tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9		; <<16 x i8>> [#uses=1]
45	%tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10		; <<16 x i8>> [#uses=1]
46	%tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11		; <<16 x i8>> [#uses=1]
47	%tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12		; <<16 x i8>> [#uses=1]
48	%tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13		; <<16 x i8>> [#uses=1]
49	%tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14		; <<16 x i8>> [#uses=1]
50	%tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15		; <<16 x i8>> [#uses=1]
51	%tmp33.upgrd.4 = bitcast <16 x i8> %tmp33 to <8 x i16>		; <<8 x i16>> [#uses=1]
52	store <8 x i16> %tmp33.upgrd.4, <8 x i16>* %A
53	ret void
54}
55
; Single-input variant of the byte shift: both loads read %A (%B is unused),
; so the result is bytes 5..15 followed by bytes 0..4 of the same vector.
; The assertions below expect one load and a vsldoi whose two source
; operands are the same register, with shift amount 5.
56define void @VSLDOI_xx(<8 x i16>* %A, <8 x i16>* %B) {
57; CHECK-LABEL: VSLDOI_xx:
58; CHECK:       # %bb.0:
59; CHECK-NEXT:    lvx 2, 0, 3
60; CHECK-NEXT:    vsldoi 2, 2, 2, 5
61; CHECK-NEXT:    stvx 2, 0, 3
62; CHECK-NEXT:    blr
63	%tmp = load <8 x i16>, <8 x i16>* %A		; <<8 x i16>> [#uses=1]
64	%tmp2 = load <8 x i16>, <8 x i16>* %A		; <<8 x i16>> [#uses=1]
65	%tmp.upgrd.5 = bitcast <8 x i16> %tmp to <16 x i8>		; <<16 x i8>> [#uses=11]
66	%tmp2.upgrd.6 = bitcast <8 x i16> %tmp2 to <16 x i8>		; <<16 x i8>> [#uses=5]
67	%tmp.upgrd.7 = extractelement <16 x i8> %tmp.upgrd.5, i32 5		; <i8> [#uses=1]
68	%tmp3 = extractelement <16 x i8> %tmp.upgrd.5, i32 6		; <i8> [#uses=1]
69	%tmp4 = extractelement <16 x i8> %tmp.upgrd.5, i32 7		; <i8> [#uses=1]
70	%tmp5 = extractelement <16 x i8> %tmp.upgrd.5, i32 8		; <i8> [#uses=1]
71	%tmp6 = extractelement <16 x i8> %tmp.upgrd.5, i32 9		; <i8> [#uses=1]
72	%tmp7 = extractelement <16 x i8> %tmp.upgrd.5, i32 10		; <i8> [#uses=1]
73	%tmp8 = extractelement <16 x i8> %tmp.upgrd.5, i32 11		; <i8> [#uses=1]
74	%tmp9 = extractelement <16 x i8> %tmp.upgrd.5, i32 12		; <i8> [#uses=1]
75	%tmp10 = extractelement <16 x i8> %tmp.upgrd.5, i32 13		; <i8> [#uses=1]
76	%tmp11 = extractelement <16 x i8> %tmp.upgrd.5, i32 14		; <i8> [#uses=1]
77	%tmp12 = extractelement <16 x i8> %tmp.upgrd.5, i32 15		; <i8> [#uses=1]
78	%tmp13 = extractelement <16 x i8> %tmp2.upgrd.6, i32 0		; <i8> [#uses=1]
79	%tmp14 = extractelement <16 x i8> %tmp2.upgrd.6, i32 1		; <i8> [#uses=1]
80	%tmp15 = extractelement <16 x i8> %tmp2.upgrd.6, i32 2		; <i8> [#uses=1]
81	%tmp16 = extractelement <16 x i8> %tmp2.upgrd.6, i32 3		; <i8> [#uses=1]
82	%tmp17 = extractelement <16 x i8> %tmp2.upgrd.6, i32 4		; <i8> [#uses=1]
83	%tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.7, i32 0		; <<16 x i8>> [#uses=1]
84	%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1		; <<16 x i8>> [#uses=1]
85	%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2		; <<16 x i8>> [#uses=1]
86	%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3		; <<16 x i8>> [#uses=1]
87	%tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4		; <<16 x i8>> [#uses=1]
88	%tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5		; <<16 x i8>> [#uses=1]
89	%tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6		; <<16 x i8>> [#uses=1]
90	%tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7		; <<16 x i8>> [#uses=1]
91	%tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8		; <<16 x i8>> [#uses=1]
92	%tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9		; <<16 x i8>> [#uses=1]
93	%tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10		; <<16 x i8>> [#uses=1]
94	%tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11		; <<16 x i8>> [#uses=1]
95	%tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12		; <<16 x i8>> [#uses=1]
96	%tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13		; <<16 x i8>> [#uses=1]
97	%tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14		; <<16 x i8>> [#uses=1]
98	%tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15		; <<16 x i8>> [#uses=1]
99	%tmp33.upgrd.8 = bitcast <16 x i8> %tmp33 to <8 x i16>		; <<8 x i16>> [#uses=1]
100	store <8 x i16> %tmp33.upgrd.8, <8 x i16>* %A
101	ret void
102}
103
; Calls the AltiVec vperm intrinsic directly on the vectors loaded from %A and
; %B (bitcast to <4 x i32>) with a constant mask whose 16 bytes are all 14,
; then stores the result to %A. With llc alone, the assertions below expect
; the mask to be materialized by vspltisb and a real vperm to be emitted.
; The first run line at the top of the file (opt -instcombine piped into llc)
; instead requires that no vperm appears anywhere in the output.
104define void @VPERM_promote(<8 x i16>* %A, <8 x i16>* %B) {
105; CHECK-LABEL: VPERM_promote:
106; CHECK:       # %bb.0: # %entry
107; CHECK-NEXT:    lvx 2, 0, 3
108; CHECK-NEXT:    vspltisb 4, 14
109; CHECK-NEXT:    lvx 3, 0, 4
110; CHECK-NEXT:    vperm 2, 2, 3, 4
111; CHECK-NEXT:    stvx 2, 0, 3
112; CHECK-NEXT:    blr
113entry:
114	%tmp = load <8 x i16>, <8 x i16>* %A		; <<8 x i16>> [#uses=1]
115	%tmp.upgrd.9 = bitcast <8 x i16> %tmp to <4 x i32>		; <<4 x i32>> [#uses=1]
116	%tmp2 = load <8 x i16>, <8 x i16>* %B		; <<8 x i16>> [#uses=1]
117	%tmp2.upgrd.10 = bitcast <8 x i16> %tmp2 to <4 x i32>		; <<4 x i32>> [#uses=1]
118	%tmp3 = call <4 x i32> @llvm.ppc.altivec.vperm( <4 x i32> %tmp.upgrd.9, <4 x i32> %tmp2.upgrd.10, <16 x i8> < i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14 > )		; <<4 x i32>> [#uses=1]
119	%tmp3.upgrd.11 = bitcast <4 x i32> %tmp3 to <8 x i16>		; <<8 x i16>> [#uses=1]
120	store <8 x i16> %tmp3.upgrd.11, <8 x i16>* %A
121	ret void
122}
123
; AltiVec permute intrinsic called by @VPERM_promote: two <4 x i32> inputs
; plus a <16 x i8> mask operand, returning <4 x i32>.
124declare <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32>, <4 x i32>, <16 x i8>)
125
; Byte merge of elements 8..15: interleaves bytes 8..15 of the vector loaded
; from %A with the same-index bytes of the vector loaded from %B (the %A byte
; first in each pair) and stores the result to %A. The assertions below
; expect a single two-input vmrglb.
126define void @tb_l(<16 x i8>* %A, <16 x i8>* %B) {
127; CHECK-LABEL: tb_l:
128; CHECK:       # %bb.0: # %entry
129; CHECK-NEXT:    lvx 2, 0, 3
130; CHECK-NEXT:    lvx 3, 0, 4
131; CHECK-NEXT:    vmrglb 2, 2, 3
132; CHECK-NEXT:    stvx 2, 0, 3
133; CHECK-NEXT:    blr
134entry:
135	%tmp = load <16 x i8>, <16 x i8>* %A		; <<16 x i8>> [#uses=8]
136	%tmp2 = load <16 x i8>, <16 x i8>* %B		; <<16 x i8>> [#uses=8]
137	%tmp.upgrd.12 = extractelement <16 x i8> %tmp, i32 8		; <i8> [#uses=1]
138	%tmp3 = extractelement <16 x i8> %tmp2, i32 8		; <i8> [#uses=1]
139	%tmp4 = extractelement <16 x i8> %tmp, i32 9		; <i8> [#uses=1]
140	%tmp5 = extractelement <16 x i8> %tmp2, i32 9		; <i8> [#uses=1]
141	%tmp6 = extractelement <16 x i8> %tmp, i32 10		; <i8> [#uses=1]
142	%tmp7 = extractelement <16 x i8> %tmp2, i32 10		; <i8> [#uses=1]
143	%tmp8 = extractelement <16 x i8> %tmp, i32 11		; <i8> [#uses=1]
144	%tmp9 = extractelement <16 x i8> %tmp2, i32 11		; <i8> [#uses=1]
145	%tmp10 = extractelement <16 x i8> %tmp, i32 12		; <i8> [#uses=1]
146	%tmp11 = extractelement <16 x i8> %tmp2, i32 12		; <i8> [#uses=1]
147	%tmp12 = extractelement <16 x i8> %tmp, i32 13		; <i8> [#uses=1]
148	%tmp13 = extractelement <16 x i8> %tmp2, i32 13		; <i8> [#uses=1]
149	%tmp14 = extractelement <16 x i8> %tmp, i32 14		; <i8> [#uses=1]
150	%tmp15 = extractelement <16 x i8> %tmp2, i32 14		; <i8> [#uses=1]
151	%tmp16 = extractelement <16 x i8> %tmp, i32 15		; <i8> [#uses=1]
152	%tmp17 = extractelement <16 x i8> %tmp2, i32 15		; <i8> [#uses=1]
153	%tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.12, i32 0		; <<16 x i8>> [#uses=1]
154	%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1		; <<16 x i8>> [#uses=1]
155	%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2		; <<16 x i8>> [#uses=1]
156	%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3		; <<16 x i8>> [#uses=1]
157	%tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4		; <<16 x i8>> [#uses=1]
158	%tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5		; <<16 x i8>> [#uses=1]
159	%tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6		; <<16 x i8>> [#uses=1]
160	%tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7		; <<16 x i8>> [#uses=1]
161	%tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8		; <<16 x i8>> [#uses=1]
162	%tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9		; <<16 x i8>> [#uses=1]
163	%tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10		; <<16 x i8>> [#uses=1]
164	%tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11		; <<16 x i8>> [#uses=1]
165	%tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12		; <<16 x i8>> [#uses=1]
166	%tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13		; <<16 x i8>> [#uses=1]
167	%tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14		; <<16 x i8>> [#uses=1]
168	%tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15		; <<16 x i8>> [#uses=1]
169	store <16 x i8> %tmp33, <16 x i8>* %A
170	ret void
171}
172
; Halfword merge of elements 4..7: interleaves i16 elements 4..7 of the vector
; loaded from %A with the same-index elements of the vector loaded from %B
; (the %A element first in each pair) and stores the result to %A. The
; assertions below expect a single two-input vmrglh.
173define void @th_l(<8 x i16>* %A, <8 x i16>* %B) {
174; CHECK-LABEL: th_l:
175; CHECK:       # %bb.0: # %entry
176; CHECK-NEXT:    lvx 2, 0, 3
177; CHECK-NEXT:    lvx 3, 0, 4
178; CHECK-NEXT:    vmrglh 2, 2, 3
179; CHECK-NEXT:    stvx 2, 0, 3
180; CHECK-NEXT:    blr
181entry:
182	%tmp = load <8 x i16>, <8 x i16>* %A		; <<8 x i16>> [#uses=4]
183	%tmp2 = load <8 x i16>, <8 x i16>* %B		; <<8 x i16>> [#uses=4]
184	%tmp.upgrd.13 = extractelement <8 x i16> %tmp, i32 4		; <i16> [#uses=1]
185	%tmp3 = extractelement <8 x i16> %tmp2, i32 4		; <i16> [#uses=1]
186	%tmp4 = extractelement <8 x i16> %tmp, i32 5		; <i16> [#uses=1]
187	%tmp5 = extractelement <8 x i16> %tmp2, i32 5		; <i16> [#uses=1]
188	%tmp6 = extractelement <8 x i16> %tmp, i32 6		; <i16> [#uses=1]
189	%tmp7 = extractelement <8 x i16> %tmp2, i32 6		; <i16> [#uses=1]
190	%tmp8 = extractelement <8 x i16> %tmp, i32 7		; <i16> [#uses=1]
191	%tmp9 = extractelement <8 x i16> %tmp2, i32 7		; <i16> [#uses=1]
192	%tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.13, i32 0		; <<8 x i16>> [#uses=1]
193	%tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1		; <<8 x i16>> [#uses=1]
194	%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2		; <<8 x i16>> [#uses=1]
195	%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3		; <<8 x i16>> [#uses=1]
196	%tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4		; <<8 x i16>> [#uses=1]
197	%tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5		; <<8 x i16>> [#uses=1]
198	%tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6		; <<8 x i16>> [#uses=1]
199	%tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7		; <<8 x i16>> [#uses=1]
200	store <8 x i16> %tmp17, <8 x i16>* %A
201	ret void
202}
203
; Word merge of elements 2..3: interleaves i32 elements 2 and 3 of the vector
; loaded from %A with the same-index elements of the vector loaded from %B
; (the %A element first in each pair) and stores the result to %A. The
; assertions below expect a single two-input vmrglw.
204define void @tw_l(<4 x i32>* %A, <4 x i32>* %B) {
205; CHECK-LABEL: tw_l:
206; CHECK:       # %bb.0: # %entry
207; CHECK-NEXT:    lvx 2, 0, 3
208; CHECK-NEXT:    lvx 3, 0, 4
209; CHECK-NEXT:    vmrglw 2, 2, 3
210; CHECK-NEXT:    stvx 2, 0, 3
211; CHECK-NEXT:    blr
212entry:
213	%tmp = load <4 x i32>, <4 x i32>* %A		; <<4 x i32>> [#uses=2]
214	%tmp2 = load <4 x i32>, <4 x i32>* %B		; <<4 x i32>> [#uses=2]
215	%tmp.upgrd.14 = extractelement <4 x i32> %tmp, i32 2		; <i32> [#uses=1]
216	%tmp3 = extractelement <4 x i32> %tmp2, i32 2		; <i32> [#uses=1]
217	%tmp4 = extractelement <4 x i32> %tmp, i32 3		; <i32> [#uses=1]
218	%tmp5 = extractelement <4 x i32> %tmp2, i32 3		; <i32> [#uses=1]
219	%tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.14, i32 0		; <<4 x i32>> [#uses=1]
220	%tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1		; <<4 x i32>> [#uses=1]
221	%tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2		; <<4 x i32>> [#uses=1]
222	%tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3		; <<4 x i32>> [#uses=1]
223	store <4 x i32> %tmp9, <4 x i32>* %A
224	ret void
225}
226
; Byte merge of elements 0..7: interleaves bytes 0..7 of the vector loaded
; from %A with the same-index bytes of the vector loaded from %B (the %A byte
; first in each pair) and stores the result to %A. The assertions below
; expect a single two-input vmrghb.
227define void @tb_h(<16 x i8>* %A, <16 x i8>* %B) {
228; CHECK-LABEL: tb_h:
229; CHECK:       # %bb.0: # %entry
230; CHECK-NEXT:    lvx 2, 0, 3
231; CHECK-NEXT:    lvx 3, 0, 4
232; CHECK-NEXT:    vmrghb 2, 2, 3
233; CHECK-NEXT:    stvx 2, 0, 3
234; CHECK-NEXT:    blr
235entry:
236	%tmp = load <16 x i8>, <16 x i8>* %A		; <<16 x i8>> [#uses=8]
237	%tmp2 = load <16 x i8>, <16 x i8>* %B		; <<16 x i8>> [#uses=8]
238	%tmp.upgrd.15 = extractelement <16 x i8> %tmp, i32 0		; <i8> [#uses=1]
239	%tmp3 = extractelement <16 x i8> %tmp2, i32 0		; <i8> [#uses=1]
240	%tmp4 = extractelement <16 x i8> %tmp, i32 1		; <i8> [#uses=1]
241	%tmp5 = extractelement <16 x i8> %tmp2, i32 1		; <i8> [#uses=1]
242	%tmp6 = extractelement <16 x i8> %tmp, i32 2		; <i8> [#uses=1]
243	%tmp7 = extractelement <16 x i8> %tmp2, i32 2		; <i8> [#uses=1]
244	%tmp8 = extractelement <16 x i8> %tmp, i32 3		; <i8> [#uses=1]
245	%tmp9 = extractelement <16 x i8> %tmp2, i32 3		; <i8> [#uses=1]
246	%tmp10 = extractelement <16 x i8> %tmp, i32 4		; <i8> [#uses=1]
247	%tmp11 = extractelement <16 x i8> %tmp2, i32 4		; <i8> [#uses=1]
248	%tmp12 = extractelement <16 x i8> %tmp, i32 5		; <i8> [#uses=1]
249	%tmp13 = extractelement <16 x i8> %tmp2, i32 5		; <i8> [#uses=1]
250	%tmp14 = extractelement <16 x i8> %tmp, i32 6		; <i8> [#uses=1]
251	%tmp15 = extractelement <16 x i8> %tmp2, i32 6		; <i8> [#uses=1]
252	%tmp16 = extractelement <16 x i8> %tmp, i32 7		; <i8> [#uses=1]
253	%tmp17 = extractelement <16 x i8> %tmp2, i32 7		; <i8> [#uses=1]
254	%tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.15, i32 0		; <<16 x i8>> [#uses=1]
255	%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1		; <<16 x i8>> [#uses=1]
256	%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2		; <<16 x i8>> [#uses=1]
257	%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3		; <<16 x i8>> [#uses=1]
258	%tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4		; <<16 x i8>> [#uses=1]
259	%tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5		; <<16 x i8>> [#uses=1]
260	%tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6		; <<16 x i8>> [#uses=1]
261	%tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7		; <<16 x i8>> [#uses=1]
262	%tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8		; <<16 x i8>> [#uses=1]
263	%tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9		; <<16 x i8>> [#uses=1]
264	%tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10		; <<16 x i8>> [#uses=1]
265	%tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11		; <<16 x i8>> [#uses=1]
266	%tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12		; <<16 x i8>> [#uses=1]
267	%tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13		; <<16 x i8>> [#uses=1]
268	%tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14		; <<16 x i8>> [#uses=1]
269	%tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15		; <<16 x i8>> [#uses=1]
270	store <16 x i8> %tmp33, <16 x i8>* %A
271	ret void
272}
273
; Halfword merge of elements 0..3: interleaves i16 elements 0..3 of the vector
; loaded from %A with the same-index elements of the vector loaded from %B
; (the %A element first in each pair) and stores the result to %A. The
; assertions below expect a single two-input vmrghh.
274define void @th_h(<8 x i16>* %A, <8 x i16>* %B) {
275; CHECK-LABEL: th_h:
276; CHECK:       # %bb.0: # %entry
277; CHECK-NEXT:    lvx 2, 0, 3
278; CHECK-NEXT:    lvx 3, 0, 4
279; CHECK-NEXT:    vmrghh 2, 2, 3
280; CHECK-NEXT:    stvx 2, 0, 3
281; CHECK-NEXT:    blr
282entry:
283	%tmp = load <8 x i16>, <8 x i16>* %A		; <<8 x i16>> [#uses=4]
284	%tmp2 = load <8 x i16>, <8 x i16>* %B		; <<8 x i16>> [#uses=4]
285	%tmp.upgrd.16 = extractelement <8 x i16> %tmp, i32 0		; <i16> [#uses=1]
286	%tmp3 = extractelement <8 x i16> %tmp2, i32 0		; <i16> [#uses=1]
287	%tmp4 = extractelement <8 x i16> %tmp, i32 1		; <i16> [#uses=1]
288	%tmp5 = extractelement <8 x i16> %tmp2, i32 1		; <i16> [#uses=1]
289	%tmp6 = extractelement <8 x i16> %tmp, i32 2		; <i16> [#uses=1]
290	%tmp7 = extractelement <8 x i16> %tmp2, i32 2		; <i16> [#uses=1]
291	%tmp8 = extractelement <8 x i16> %tmp, i32 3		; <i16> [#uses=1]
292	%tmp9 = extractelement <8 x i16> %tmp2, i32 3		; <i16> [#uses=1]
293	%tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.16, i32 0		; <<8 x i16>> [#uses=1]
294	%tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1		; <<8 x i16>> [#uses=1]
295	%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2		; <<8 x i16>> [#uses=1]
296	%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3		; <<8 x i16>> [#uses=1]
297	%tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4		; <<8 x i16>> [#uses=1]
298	%tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5		; <<8 x i16>> [#uses=1]
299	%tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6		; <<8 x i16>> [#uses=1]
300	%tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7		; <<8 x i16>> [#uses=1]
301	store <8 x i16> %tmp17, <8 x i16>* %A
302	ret void
303}
304
; Word merge of elements 0..1 with the inputs swapped: each pair takes the
; element from the vector loaded from %B first and the same-index element
; from the vector loaded from %A second; the result is stored to %A. The
; assertions below expect a single vmrghw whose source registers are in the
; opposite order from @tw_h_flop (3, 2 instead of 2, 3).
305define void @tw_h(<4 x i32>* %A, <4 x i32>* %B) {
306; CHECK-LABEL: tw_h:
307; CHECK:       # %bb.0: # %entry
308; CHECK-NEXT:    lvx 2, 0, 3
309; CHECK-NEXT:    lvx 3, 0, 4
310; CHECK-NEXT:    vmrghw 2, 3, 2
311; CHECK-NEXT:    stvx 2, 0, 3
312; CHECK-NEXT:    blr
313entry:
314	%tmp = load <4 x i32>, <4 x i32>* %A		; <<4 x i32>> [#uses=2]
315	%tmp2 = load <4 x i32>, <4 x i32>* %B		; <<4 x i32>> [#uses=2]
316	%tmp.upgrd.17 = extractelement <4 x i32> %tmp2, i32 0		; <i32> [#uses=1]
317	%tmp3 = extractelement <4 x i32> %tmp, i32 0		; <i32> [#uses=1]
318	%tmp4 = extractelement <4 x i32> %tmp2, i32 1		; <i32> [#uses=1]
319	%tmp5 = extractelement <4 x i32> %tmp, i32 1		; <i32> [#uses=1]
320	%tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.17, i32 0		; <<4 x i32>> [#uses=1]
321	%tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1		; <<4 x i32>> [#uses=1]
322	%tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2		; <<4 x i32>> [#uses=1]
323	%tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3		; <<4 x i32>> [#uses=1]
324	store <4 x i32> %tmp9, <4 x i32>* %A
325	ret void
326}
327
; Word merge of elements 0..1 in the natural operand order: each pair takes
; the element from the vector loaded from %A first and the same-index element
; from the vector loaded from %B second; the result is stored to %A. The
; assertions below expect a single vmrghw with source registers 2, 3.
328define void @tw_h_flop(<4 x i32>* %A, <4 x i32>* %B) {
329; CHECK-LABEL: tw_h_flop:
330; CHECK:       # %bb.0:
331; CHECK-NEXT:    lvx 2, 0, 3
332; CHECK-NEXT:    lvx 3, 0, 4
333; CHECK-NEXT:    vmrghw 2, 2, 3
334; CHECK-NEXT:    stvx 2, 0, 3
335; CHECK-NEXT:    blr
336	%tmp = load <4 x i32>, <4 x i32>* %A		; <<4 x i32>> [#uses=2]
337	%tmp2 = load <4 x i32>, <4 x i32>* %B		; <<4 x i32>> [#uses=2]
338	%tmp.upgrd.18 = extractelement <4 x i32> %tmp, i32 0		; <i32> [#uses=1]
339	%tmp3 = extractelement <4 x i32> %tmp2, i32 0		; <i32> [#uses=1]
340	%tmp4 = extractelement <4 x i32> %tmp, i32 1		; <i32> [#uses=1]
341	%tmp5 = extractelement <4 x i32> %tmp2, i32 1		; <i32> [#uses=1]
342	%tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.18, i32 0		; <<4 x i32>> [#uses=1]
343	%tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1		; <<4 x i32>> [#uses=1]
344	%tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2		; <<4 x i32>> [#uses=1]
345	%tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3		; <<4 x i32>> [#uses=1]
346	store <4 x i32> %tmp9, <4 x i32>* %A
347	ret void
348}
349
; Single-input byte merge of elements 8..15: each of bytes 8..15 of the vector
; loaded from %A is written to two adjacent result lanes, and the result is
; stored to %A (%B is unused). The assertions below expect one load and a
; vmrglb whose two source operands are the same register.
350define void @VMRG_UNARY_tb_l(<16 x i8>* %A, <16 x i8>* %B) {
351; CHECK-LABEL: VMRG_UNARY_tb_l:
352; CHECK:       # %bb.0: # %entry
353; CHECK-NEXT:    lvx 2, 0, 3
354; CHECK-NEXT:    vmrglb 2, 2, 2
355; CHECK-NEXT:    stvx 2, 0, 3
356; CHECK-NEXT:    blr
357entry:
358	%tmp = load <16 x i8>, <16 x i8>* %A		; <<16 x i8>> [#uses=16]
359	%tmp.upgrd.19 = extractelement <16 x i8> %tmp, i32 8		; <i8> [#uses=1]
360	%tmp3 = extractelement <16 x i8> %tmp, i32 8		; <i8> [#uses=1]
361	%tmp4 = extractelement <16 x i8> %tmp, i32 9		; <i8> [#uses=1]
362	%tmp5 = extractelement <16 x i8> %tmp, i32 9		; <i8> [#uses=1]
363	%tmp6 = extractelement <16 x i8> %tmp, i32 10		; <i8> [#uses=1]
364	%tmp7 = extractelement <16 x i8> %tmp, i32 10		; <i8> [#uses=1]
365	%tmp8 = extractelement <16 x i8> %tmp, i32 11		; <i8> [#uses=1]
366	%tmp9 = extractelement <16 x i8> %tmp, i32 11		; <i8> [#uses=1]
367	%tmp10 = extractelement <16 x i8> %tmp, i32 12		; <i8> [#uses=1]
368	%tmp11 = extractelement <16 x i8> %tmp, i32 12		; <i8> [#uses=1]
369	%tmp12 = extractelement <16 x i8> %tmp, i32 13		; <i8> [#uses=1]
370	%tmp13 = extractelement <16 x i8> %tmp, i32 13		; <i8> [#uses=1]
371	%tmp14 = extractelement <16 x i8> %tmp, i32 14		; <i8> [#uses=1]
372	%tmp15 = extractelement <16 x i8> %tmp, i32 14		; <i8> [#uses=1]
373	%tmp16 = extractelement <16 x i8> %tmp, i32 15		; <i8> [#uses=1]
374	%tmp17 = extractelement <16 x i8> %tmp, i32 15		; <i8> [#uses=1]
375	%tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.19, i32 0		; <<16 x i8>> [#uses=1]
376	%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1		; <<16 x i8>> [#uses=1]
377	%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2		; <<16 x i8>> [#uses=1]
378	%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3		; <<16 x i8>> [#uses=1]
379	%tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4		; <<16 x i8>> [#uses=1]
380	%tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5		; <<16 x i8>> [#uses=1]
381	%tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6		; <<16 x i8>> [#uses=1]
382	%tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7		; <<16 x i8>> [#uses=1]
383	%tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8		; <<16 x i8>> [#uses=1]
384	%tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9		; <<16 x i8>> [#uses=1]
385	%tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10		; <<16 x i8>> [#uses=1]
386	%tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11		; <<16 x i8>> [#uses=1]
387	%tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12		; <<16 x i8>> [#uses=1]
388	%tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13		; <<16 x i8>> [#uses=1]
389	%tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14		; <<16 x i8>> [#uses=1]
390	%tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15		; <<16 x i8>> [#uses=1]
391	store <16 x i8> %tmp33, <16 x i8>* %A
392	ret void
393}
394
; Single-input halfword merge of elements 4..7: each of i16 elements 4..7 of
; the vector loaded from %A is written to two adjacent result lanes, and the
; result is stored to %A (%B is unused). The assertions below expect one
; load and a vmrglh whose two source operands are the same register.
395define void @VMRG_UNARY_th_l(<8 x i16>* %A, <8 x i16>* %B) {
396; CHECK-LABEL: VMRG_UNARY_th_l:
397; CHECK:       # %bb.0: # %entry
398; CHECK-NEXT:    lvx 2, 0, 3
399; CHECK-NEXT:    vmrglh 2, 2, 2
400; CHECK-NEXT:    stvx 2, 0, 3
401; CHECK-NEXT:    blr
402entry:
403	%tmp = load <8 x i16>, <8 x i16>* %A		; <<8 x i16>> [#uses=8]
404	%tmp.upgrd.20 = extractelement <8 x i16> %tmp, i32 4		; <i16> [#uses=1]
405	%tmp3 = extractelement <8 x i16> %tmp, i32 4		; <i16> [#uses=1]
406	%tmp4 = extractelement <8 x i16> %tmp, i32 5		; <i16> [#uses=1]
407	%tmp5 = extractelement <8 x i16> %tmp, i32 5		; <i16> [#uses=1]
408	%tmp6 = extractelement <8 x i16> %tmp, i32 6		; <i16> [#uses=1]
409	%tmp7 = extractelement <8 x i16> %tmp, i32 6		; <i16> [#uses=1]
410	%tmp8 = extractelement <8 x i16> %tmp, i32 7		; <i16> [#uses=1]
411	%tmp9 = extractelement <8 x i16> %tmp, i32 7		; <i16> [#uses=1]
412	%tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.20, i32 0		; <<8 x i16>> [#uses=1]
413	%tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1		; <<8 x i16>> [#uses=1]
414	%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2		; <<8 x i16>> [#uses=1]
415	%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3		; <<8 x i16>> [#uses=1]
416	%tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4		; <<8 x i16>> [#uses=1]
417	%tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5		; <<8 x i16>> [#uses=1]
418	%tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6		; <<8 x i16>> [#uses=1]
419	%tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7		; <<8 x i16>> [#uses=1]
420	store <8 x i16> %tmp17, <8 x i16>* %A
421	ret void
422}
423
; Single-input word merge of elements 2..3: i32 elements 2 and 3 of the vector
; loaded from %A are each written to two adjacent result lanes, and the
; result is stored to %A (%B is unused). The assertions below expect one
; load and a vmrglw whose two source operands are the same register.
424define void @VMRG_UNARY_tw_l(<4 x i32>* %A, <4 x i32>* %B) {
425; CHECK-LABEL: VMRG_UNARY_tw_l:
426; CHECK:       # %bb.0: # %entry
427; CHECK-NEXT:    lvx 2, 0, 3
428; CHECK-NEXT:    vmrglw 2, 2, 2
429; CHECK-NEXT:    stvx 2, 0, 3
430; CHECK-NEXT:    blr
431entry:
432	%tmp = load <4 x i32>, <4 x i32>* %A		; <<4 x i32>> [#uses=4]
433	%tmp.upgrd.21 = extractelement <4 x i32> %tmp, i32 2		; <i32> [#uses=1]
434	%tmp3 = extractelement <4 x i32> %tmp, i32 2		; <i32> [#uses=1]
435	%tmp4 = extractelement <4 x i32> %tmp, i32 3		; <i32> [#uses=1]
436	%tmp5 = extractelement <4 x i32> %tmp, i32 3		; <i32> [#uses=1]
437	%tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.21, i32 0		; <<4 x i32>> [#uses=1]
438	%tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1		; <<4 x i32>> [#uses=1]
439	%tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2		; <<4 x i32>> [#uses=1]
440	%tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3		; <<4 x i32>> [#uses=1]
441	store <4 x i32> %tmp9, <4 x i32>* %A
442	ret void
443}
444
; Single-input byte merge of elements 0..7: each of bytes 0..7 of the vector
; loaded from %A is written to two adjacent result lanes, and the result is
; stored to %A (%B is unused). The assertions below expect one load and a
; vmrghb whose two source operands are the same register.
445define void @VMRG_UNARY_tb_h(<16 x i8>* %A, <16 x i8>* %B) {
446; CHECK-LABEL: VMRG_UNARY_tb_h:
447; CHECK:       # %bb.0: # %entry
448; CHECK-NEXT:    lvx 2, 0, 3
449; CHECK-NEXT:    vmrghb 2, 2, 2
450; CHECK-NEXT:    stvx 2, 0, 3
451; CHECK-NEXT:    blr
452entry:
453	%tmp = load <16 x i8>, <16 x i8>* %A		; <<16 x i8>> [#uses=16]
454	%tmp.upgrd.22 = extractelement <16 x i8> %tmp, i32 0		; <i8> [#uses=1]
455	%tmp3 = extractelement <16 x i8> %tmp, i32 0		; <i8> [#uses=1]
456	%tmp4 = extractelement <16 x i8> %tmp, i32 1		; <i8> [#uses=1]
457	%tmp5 = extractelement <16 x i8> %tmp, i32 1		; <i8> [#uses=1]
458	%tmp6 = extractelement <16 x i8> %tmp, i32 2		; <i8> [#uses=1]
459	%tmp7 = extractelement <16 x i8> %tmp, i32 2		; <i8> [#uses=1]
460	%tmp8 = extractelement <16 x i8> %tmp, i32 3		; <i8> [#uses=1]
461	%tmp9 = extractelement <16 x i8> %tmp, i32 3		; <i8> [#uses=1]
462	%tmp10 = extractelement <16 x i8> %tmp, i32 4		; <i8> [#uses=1]
463	%tmp11 = extractelement <16 x i8> %tmp, i32 4		; <i8> [#uses=1]
464	%tmp12 = extractelement <16 x i8> %tmp, i32 5		; <i8> [#uses=1]
465	%tmp13 = extractelement <16 x i8> %tmp, i32 5		; <i8> [#uses=1]
466	%tmp14 = extractelement <16 x i8> %tmp, i32 6		; <i8> [#uses=1]
467	%tmp15 = extractelement <16 x i8> %tmp, i32 6		; <i8> [#uses=1]
468	%tmp16 = extractelement <16 x i8> %tmp, i32 7		; <i8> [#uses=1]
469	%tmp17 = extractelement <16 x i8> %tmp, i32 7		; <i8> [#uses=1]
470	%tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.22, i32 0		; <<16 x i8>> [#uses=1]
471	%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1		; <<16 x i8>> [#uses=1]
472	%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2		; <<16 x i8>> [#uses=1]
473	%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3		; <<16 x i8>> [#uses=1]
474	%tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4		; <<16 x i8>> [#uses=1]
475	%tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5		; <<16 x i8>> [#uses=1]
476	%tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6		; <<16 x i8>> [#uses=1]
477	%tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7		; <<16 x i8>> [#uses=1]
478	%tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8		; <<16 x i8>> [#uses=1]
479	%tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9		; <<16 x i8>> [#uses=1]
480	%tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10		; <<16 x i8>> [#uses=1]
481	%tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11		; <<16 x i8>> [#uses=1]
482	%tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12		; <<16 x i8>> [#uses=1]
483	%tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13		; <<16 x i8>> [#uses=1]
484	%tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14		; <<16 x i8>> [#uses=1]
485	%tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15		; <<16 x i8>> [#uses=1]
486	store <16 x i8> %tmp33, <16 x i8>* %A
487	ret void
488}
489
; Single-input halfword merge of elements 0..3: each of i16 elements 0..3 of
; the vector loaded from %A is written to two adjacent result lanes, and the
; result is stored to %A (%B is unused). The assertions below expect one
; load and a vmrghh whose two source operands are the same register.
490define void @VMRG_UNARY_th_h(<8 x i16>* %A, <8 x i16>* %B) {
491; CHECK-LABEL: VMRG_UNARY_th_h:
492; CHECK:       # %bb.0: # %entry
493; CHECK-NEXT:    lvx 2, 0, 3
494; CHECK-NEXT:    vmrghh 2, 2, 2
495; CHECK-NEXT:    stvx 2, 0, 3
496; CHECK-NEXT:    blr
497entry:
498	%tmp = load <8 x i16>, <8 x i16>* %A		; <<8 x i16>> [#uses=8]
499	%tmp.upgrd.23 = extractelement <8 x i16> %tmp, i32 0		; <i16> [#uses=1]
500	%tmp3 = extractelement <8 x i16> %tmp, i32 0		; <i16> [#uses=1]
501	%tmp4 = extractelement <8 x i16> %tmp, i32 1		; <i16> [#uses=1]
502	%tmp5 = extractelement <8 x i16> %tmp, i32 1		; <i16> [#uses=1]
503	%tmp6 = extractelement <8 x i16> %tmp, i32 2		; <i16> [#uses=1]
504	%tmp7 = extractelement <8 x i16> %tmp, i32 2		; <i16> [#uses=1]
505	%tmp8 = extractelement <8 x i16> %tmp, i32 3		; <i16> [#uses=1]
506	%tmp9 = extractelement <8 x i16> %tmp, i32 3		; <i16> [#uses=1]
507	%tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.23, i32 0		; <<8 x i16>> [#uses=1]
508	%tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1		; <<8 x i16>> [#uses=1]
509	%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2		; <<8 x i16>> [#uses=1]
510	%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3		; <<8 x i16>> [#uses=1]
511	%tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4		; <<8 x i16>> [#uses=1]
512	%tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5		; <<8 x i16>> [#uses=1]
513	%tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6		; <<8 x i16>> [#uses=1]
514	%tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7		; <<8 x i16>> [#uses=1]
515	store <8 x i16> %tmp17, <8 x i16>* %A
516	ret void
517}
518
; Single-input word merge of elements 0..1: i32 elements 0 and 1 of the vector
; loaded from %A are each written to two adjacent result lanes, and the
; result is stored to %A (%B is unused). The assertions below expect one
; load and a vmrghw whose two source operands are the same register.
519define void @VMRG_UNARY_tw_h(<4 x i32>* %A, <4 x i32>* %B) {
520; CHECK-LABEL: VMRG_UNARY_tw_h:
521; CHECK:       # %bb.0: # %entry
522; CHECK-NEXT:    lvx 2, 0, 3
523; CHECK-NEXT:    vmrghw 2, 2, 2
524; CHECK-NEXT:    stvx 2, 0, 3
525; CHECK-NEXT:    blr
526entry:
527	%tmp = load <4 x i32>, <4 x i32>* %A		; <<4 x i32>> [#uses=4]
528	%tmp.upgrd.24 = extractelement <4 x i32> %tmp, i32 0		; <i32> [#uses=1]
529	%tmp3 = extractelement <4 x i32> %tmp, i32 0		; <i32> [#uses=1]
530	%tmp4 = extractelement <4 x i32> %tmp, i32 1		; <i32> [#uses=1]
531	%tmp5 = extractelement <4 x i32> %tmp, i32 1		; <i32> [#uses=1]
532	%tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.24, i32 0		; <<4 x i32>> [#uses=1]
533	%tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1		; <<4 x i32>> [#uses=1]
534	%tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2		; <<4 x i32>> [#uses=1]
535	%tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3		; <<4 x i32>> [#uses=1]
536	store <4 x i32> %tmp9, <4 x i32>* %A
537	ret void
538}
539
; Unary halfword-to-byte pack: the <8 x i16> loaded from %A is viewed as
; <16 x i8> through two identical bitcasts, the odd-numbered bytes
; (1, 3, ..., 15) are taken from each view, and the 16 resulting bytes are
; concatenated (first view into lanes 0..7, second view into lanes 8..15).
; The result is cast back to <8 x i16> and stored to %A. %B is never read.
; The assertions below expect a single vpkuhum with the same register used for
; both source operands.
define void @VPCKUHUM_unary(<8 x i16>* %A, <8 x i16>* %B) {
; CHECK-LABEL: VPCKUHUM_unary:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lvx 2, 0, 3
; CHECK-NEXT:    vpkuhum 2, 2, 2
; CHECK-NEXT:    stvx 2, 0, 3
; CHECK-NEXT:    blr
entry:
	%tmp = load <8 x i16>, <8 x i16>* %A		; <<8 x i16>> [#uses=2]
	; Two byte-wise views of the same loaded value; they stand in for the two
	; operands of the pack, which makes this the unary form.
	%tmp.upgrd.25 = bitcast <8 x i16> %tmp to <16 x i8>		; <<16 x i8>> [#uses=8]
	%tmp3 = bitcast <8 x i16> %tmp to <16 x i8>		; <<16 x i8>> [#uses=8]
	; Odd bytes of the first view (one byte out of every 16-bit lane).
	%tmp.upgrd.26 = extractelement <16 x i8> %tmp.upgrd.25, i32 1		; <i8> [#uses=1]
	%tmp4 = extractelement <16 x i8> %tmp.upgrd.25, i32 3		; <i8> [#uses=1]
	%tmp5 = extractelement <16 x i8> %tmp.upgrd.25, i32 5		; <i8> [#uses=1]
	%tmp6 = extractelement <16 x i8> %tmp.upgrd.25, i32 7		; <i8> [#uses=1]
	%tmp7 = extractelement <16 x i8> %tmp.upgrd.25, i32 9		; <i8> [#uses=1]
	%tmp8 = extractelement <16 x i8> %tmp.upgrd.25, i32 11		; <i8> [#uses=1]
	%tmp9 = extractelement <16 x i8> %tmp.upgrd.25, i32 13		; <i8> [#uses=1]
	%tmp10 = extractelement <16 x i8> %tmp.upgrd.25, i32 15		; <i8> [#uses=1]
	; The same odd bytes again, this time read through the second view.
	%tmp11 = extractelement <16 x i8> %tmp3, i32 1		; <i8> [#uses=1]
	%tmp12 = extractelement <16 x i8> %tmp3, i32 3		; <i8> [#uses=1]
	%tmp13 = extractelement <16 x i8> %tmp3, i32 5		; <i8> [#uses=1]
	%tmp14 = extractelement <16 x i8> %tmp3, i32 7		; <i8> [#uses=1]
	%tmp15 = extractelement <16 x i8> %tmp3, i32 9		; <i8> [#uses=1]
	%tmp16 = extractelement <16 x i8> %tmp3, i32 11		; <i8> [#uses=1]
	%tmp17 = extractelement <16 x i8> %tmp3, i32 13		; <i8> [#uses=1]
	%tmp18 = extractelement <16 x i8> %tmp3, i32 15		; <i8> [#uses=1]
	; Lanes 0..7 of the result receive the bytes taken from the first view.
	%tmp19 = insertelement <16 x i8> undef, i8 %tmp.upgrd.26, i32 0		; <<16 x i8>> [#uses=1]
	%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 1		; <<16 x i8>> [#uses=1]
	%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 2		; <<16 x i8>> [#uses=1]
	%tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 3		; <<16 x i8>> [#uses=1]
	%tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 4		; <<16 x i8>> [#uses=1]
	%tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 5		; <<16 x i8>> [#uses=1]
	%tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 6		; <<16 x i8>> [#uses=1]
	%tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 7		; <<16 x i8>> [#uses=1]
	; Lanes 8..15 receive the bytes taken from the second view.
	%tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 8		; <<16 x i8>> [#uses=1]
	%tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 9		; <<16 x i8>> [#uses=1]
	%tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 10		; <<16 x i8>> [#uses=1]
	%tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 11		; <<16 x i8>> [#uses=1]
	%tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 12		; <<16 x i8>> [#uses=1]
	%tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 13		; <<16 x i8>> [#uses=1]
	%tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 14		; <<16 x i8>> [#uses=1]
	%tmp34 = insertelement <16 x i8> %tmp33, i8 %tmp18, i32 15		; <<16 x i8>> [#uses=1]
	; Return to the pointer's element type and overwrite the input vector.
	%tmp34.upgrd.27 = bitcast <16 x i8> %tmp34 to <8 x i16>		; <<8 x i16>> [#uses=1]
	store <8 x i16> %tmp34.upgrd.27, <8 x i16>* %A
	ret void
}
587
; Unary word-to-halfword pack: the <4 x i32> loaded from %A is viewed as
; <8 x i16> through two identical bitcasts, the odd-numbered halfwords
; (1, 3, 5, 7) are taken from each view, and the 8 resulting halfwords are
; concatenated (first view into lanes 0..3, second view into lanes 4..7).
; The result is cast back to <4 x i32> and stored to %A. %B is never read.
; The assertions below expect a single vpkuwum with the same register used for
; both source operands.
define void @VPCKUWUM_unary(<4 x i32>* %A, <4 x i32>* %B) {
; CHECK-LABEL: VPCKUWUM_unary:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lvx 2, 0, 3
; CHECK-NEXT:    vpkuwum 2, 2, 2
; CHECK-NEXT:    stvx 2, 0, 3
; CHECK-NEXT:    blr
entry:
	%tmp = load <4 x i32>, <4 x i32>* %A		; <<4 x i32>> [#uses=2]
	; Two halfword-wise views of the same loaded value; they stand in for the
	; two operands of the pack, which makes this the unary form.
	%tmp.upgrd.28 = bitcast <4 x i32> %tmp to <8 x i16>		; <<8 x i16>> [#uses=4]
	%tmp3 = bitcast <4 x i32> %tmp to <8 x i16>		; <<8 x i16>> [#uses=4]
	; Odd halfwords of the first view (one halfword out of every 32-bit lane).
	%tmp.upgrd.29 = extractelement <8 x i16> %tmp.upgrd.28, i32 1		; <i16> [#uses=1]
	%tmp4 = extractelement <8 x i16> %tmp.upgrd.28, i32 3		; <i16> [#uses=1]
	%tmp5 = extractelement <8 x i16> %tmp.upgrd.28, i32 5		; <i16> [#uses=1]
	%tmp6 = extractelement <8 x i16> %tmp.upgrd.28, i32 7		; <i16> [#uses=1]
	; The same odd halfwords again, this time read through the second view.
	%tmp7 = extractelement <8 x i16> %tmp3, i32 1		; <i16> [#uses=1]
	%tmp8 = extractelement <8 x i16> %tmp3, i32 3		; <i16> [#uses=1]
	%tmp9 = extractelement <8 x i16> %tmp3, i32 5		; <i16> [#uses=1]
	%tmp10 = extractelement <8 x i16> %tmp3, i32 7		; <i16> [#uses=1]
	; Lanes 0..3 come from the first view, lanes 4..7 from the second.
	%tmp11 = insertelement <8 x i16> undef, i16 %tmp.upgrd.29, i32 0		; <<8 x i16>> [#uses=1]
	%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 1		; <<8 x i16>> [#uses=1]
	%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 2		; <<8 x i16>> [#uses=1]
	%tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 3		; <<8 x i16>> [#uses=1]
	%tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 4		; <<8 x i16>> [#uses=1]
	%tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 5		; <<8 x i16>> [#uses=1]
	%tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 6		; <<8 x i16>> [#uses=1]
	%tmp18 = insertelement <8 x i16> %tmp17, i16 %tmp10, i32 7		; <<8 x i16>> [#uses=1]
	; Return to the pointer's element type and overwrite the input vector.
	%tmp18.upgrd.30 = bitcast <8 x i16> %tmp18 to <4 x i32>		; <<4 x i32>> [#uses=1]
	store <4 x i32> %tmp18.upgrd.30, <4 x i32>* %A
	ret void
}
619