1; RUN: opt < %s -instcombine | \
2; RUN:   llc -march=ppc32 -mcpu=g5 | not grep vperm
3; RUN: llc < %s -march=ppc32 -mcpu=g5 > %t
4; RUN: grep vsldoi  %t | count 2
5; RUN: grep vmrgh   %t | count 7
6; RUN: grep vmrgl   %t | count 6
7; RUN: grep vpkuhum %t | count 1
8; RUN: grep vpkuwum %t | count 1
9
; @VSLDOI_xy: loads one <8 x i16> vector from %A and one from %B, views both
; as <16 x i8>, and assembles a result whose elements 0..10 are elements 5..15
; of the %A vector and whose elements 11..15 are elements 0..4 of the %B
; vector. The result is bitcast back to <8 x i16> and stored to %A.
; NOTE(review): presumably one of the two `vsldoi` matches counted by the RUN
; lines at the top of this file -- confirm against the llc output.
10define void @VSLDOI_xy(<8 x i16>* %A, <8 x i16>* %B) {
11entry:
12	%tmp = load <8 x i16>* %A		; <<8 x i16>> [#uses=1]
13	%tmp2 = load <8 x i16>* %B		; <<8 x i16>> [#uses=1]
14	%tmp.upgrd.1 = bitcast <8 x i16> %tmp to <16 x i8>		; <<16 x i8>> [#uses=11]
15	%tmp2.upgrd.2 = bitcast <8 x i16> %tmp2 to <16 x i8>		; <<16 x i8>> [#uses=5]
; Pull elements 5..15 out of the first operand.
16	%tmp.upgrd.3 = extractelement <16 x i8> %tmp.upgrd.1, i32 5		; <i8> [#uses=1]
17	%tmp3 = extractelement <16 x i8> %tmp.upgrd.1, i32 6		; <i8> [#uses=1]
18	%tmp4 = extractelement <16 x i8> %tmp.upgrd.1, i32 7		; <i8> [#uses=1]
19	%tmp5 = extractelement <16 x i8> %tmp.upgrd.1, i32 8		; <i8> [#uses=1]
20	%tmp6 = extractelement <16 x i8> %tmp.upgrd.1, i32 9		; <i8> [#uses=1]
21	%tmp7 = extractelement <16 x i8> %tmp.upgrd.1, i32 10		; <i8> [#uses=1]
22	%tmp8 = extractelement <16 x i8> %tmp.upgrd.1, i32 11		; <i8> [#uses=1]
23	%tmp9 = extractelement <16 x i8> %tmp.upgrd.1, i32 12		; <i8> [#uses=1]
24	%tmp10 = extractelement <16 x i8> %tmp.upgrd.1, i32 13		; <i8> [#uses=1]
25	%tmp11 = extractelement <16 x i8> %tmp.upgrd.1, i32 14		; <i8> [#uses=1]
26	%tmp12 = extractelement <16 x i8> %tmp.upgrd.1, i32 15		; <i8> [#uses=1]
; Pull elements 0..4 out of the second operand.
27	%tmp13 = extractelement <16 x i8> %tmp2.upgrd.2, i32 0		; <i8> [#uses=1]
28	%tmp14 = extractelement <16 x i8> %tmp2.upgrd.2, i32 1		; <i8> [#uses=1]
29	%tmp15 = extractelement <16 x i8> %tmp2.upgrd.2, i32 2		; <i8> [#uses=1]
30	%tmp16 = extractelement <16 x i8> %tmp2.upgrd.2, i32 3		; <i8> [#uses=1]
31	%tmp17 = extractelement <16 x i8> %tmp2.upgrd.2, i32 4		; <i8> [#uses=1]
; Rebuild a full vector from the extracted elements, in extraction order.
32	%tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.3, i32 0		; <<16 x i8>> [#uses=1]
33	%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1		; <<16 x i8>> [#uses=1]
34	%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2		; <<16 x i8>> [#uses=1]
35	%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3		; <<16 x i8>> [#uses=1]
36	%tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4		; <<16 x i8>> [#uses=1]
37	%tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5		; <<16 x i8>> [#uses=1]
38	%tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6		; <<16 x i8>> [#uses=1]
39	%tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7		; <<16 x i8>> [#uses=1]
40	%tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8		; <<16 x i8>> [#uses=1]
41	%tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9		; <<16 x i8>> [#uses=1]
42	%tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10		; <<16 x i8>> [#uses=1]
43	%tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11		; <<16 x i8>> [#uses=1]
44	%tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12		; <<16 x i8>> [#uses=1]
45	%tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13		; <<16 x i8>> [#uses=1]
46	%tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14		; <<16 x i8>> [#uses=1]
47	%tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15		; <<16 x i8>> [#uses=1]
48	%tmp33.upgrd.4 = bitcast <16 x i8> %tmp33 to <8 x i16>		; <<8 x i16>> [#uses=1]
49	store <8 x i16> %tmp33.upgrd.4, <8 x i16>* %A
50	ret void
51}
52
; @VSLDOI_xx: single-source variant of the element-window pattern. Both loads
; read %A (%B is never used), both values are viewed as <16 x i8>, and the
; result takes elements 5..15 from the first load followed by elements 0..4
; from the second load. The result is bitcast back to <8 x i16> and stored
; to %A.
; NOTE(review): presumably one of the two `vsldoi` matches counted by the RUN
; lines at the top of this file -- confirm against the llc output.
53define void @VSLDOI_xx(<8 x i16>* %A, <8 x i16>* %B) {
54	%tmp = load <8 x i16>* %A		; <<8 x i16>> [#uses=1]
55	%tmp2 = load <8 x i16>* %A		; <<8 x i16>> [#uses=1]
56	%tmp.upgrd.5 = bitcast <8 x i16> %tmp to <16 x i8>		; <<16 x i8>> [#uses=11]
57	%tmp2.upgrd.6 = bitcast <8 x i16> %tmp2 to <16 x i8>		; <<16 x i8>> [#uses=5]
; Pull elements 5..15 out of the first load.
58	%tmp.upgrd.7 = extractelement <16 x i8> %tmp.upgrd.5, i32 5		; <i8> [#uses=1]
59	%tmp3 = extractelement <16 x i8> %tmp.upgrd.5, i32 6		; <i8> [#uses=1]
60	%tmp4 = extractelement <16 x i8> %tmp.upgrd.5, i32 7		; <i8> [#uses=1]
61	%tmp5 = extractelement <16 x i8> %tmp.upgrd.5, i32 8		; <i8> [#uses=1]
62	%tmp6 = extractelement <16 x i8> %tmp.upgrd.5, i32 9		; <i8> [#uses=1]
63	%tmp7 = extractelement <16 x i8> %tmp.upgrd.5, i32 10		; <i8> [#uses=1]
64	%tmp8 = extractelement <16 x i8> %tmp.upgrd.5, i32 11		; <i8> [#uses=1]
65	%tmp9 = extractelement <16 x i8> %tmp.upgrd.5, i32 12		; <i8> [#uses=1]
66	%tmp10 = extractelement <16 x i8> %tmp.upgrd.5, i32 13		; <i8> [#uses=1]
67	%tmp11 = extractelement <16 x i8> %tmp.upgrd.5, i32 14		; <i8> [#uses=1]
68	%tmp12 = extractelement <16 x i8> %tmp.upgrd.5, i32 15		; <i8> [#uses=1]
; Pull elements 0..4 out of the second load (same memory location, %A).
69	%tmp13 = extractelement <16 x i8> %tmp2.upgrd.6, i32 0		; <i8> [#uses=1]
70	%tmp14 = extractelement <16 x i8> %tmp2.upgrd.6, i32 1		; <i8> [#uses=1]
71	%tmp15 = extractelement <16 x i8> %tmp2.upgrd.6, i32 2		; <i8> [#uses=1]
72	%tmp16 = extractelement <16 x i8> %tmp2.upgrd.6, i32 3		; <i8> [#uses=1]
73	%tmp17 = extractelement <16 x i8> %tmp2.upgrd.6, i32 4		; <i8> [#uses=1]
; Rebuild a full vector from the extracted elements, in extraction order.
74	%tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.7, i32 0		; <<16 x i8>> [#uses=1]
75	%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1		; <<16 x i8>> [#uses=1]
76	%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2		; <<16 x i8>> [#uses=1]
77	%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3		; <<16 x i8>> [#uses=1]
78	%tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4		; <<16 x i8>> [#uses=1]
79	%tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5		; <<16 x i8>> [#uses=1]
80	%tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6		; <<16 x i8>> [#uses=1]
81	%tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7		; <<16 x i8>> [#uses=1]
82	%tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8		; <<16 x i8>> [#uses=1]
83	%tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9		; <<16 x i8>> [#uses=1]
84	%tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10		; <<16 x i8>> [#uses=1]
85	%tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11		; <<16 x i8>> [#uses=1]
86	%tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12		; <<16 x i8>> [#uses=1]
87	%tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13		; <<16 x i8>> [#uses=1]
88	%tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14		; <<16 x i8>> [#uses=1]
89	%tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15		; <<16 x i8>> [#uses=1]
90	%tmp33.upgrd.8 = bitcast <16 x i8> %tmp33 to <8 x i16>		; <<8 x i16>> [#uses=1]
91	store <8 x i16> %tmp33.upgrd.8, <8 x i16>* %A
92	ret void
93}
94
; @VPERM_promote: loads <8 x i16> vectors from %A and %B, bitcasts each to
; <4 x i32>, and calls the @llvm.ppc.altivec.vperm intrinsic with a constant
; control vector in which all sixteen i8 entries are 14. The <4 x i32> result
; is bitcast back to <8 x i16> and stored to %A.
; The first RUN pipeline in this file (opt -instcombine piped into llc)
; requires that no `vperm` appears in the output.
; NOTE(review): presumably the point is that instcombine rewrites this
; constant-mask call into a form llc selects without vperm -- confirm.
95define void @VPERM_promote(<8 x i16>* %A, <8 x i16>* %B) {
96entry:
97	%tmp = load <8 x i16>* %A		; <<8 x i16>> [#uses=1]
98	%tmp.upgrd.9 = bitcast <8 x i16> %tmp to <4 x i32>		; <<4 x i32>> [#uses=1]
99	%tmp2 = load <8 x i16>* %B		; <<8 x i16>> [#uses=1]
100	%tmp2.upgrd.10 = bitcast <8 x i16> %tmp2 to <4 x i32>		; <<4 x i32>> [#uses=1]
; Every entry of the control vector is the same constant (14).
101	%tmp3 = call <4 x i32> @llvm.ppc.altivec.vperm( <4 x i32> %tmp.upgrd.9, <4 x i32> %tmp2.upgrd.10, <16 x i8> < i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14 > )		; <<4 x i32>> [#uses=1]
102	%tmp3.upgrd.11 = bitcast <4 x i32> %tmp3 to <8 x i16>		; <<8 x i16>> [#uses=1]
103	store <8 x i16> %tmp3.upgrd.11, <8 x i16>* %A
104	ret void
105}
106
; External declaration of the AltiVec vperm intrinsic called by
; @VPERM_promote: two <4 x i32> operands plus a <16 x i8> control vector,
; returning <4 x i32>.
107declare <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32>, <4 x i32>, <16 x i8>)
108
; @tb_l: two-operand byte interleave of the upper-indexed half. Loads
; <16 x i8> vectors from %A and %B and builds the result
; A[8], B[8], A[9], B[9], ..., A[15], B[15], which is stored to %A.
; NOTE(review): presumably counted by the `grep vmrgl` RUN line at the top of
; this file -- confirm against the llc output.
109define void @tb_l(<16 x i8>* %A, <16 x i8>* %B) {
110entry:
111	%tmp = load <16 x i8>* %A		; <<16 x i8>> [#uses=8]
112	%tmp2 = load <16 x i8>* %B		; <<16 x i8>> [#uses=8]
; Alternate between the two sources for each index 8..15.
113	%tmp.upgrd.12 = extractelement <16 x i8> %tmp, i32 8		; <i8> [#uses=1]
114	%tmp3 = extractelement <16 x i8> %tmp2, i32 8		; <i8> [#uses=1]
115	%tmp4 = extractelement <16 x i8> %tmp, i32 9		; <i8> [#uses=1]
116	%tmp5 = extractelement <16 x i8> %tmp2, i32 9		; <i8> [#uses=1]
117	%tmp6 = extractelement <16 x i8> %tmp, i32 10		; <i8> [#uses=1]
118	%tmp7 = extractelement <16 x i8> %tmp2, i32 10		; <i8> [#uses=1]
119	%tmp8 = extractelement <16 x i8> %tmp, i32 11		; <i8> [#uses=1]
120	%tmp9 = extractelement <16 x i8> %tmp2, i32 11		; <i8> [#uses=1]
121	%tmp10 = extractelement <16 x i8> %tmp, i32 12		; <i8> [#uses=1]
122	%tmp11 = extractelement <16 x i8> %tmp2, i32 12		; <i8> [#uses=1]
123	%tmp12 = extractelement <16 x i8> %tmp, i32 13		; <i8> [#uses=1]
124	%tmp13 = extractelement <16 x i8> %tmp2, i32 13		; <i8> [#uses=1]
125	%tmp14 = extractelement <16 x i8> %tmp, i32 14		; <i8> [#uses=1]
126	%tmp15 = extractelement <16 x i8> %tmp2, i32 14		; <i8> [#uses=1]
127	%tmp16 = extractelement <16 x i8> %tmp, i32 15		; <i8> [#uses=1]
128	%tmp17 = extractelement <16 x i8> %tmp2, i32 15		; <i8> [#uses=1]
; Rebuild a full vector from the extracted elements, in extraction order.
129	%tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.12, i32 0		; <<16 x i8>> [#uses=1]
130	%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1		; <<16 x i8>> [#uses=1]
131	%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2		; <<16 x i8>> [#uses=1]
132	%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3		; <<16 x i8>> [#uses=1]
133	%tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4		; <<16 x i8>> [#uses=1]
134	%tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5		; <<16 x i8>> [#uses=1]
135	%tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6		; <<16 x i8>> [#uses=1]
136	%tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7		; <<16 x i8>> [#uses=1]
137	%tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8		; <<16 x i8>> [#uses=1]
138	%tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9		; <<16 x i8>> [#uses=1]
139	%tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10		; <<16 x i8>> [#uses=1]
140	%tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11		; <<16 x i8>> [#uses=1]
141	%tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12		; <<16 x i8>> [#uses=1]
142	%tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13		; <<16 x i8>> [#uses=1]
143	%tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14		; <<16 x i8>> [#uses=1]
144	%tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15		; <<16 x i8>> [#uses=1]
145	store <16 x i8> %tmp33, <16 x i8>* %A
146	ret void
147}
148
; @th_l: two-operand halfword interleave of the upper-indexed half. Loads
; <8 x i16> vectors from %A and %B and builds the result
; A[4], B[4], A[5], B[5], A[6], B[6], A[7], B[7], which is stored to %A.
; NOTE(review): presumably counted by the `grep vmrgl` RUN line at the top of
; this file -- confirm against the llc output.
149define void @th_l(<8 x i16>* %A, <8 x i16>* %B) {
150entry:
151	%tmp = load <8 x i16>* %A		; <<8 x i16>> [#uses=4]
152	%tmp2 = load <8 x i16>* %B		; <<8 x i16>> [#uses=4]
; Alternate between the two sources for each index 4..7.
153	%tmp.upgrd.13 = extractelement <8 x i16> %tmp, i32 4		; <i16> [#uses=1]
154	%tmp3 = extractelement <8 x i16> %tmp2, i32 4		; <i16> [#uses=1]
155	%tmp4 = extractelement <8 x i16> %tmp, i32 5		; <i16> [#uses=1]
156	%tmp5 = extractelement <8 x i16> %tmp2, i32 5		; <i16> [#uses=1]
157	%tmp6 = extractelement <8 x i16> %tmp, i32 6		; <i16> [#uses=1]
158	%tmp7 = extractelement <8 x i16> %tmp2, i32 6		; <i16> [#uses=1]
159	%tmp8 = extractelement <8 x i16> %tmp, i32 7		; <i16> [#uses=1]
160	%tmp9 = extractelement <8 x i16> %tmp2, i32 7		; <i16> [#uses=1]
; Rebuild a full vector from the extracted elements, in extraction order.
161	%tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.13, i32 0		; <<8 x i16>> [#uses=1]
162	%tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1		; <<8 x i16>> [#uses=1]
163	%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2		; <<8 x i16>> [#uses=1]
164	%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3		; <<8 x i16>> [#uses=1]
165	%tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4		; <<8 x i16>> [#uses=1]
166	%tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5		; <<8 x i16>> [#uses=1]
167	%tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6		; <<8 x i16>> [#uses=1]
168	%tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7		; <<8 x i16>> [#uses=1]
169	store <8 x i16> %tmp17, <8 x i16>* %A
170	ret void
171}
172
; @tw_l: two-operand word interleave of the upper-indexed half. Loads
; <4 x i32> vectors from %A and %B and builds the result
; A[2], B[2], A[3], B[3], which is stored to %A.
; NOTE(review): presumably counted by the `grep vmrgl` RUN line at the top of
; this file -- confirm against the llc output.
173define void @tw_l(<4 x i32>* %A, <4 x i32>* %B) {
174entry:
175	%tmp = load <4 x i32>* %A		; <<4 x i32>> [#uses=2]
176	%tmp2 = load <4 x i32>* %B		; <<4 x i32>> [#uses=2]
; Alternate between the two sources for indices 2 and 3.
177	%tmp.upgrd.14 = extractelement <4 x i32> %tmp, i32 2		; <i32> [#uses=1]
178	%tmp3 = extractelement <4 x i32> %tmp2, i32 2		; <i32> [#uses=1]
179	%tmp4 = extractelement <4 x i32> %tmp, i32 3		; <i32> [#uses=1]
180	%tmp5 = extractelement <4 x i32> %tmp2, i32 3		; <i32> [#uses=1]
; Rebuild a full vector from the extracted elements, in extraction order.
181	%tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.14, i32 0		; <<4 x i32>> [#uses=1]
182	%tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1		; <<4 x i32>> [#uses=1]
183	%tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2		; <<4 x i32>> [#uses=1]
184	%tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3		; <<4 x i32>> [#uses=1]
185	store <4 x i32> %tmp9, <4 x i32>* %A
186	ret void
187}
188
; @tb_h: two-operand byte interleave of the lower-indexed half. Loads
; <16 x i8> vectors from %A and %B and builds the result
; A[0], B[0], A[1], B[1], ..., A[7], B[7], which is stored to %A.
; NOTE(review): presumably counted by the `grep vmrgh` RUN line at the top of
; this file -- confirm against the llc output.
189define void @tb_h(<16 x i8>* %A, <16 x i8>* %B) {
190entry:
191	%tmp = load <16 x i8>* %A		; <<16 x i8>> [#uses=8]
192	%tmp2 = load <16 x i8>* %B		; <<16 x i8>> [#uses=8]
; Alternate between the two sources for each index 0..7.
193	%tmp.upgrd.15 = extractelement <16 x i8> %tmp, i32 0		; <i8> [#uses=1]
194	%tmp3 = extractelement <16 x i8> %tmp2, i32 0		; <i8> [#uses=1]
195	%tmp4 = extractelement <16 x i8> %tmp, i32 1		; <i8> [#uses=1]
196	%tmp5 = extractelement <16 x i8> %tmp2, i32 1		; <i8> [#uses=1]
197	%tmp6 = extractelement <16 x i8> %tmp, i32 2		; <i8> [#uses=1]
198	%tmp7 = extractelement <16 x i8> %tmp2, i32 2		; <i8> [#uses=1]
199	%tmp8 = extractelement <16 x i8> %tmp, i32 3		; <i8> [#uses=1]
200	%tmp9 = extractelement <16 x i8> %tmp2, i32 3		; <i8> [#uses=1]
201	%tmp10 = extractelement <16 x i8> %tmp, i32 4		; <i8> [#uses=1]
202	%tmp11 = extractelement <16 x i8> %tmp2, i32 4		; <i8> [#uses=1]
203	%tmp12 = extractelement <16 x i8> %tmp, i32 5		; <i8> [#uses=1]
204	%tmp13 = extractelement <16 x i8> %tmp2, i32 5		; <i8> [#uses=1]
205	%tmp14 = extractelement <16 x i8> %tmp, i32 6		; <i8> [#uses=1]
206	%tmp15 = extractelement <16 x i8> %tmp2, i32 6		; <i8> [#uses=1]
207	%tmp16 = extractelement <16 x i8> %tmp, i32 7		; <i8> [#uses=1]
208	%tmp17 = extractelement <16 x i8> %tmp2, i32 7		; <i8> [#uses=1]
; Rebuild a full vector from the extracted elements, in extraction order.
209	%tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.15, i32 0		; <<16 x i8>> [#uses=1]
210	%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1		; <<16 x i8>> [#uses=1]
211	%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2		; <<16 x i8>> [#uses=1]
212	%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3		; <<16 x i8>> [#uses=1]
213	%tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4		; <<16 x i8>> [#uses=1]
214	%tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5		; <<16 x i8>> [#uses=1]
215	%tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6		; <<16 x i8>> [#uses=1]
216	%tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7		; <<16 x i8>> [#uses=1]
217	%tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8		; <<16 x i8>> [#uses=1]
218	%tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9		; <<16 x i8>> [#uses=1]
219	%tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10		; <<16 x i8>> [#uses=1]
220	%tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11		; <<16 x i8>> [#uses=1]
221	%tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12		; <<16 x i8>> [#uses=1]
222	%tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13		; <<16 x i8>> [#uses=1]
223	%tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14		; <<16 x i8>> [#uses=1]
224	%tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15		; <<16 x i8>> [#uses=1]
225	store <16 x i8> %tmp33, <16 x i8>* %A
226	ret void
227}
228
; @th_h: two-operand halfword interleave of the lower-indexed half. Loads
; <8 x i16> vectors from %A and %B and builds the result
; A[0], B[0], A[1], B[1], A[2], B[2], A[3], B[3], which is stored to %A.
; NOTE(review): presumably counted by the `grep vmrgh` RUN line at the top of
; this file -- confirm against the llc output.
229define void @th_h(<8 x i16>* %A, <8 x i16>* %B) {
230entry:
231	%tmp = load <8 x i16>* %A		; <<8 x i16>> [#uses=4]
232	%tmp2 = load <8 x i16>* %B		; <<8 x i16>> [#uses=4]
; Alternate between the two sources for each index 0..3.
233	%tmp.upgrd.16 = extractelement <8 x i16> %tmp, i32 0		; <i16> [#uses=1]
234	%tmp3 = extractelement <8 x i16> %tmp2, i32 0		; <i16> [#uses=1]
235	%tmp4 = extractelement <8 x i16> %tmp, i32 1		; <i16> [#uses=1]
236	%tmp5 = extractelement <8 x i16> %tmp2, i32 1		; <i16> [#uses=1]
237	%tmp6 = extractelement <8 x i16> %tmp, i32 2		; <i16> [#uses=1]
238	%tmp7 = extractelement <8 x i16> %tmp2, i32 2		; <i16> [#uses=1]
239	%tmp8 = extractelement <8 x i16> %tmp, i32 3		; <i16> [#uses=1]
240	%tmp9 = extractelement <8 x i16> %tmp2, i32 3		; <i16> [#uses=1]
; Rebuild a full vector from the extracted elements, in extraction order.
241	%tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.16, i32 0		; <<8 x i16>> [#uses=1]
242	%tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1		; <<8 x i16>> [#uses=1]
243	%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2		; <<8 x i16>> [#uses=1]
244	%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3		; <<8 x i16>> [#uses=1]
245	%tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4		; <<8 x i16>> [#uses=1]
246	%tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5		; <<8 x i16>> [#uses=1]
247	%tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6		; <<8 x i16>> [#uses=1]
248	%tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7		; <<8 x i16>> [#uses=1]
249	store <8 x i16> %tmp17, <8 x i16>* %A
250	ret void
251}
252
; @tw_h: two-operand word interleave of the lower-indexed half, with the %B
; element first in each pair. Loads <4 x i32> vectors from %A and %B and
; builds the result B[0], A[0], B[1], A[1], which is stored to %A.
; Compare @tw_h_flop, which uses the opposite operand order.
; NOTE(review): presumably counted by the `grep vmrgh` RUN line at the top of
; this file -- confirm against the llc output.
253define void @tw_h(<4 x i32>* %A, <4 x i32>* %B) {
254entry:
255	%tmp = load <4 x i32>* %A		; <<4 x i32>> [#uses=2]
256	%tmp2 = load <4 x i32>* %B		; <<4 x i32>> [#uses=2]
; Note the source order: %tmp2 (from %B) is read before %tmp (from %A).
257	%tmp.upgrd.17 = extractelement <4 x i32> %tmp2, i32 0		; <i32> [#uses=1]
258	%tmp3 = extractelement <4 x i32> %tmp, i32 0		; <i32> [#uses=1]
259	%tmp4 = extractelement <4 x i32> %tmp2, i32 1		; <i32> [#uses=1]
260	%tmp5 = extractelement <4 x i32> %tmp, i32 1		; <i32> [#uses=1]
; Rebuild a full vector from the extracted elements, in extraction order.
261	%tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.17, i32 0		; <<4 x i32>> [#uses=1]
262	%tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1		; <<4 x i32>> [#uses=1]
263	%tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2		; <<4 x i32>> [#uses=1]
264	%tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3		; <<4 x i32>> [#uses=1]
265	store <4 x i32> %tmp9, <4 x i32>* %A
266	ret void
267}
268
; @tw_h_flop: two-operand word interleave of the lower-indexed half, with the
; %A element first in each pair. Loads <4 x i32> vectors from %A and %B and
; builds the result A[0], B[0], A[1], B[1], which is stored to %A.
; Compare @tw_h, which uses the opposite operand order.
; NOTE(review): presumably counted by the `grep vmrgh` RUN line at the top of
; this file -- confirm against the llc output.
269define void @tw_h_flop(<4 x i32>* %A, <4 x i32>* %B) {
270	%tmp = load <4 x i32>* %A		; <<4 x i32>> [#uses=2]
271	%tmp2 = load <4 x i32>* %B		; <<4 x i32>> [#uses=2]
; Source order here: %tmp (from %A) is read before %tmp2 (from %B).
272	%tmp.upgrd.18 = extractelement <4 x i32> %tmp, i32 0		; <i32> [#uses=1]
273	%tmp3 = extractelement <4 x i32> %tmp2, i32 0		; <i32> [#uses=1]
274	%tmp4 = extractelement <4 x i32> %tmp, i32 1		; <i32> [#uses=1]
275	%tmp5 = extractelement <4 x i32> %tmp2, i32 1		; <i32> [#uses=1]
; Rebuild a full vector from the extracted elements, in extraction order.
276	%tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.18, i32 0		; <<4 x i32>> [#uses=1]
277	%tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1		; <<4 x i32>> [#uses=1]
278	%tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2		; <<4 x i32>> [#uses=1]
279	%tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3		; <<4 x i32>> [#uses=1]
280	store <4 x i32> %tmp9, <4 x i32>* %A
281	ret void
282}
283
; @VMRG_UNARY_tb_l: single-source byte version of the upper-half interleave.
; Loads one <16 x i8> vector from %A (%B is never used) and builds the result
; A[8], A[8], A[9], A[9], ..., A[15], A[15], which is stored to %A.
; NOTE(review): presumably counted by the `grep vmrgl` RUN line at the top of
; this file -- confirm against the llc output.
284define void @VMRG_UNARY_tb_l(<16 x i8>* %A, <16 x i8>* %B) {
285entry:
286	%tmp = load <16 x i8>* %A		; <<16 x i8>> [#uses=16]
; Each index 8..15 is extracted twice from the same vector.
287	%tmp.upgrd.19 = extractelement <16 x i8> %tmp, i32 8		; <i8> [#uses=1]
288	%tmp3 = extractelement <16 x i8> %tmp, i32 8		; <i8> [#uses=1]
289	%tmp4 = extractelement <16 x i8> %tmp, i32 9		; <i8> [#uses=1]
290	%tmp5 = extractelement <16 x i8> %tmp, i32 9		; <i8> [#uses=1]
291	%tmp6 = extractelement <16 x i8> %tmp, i32 10		; <i8> [#uses=1]
292	%tmp7 = extractelement <16 x i8> %tmp, i32 10		; <i8> [#uses=1]
293	%tmp8 = extractelement <16 x i8> %tmp, i32 11		; <i8> [#uses=1]
294	%tmp9 = extractelement <16 x i8> %tmp, i32 11		; <i8> [#uses=1]
295	%tmp10 = extractelement <16 x i8> %tmp, i32 12		; <i8> [#uses=1]
296	%tmp11 = extractelement <16 x i8> %tmp, i32 12		; <i8> [#uses=1]
297	%tmp12 = extractelement <16 x i8> %tmp, i32 13		; <i8> [#uses=1]
298	%tmp13 = extractelement <16 x i8> %tmp, i32 13		; <i8> [#uses=1]
299	%tmp14 = extractelement <16 x i8> %tmp, i32 14		; <i8> [#uses=1]
300	%tmp15 = extractelement <16 x i8> %tmp, i32 14		; <i8> [#uses=1]
301	%tmp16 = extractelement <16 x i8> %tmp, i32 15		; <i8> [#uses=1]
302	%tmp17 = extractelement <16 x i8> %tmp, i32 15		; <i8> [#uses=1]
; Rebuild a full vector from the extracted elements, in extraction order.
303	%tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.19, i32 0		; <<16 x i8>> [#uses=1]
304	%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1		; <<16 x i8>> [#uses=1]
305	%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2		; <<16 x i8>> [#uses=1]
306	%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3		; <<16 x i8>> [#uses=1]
307	%tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4		; <<16 x i8>> [#uses=1]
308	%tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5		; <<16 x i8>> [#uses=1]
309	%tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6		; <<16 x i8>> [#uses=1]
310	%tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7		; <<16 x i8>> [#uses=1]
311	%tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8		; <<16 x i8>> [#uses=1]
312	%tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9		; <<16 x i8>> [#uses=1]
313	%tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10		; <<16 x i8>> [#uses=1]
314	%tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11		; <<16 x i8>> [#uses=1]
315	%tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12		; <<16 x i8>> [#uses=1]
316	%tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13		; <<16 x i8>> [#uses=1]
317	%tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14		; <<16 x i8>> [#uses=1]
318	%tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15		; <<16 x i8>> [#uses=1]
319	store <16 x i8> %tmp33, <16 x i8>* %A
320	ret void
321}
322
; @VMRG_UNARY_th_l: single-source halfword version of the upper-half
; interleave. Loads one <8 x i16> vector from %A (%B is never used) and builds
; the result A[4], A[4], A[5], A[5], A[6], A[6], A[7], A[7], which is stored
; to %A.
; NOTE(review): presumably counted by the `grep vmrgl` RUN line at the top of
; this file -- confirm against the llc output.
323define void @VMRG_UNARY_th_l(<8 x i16>* %A, <8 x i16>* %B) {
324entry:
325	%tmp = load <8 x i16>* %A		; <<8 x i16>> [#uses=8]
; Each index 4..7 is extracted twice from the same vector.
326	%tmp.upgrd.20 = extractelement <8 x i16> %tmp, i32 4		; <i16> [#uses=1]
327	%tmp3 = extractelement <8 x i16> %tmp, i32 4		; <i16> [#uses=1]
328	%tmp4 = extractelement <8 x i16> %tmp, i32 5		; <i16> [#uses=1]
329	%tmp5 = extractelement <8 x i16> %tmp, i32 5		; <i16> [#uses=1]
330	%tmp6 = extractelement <8 x i16> %tmp, i32 6		; <i16> [#uses=1]
331	%tmp7 = extractelement <8 x i16> %tmp, i32 6		; <i16> [#uses=1]
332	%tmp8 = extractelement <8 x i16> %tmp, i32 7		; <i16> [#uses=1]
333	%tmp9 = extractelement <8 x i16> %tmp, i32 7		; <i16> [#uses=1]
; Rebuild a full vector from the extracted elements, in extraction order.
334	%tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.20, i32 0		; <<8 x i16>> [#uses=1]
335	%tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1		; <<8 x i16>> [#uses=1]
336	%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2		; <<8 x i16>> [#uses=1]
337	%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3		; <<8 x i16>> [#uses=1]
338	%tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4		; <<8 x i16>> [#uses=1]
339	%tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5		; <<8 x i16>> [#uses=1]
340	%tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6		; <<8 x i16>> [#uses=1]
341	%tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7		; <<8 x i16>> [#uses=1]
342	store <8 x i16> %tmp17, <8 x i16>* %A
343	ret void
344}
345
; @VMRG_UNARY_tw_l: single-source word version of the upper-half interleave.
; Loads one <4 x i32> vector from %A (%B is never used) and builds the result
; A[2], A[2], A[3], A[3], which is stored to %A.
; NOTE(review): presumably counted by the `grep vmrgl` RUN line at the top of
; this file -- confirm against the llc output.
346define void @VMRG_UNARY_tw_l(<4 x i32>* %A, <4 x i32>* %B) {
347entry:
348	%tmp = load <4 x i32>* %A		; <<4 x i32>> [#uses=4]
; Indices 2 and 3 are each extracted twice from the same vector.
349	%tmp.upgrd.21 = extractelement <4 x i32> %tmp, i32 2		; <i32> [#uses=1]
350	%tmp3 = extractelement <4 x i32> %tmp, i32 2		; <i32> [#uses=1]
351	%tmp4 = extractelement <4 x i32> %tmp, i32 3		; <i32> [#uses=1]
352	%tmp5 = extractelement <4 x i32> %tmp, i32 3		; <i32> [#uses=1]
; Rebuild a full vector from the extracted elements, in extraction order.
353	%tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.21, i32 0		; <<4 x i32>> [#uses=1]
354	%tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1		; <<4 x i32>> [#uses=1]
355	%tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2		; <<4 x i32>> [#uses=1]
356	%tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3		; <<4 x i32>> [#uses=1]
357	store <4 x i32> %tmp9, <4 x i32>* %A
358	ret void
359}
360
; @VMRG_UNARY_tb_h: single-source byte version of the lower-half interleave.
; Loads one <16 x i8> vector from %A (%B is never used) and builds the result
; A[0], A[0], A[1], A[1], ..., A[7], A[7], which is stored to %A.
; NOTE(review): presumably counted by the `grep vmrgh` RUN line at the top of
; this file -- confirm against the llc output.
361define void @VMRG_UNARY_tb_h(<16 x i8>* %A, <16 x i8>* %B) {
362entry:
363	%tmp = load <16 x i8>* %A		; <<16 x i8>> [#uses=16]
; Each index 0..7 is extracted twice from the same vector.
364	%tmp.upgrd.22 = extractelement <16 x i8> %tmp, i32 0		; <i8> [#uses=1]
365	%tmp3 = extractelement <16 x i8> %tmp, i32 0		; <i8> [#uses=1]
366	%tmp4 = extractelement <16 x i8> %tmp, i32 1		; <i8> [#uses=1]
367	%tmp5 = extractelement <16 x i8> %tmp, i32 1		; <i8> [#uses=1]
368	%tmp6 = extractelement <16 x i8> %tmp, i32 2		; <i8> [#uses=1]
369	%tmp7 = extractelement <16 x i8> %tmp, i32 2		; <i8> [#uses=1]
370	%tmp8 = extractelement <16 x i8> %tmp, i32 3		; <i8> [#uses=1]
371	%tmp9 = extractelement <16 x i8> %tmp, i32 3		; <i8> [#uses=1]
372	%tmp10 = extractelement <16 x i8> %tmp, i32 4		; <i8> [#uses=1]
373	%tmp11 = extractelement <16 x i8> %tmp, i32 4		; <i8> [#uses=1]
374	%tmp12 = extractelement <16 x i8> %tmp, i32 5		; <i8> [#uses=1]
375	%tmp13 = extractelement <16 x i8> %tmp, i32 5		; <i8> [#uses=1]
376	%tmp14 = extractelement <16 x i8> %tmp, i32 6		; <i8> [#uses=1]
377	%tmp15 = extractelement <16 x i8> %tmp, i32 6		; <i8> [#uses=1]
378	%tmp16 = extractelement <16 x i8> %tmp, i32 7		; <i8> [#uses=1]
379	%tmp17 = extractelement <16 x i8> %tmp, i32 7		; <i8> [#uses=1]
; Rebuild a full vector from the extracted elements, in extraction order.
380	%tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.22, i32 0		; <<16 x i8>> [#uses=1]
381	%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1		; <<16 x i8>> [#uses=1]
382	%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2		; <<16 x i8>> [#uses=1]
383	%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3		; <<16 x i8>> [#uses=1]
384	%tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4		; <<16 x i8>> [#uses=1]
385	%tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5		; <<16 x i8>> [#uses=1]
386	%tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6		; <<16 x i8>> [#uses=1]
387	%tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7		; <<16 x i8>> [#uses=1]
388	%tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8		; <<16 x i8>> [#uses=1]
389	%tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9		; <<16 x i8>> [#uses=1]
390	%tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10		; <<16 x i8>> [#uses=1]
391	%tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11		; <<16 x i8>> [#uses=1]
392	%tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12		; <<16 x i8>> [#uses=1]
393	%tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13		; <<16 x i8>> [#uses=1]
394	%tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14		; <<16 x i8>> [#uses=1]
395	%tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15		; <<16 x i8>> [#uses=1]
396	store <16 x i8> %tmp33, <16 x i8>* %A
397	ret void
398}
399
; @VMRG_UNARY_th_h: single-source halfword version of the lower-half
; interleave. Loads one <8 x i16> vector from %A (%B is never used) and builds
; the result A[0], A[0], A[1], A[1], A[2], A[2], A[3], A[3], which is stored
; to %A.
; NOTE(review): presumably counted by the `grep vmrgh` RUN line at the top of
; this file -- confirm against the llc output.
400define void @VMRG_UNARY_th_h(<8 x i16>* %A, <8 x i16>* %B) {
401entry:
402	%tmp = load <8 x i16>* %A		; <<8 x i16>> [#uses=8]
; Each index 0..3 is extracted twice from the same vector.
403	%tmp.upgrd.23 = extractelement <8 x i16> %tmp, i32 0		; <i16> [#uses=1]
404	%tmp3 = extractelement <8 x i16> %tmp, i32 0		; <i16> [#uses=1]
405	%tmp4 = extractelement <8 x i16> %tmp, i32 1		; <i16> [#uses=1]
406	%tmp5 = extractelement <8 x i16> %tmp, i32 1		; <i16> [#uses=1]
407	%tmp6 = extractelement <8 x i16> %tmp, i32 2		; <i16> [#uses=1]
408	%tmp7 = extractelement <8 x i16> %tmp, i32 2		; <i16> [#uses=1]
409	%tmp8 = extractelement <8 x i16> %tmp, i32 3		; <i16> [#uses=1]
410	%tmp9 = extractelement <8 x i16> %tmp, i32 3		; <i16> [#uses=1]
; Rebuild a full vector from the extracted elements, in extraction order.
411	%tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.23, i32 0		; <<8 x i16>> [#uses=1]
412	%tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1		; <<8 x i16>> [#uses=1]
413	%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2		; <<8 x i16>> [#uses=1]
414	%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3		; <<8 x i16>> [#uses=1]
415	%tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4		; <<8 x i16>> [#uses=1]
416	%tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5		; <<8 x i16>> [#uses=1]
417	%tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6		; <<8 x i16>> [#uses=1]
418	%tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7		; <<8 x i16>> [#uses=1]
419	store <8 x i16> %tmp17, <8 x i16>* %A
420	ret void
421}
422
; @VMRG_UNARY_tw_h: single-source word version of the lower-half interleave.
; Loads one <4 x i32> vector from %A (%B is never used) and builds the result
; A[0], A[0], A[1], A[1], which is stored to %A.
; NOTE(review): presumably counted by the `grep vmrgh` RUN line at the top of
; this file -- confirm against the llc output.
423define void @VMRG_UNARY_tw_h(<4 x i32>* %A, <4 x i32>* %B) {
424entry:
425	%tmp = load <4 x i32>* %A		; <<4 x i32>> [#uses=4]
; Indices 0 and 1 are each extracted twice from the same vector.
426	%tmp.upgrd.24 = extractelement <4 x i32> %tmp, i32 0		; <i32> [#uses=1]
427	%tmp3 = extractelement <4 x i32> %tmp, i32 0		; <i32> [#uses=1]
428	%tmp4 = extractelement <4 x i32> %tmp, i32 1		; <i32> [#uses=1]
429	%tmp5 = extractelement <4 x i32> %tmp, i32 1		; <i32> [#uses=1]
; Rebuild a full vector from the extracted elements, in extraction order.
430	%tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.24, i32 0		; <<4 x i32>> [#uses=1]
431	%tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1		; <<4 x i32>> [#uses=1]
432	%tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2		; <<4 x i32>> [#uses=1]
433	%tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3		; <<4 x i32>> [#uses=1]
434	store <4 x i32> %tmp9, <4 x i32>* %A
435	ret void
436}
437
; @VPCKUHUM_unary: single-source selection of odd-indexed bytes. Loads one
; <8 x i16> vector from %A (%B is never used), bitcasts it to <16 x i8> twice,
; and builds a result whose elements 0..7 are bytes 1, 3, 5, ..., 15 of the
; first bitcast and whose elements 8..15 are the same bytes of the second
; bitcast. The result is bitcast back to <8 x i16> and stored to %A.
; NOTE(review): presumably the single match counted by the `grep vpkuhum` RUN
; line at the top of this file -- confirm against the llc output.
438define void @VPCKUHUM_unary(<8 x i16>* %A, <8 x i16>* %B) {
439entry:
440	%tmp = load <8 x i16>* %A		; <<8 x i16>> [#uses=2]
; Both bitcasts view the same loaded value, so the two operands are identical.
441	%tmp.upgrd.25 = bitcast <8 x i16> %tmp to <16 x i8>		; <<16 x i8>> [#uses=8]
442	%tmp3 = bitcast <8 x i16> %tmp to <16 x i8>		; <<16 x i8>> [#uses=8]
; Odd-indexed bytes of the first view.
443	%tmp.upgrd.26 = extractelement <16 x i8> %tmp.upgrd.25, i32 1		; <i8> [#uses=1]
444	%tmp4 = extractelement <16 x i8> %tmp.upgrd.25, i32 3		; <i8> [#uses=1]
445	%tmp5 = extractelement <16 x i8> %tmp.upgrd.25, i32 5		; <i8> [#uses=1]
446	%tmp6 = extractelement <16 x i8> %tmp.upgrd.25, i32 7		; <i8> [#uses=1]
447	%tmp7 = extractelement <16 x i8> %tmp.upgrd.25, i32 9		; <i8> [#uses=1]
448	%tmp8 = extractelement <16 x i8> %tmp.upgrd.25, i32 11		; <i8> [#uses=1]
449	%tmp9 = extractelement <16 x i8> %tmp.upgrd.25, i32 13		; <i8> [#uses=1]
450	%tmp10 = extractelement <16 x i8> %tmp.upgrd.25, i32 15		; <i8> [#uses=1]
; Odd-indexed bytes of the second view.
451	%tmp11 = extractelement <16 x i8> %tmp3, i32 1		; <i8> [#uses=1]
452	%tmp12 = extractelement <16 x i8> %tmp3, i32 3		; <i8> [#uses=1]
453	%tmp13 = extractelement <16 x i8> %tmp3, i32 5		; <i8> [#uses=1]
454	%tmp14 = extractelement <16 x i8> %tmp3, i32 7		; <i8> [#uses=1]
455	%tmp15 = extractelement <16 x i8> %tmp3, i32 9		; <i8> [#uses=1]
456	%tmp16 = extractelement <16 x i8> %tmp3, i32 11		; <i8> [#uses=1]
457	%tmp17 = extractelement <16 x i8> %tmp3, i32 13		; <i8> [#uses=1]
458	%tmp18 = extractelement <16 x i8> %tmp3, i32 15		; <i8> [#uses=1]
; Rebuild a full vector from the extracted elements, in extraction order.
459	%tmp19 = insertelement <16 x i8> undef, i8 %tmp.upgrd.26, i32 0		; <<16 x i8>> [#uses=1]
460	%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 1		; <<16 x i8>> [#uses=1]
461	%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 2		; <<16 x i8>> [#uses=1]
462	%tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 3		; <<16 x i8>> [#uses=1]
463	%tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 4		; <<16 x i8>> [#uses=1]
464	%tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 5		; <<16 x i8>> [#uses=1]
465	%tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 6		; <<16 x i8>> [#uses=1]
466	%tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 7		; <<16 x i8>> [#uses=1]
467	%tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 8		; <<16 x i8>> [#uses=1]
468	%tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 9		; <<16 x i8>> [#uses=1]
469	%tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 10		; <<16 x i8>> [#uses=1]
470	%tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 11		; <<16 x i8>> [#uses=1]
471	%tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 12		; <<16 x i8>> [#uses=1]
472	%tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 13		; <<16 x i8>> [#uses=1]
473	%tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 14		; <<16 x i8>> [#uses=1]
474	%tmp34 = insertelement <16 x i8> %tmp33, i8 %tmp18, i32 15		; <<16 x i8>> [#uses=1]
475	%tmp34.upgrd.27 = bitcast <16 x i8> %tmp34 to <8 x i16>		; <<8 x i16>> [#uses=1]
476	store <8 x i16> %tmp34.upgrd.27, <8 x i16>* %A
477	ret void
478}
479
; @VPCKUWUM_unary: single-source selection of odd-indexed halfwords. Loads one
; <4 x i32> vector from %A (%B is never used), bitcasts it to <8 x i16> twice,
; and builds a result whose elements 0..3 are halfwords 1, 3, 5, 7 of the
; first bitcast and whose elements 4..7 are the same halfwords of the second
; bitcast. The result is bitcast back to <4 x i32> and stored to %A.
; NOTE(review): presumably the single match counted by the `grep vpkuwum` RUN
; line at the top of this file -- confirm against the llc output.
480define void @VPCKUWUM_unary(<4 x i32>* %A, <4 x i32>* %B) {
481entry:
482	%tmp = load <4 x i32>* %A		; <<4 x i32>> [#uses=2]
; Both bitcasts view the same loaded value, so the two operands are identical.
483	%tmp.upgrd.28 = bitcast <4 x i32> %tmp to <8 x i16>		; <<8 x i16>> [#uses=4]
484	%tmp3 = bitcast <4 x i32> %tmp to <8 x i16>		; <<8 x i16>> [#uses=4]
; Odd-indexed halfwords of the first view.
485	%tmp.upgrd.29 = extractelement <8 x i16> %tmp.upgrd.28, i32 1		; <i16> [#uses=1]
486	%tmp4 = extractelement <8 x i16> %tmp.upgrd.28, i32 3		; <i16> [#uses=1]
487	%tmp5 = extractelement <8 x i16> %tmp.upgrd.28, i32 5		; <i16> [#uses=1]
488	%tmp6 = extractelement <8 x i16> %tmp.upgrd.28, i32 7		; <i16> [#uses=1]
; Odd-indexed halfwords of the second view.
489	%tmp7 = extractelement <8 x i16> %tmp3, i32 1		; <i16> [#uses=1]
490	%tmp8 = extractelement <8 x i16> %tmp3, i32 3		; <i16> [#uses=1]
491	%tmp9 = extractelement <8 x i16> %tmp3, i32 5		; <i16> [#uses=1]
492	%tmp10 = extractelement <8 x i16> %tmp3, i32 7		; <i16> [#uses=1]
; Rebuild a full vector from the extracted elements, in extraction order.
493	%tmp11 = insertelement <8 x i16> undef, i16 %tmp.upgrd.29, i32 0		; <<8 x i16>> [#uses=1]
494	%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 1		; <<8 x i16>> [#uses=1]
495	%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 2		; <<8 x i16>> [#uses=1]
496	%tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 3		; <<8 x i16>> [#uses=1]
497	%tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 4		; <<8 x i16>> [#uses=1]
498	%tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 5		; <<8 x i16>> [#uses=1]
499	%tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 6		; <<8 x i16>> [#uses=1]
500	%tmp18 = insertelement <8 x i16> %tmp17, i16 %tmp10, i32 7		; <<8 x i16>> [#uses=1]
501	%tmp18.upgrd.30 = bitcast <8 x i16> %tmp18 to <4 x i32>		; <<4 x i32>> [#uses=1]
502	store <4 x i32> %tmp18.upgrd.30, <4 x i32>* %A
503	ret void
504}
505