1;; VSX patterns.
2;; Copyright (C) 2009-2013 Free Software Foundation, Inc.
3;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
4
5;; This file is part of GCC.
6
7;; GCC is free software; you can redistribute it and/or modify it
8;; under the terms of the GNU General Public License as published
9;; by the Free Software Foundation; either version 3, or (at your
10;; option) any later version.
11
12;; GCC is distributed in the hope that it will be useful, but WITHOUT
13;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15;; License for more details.
16
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3.  If not see
19;; <http://www.gnu.org/licenses/>.
20
;; Iterator for both scalar and vector floating point types supported by VSX
(define_mode_iterator VSX_B [DF V4SF V2DF])

;; Iterator for the 2 64-bit vector types
(define_mode_iterator VSX_D [V2DF V2DI])

;; Iterator for the 2 64-bit vector types + 128-bit types that are loaded with
;; lxvd2x to properly handle swapping words on little endian
(define_mode_iterator VSX_LE [V2DF
			      V2DI
			      V1TI
			      (TI	"VECTOR_MEM_VSX_P (TImode)")])

;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])

;; Iterator for the DF types
(define_mode_iterator VSX_DF [V2DF DF])

;; Iterator for vector floating point types supported by VSX
(define_mode_iterator VSX_F [V4SF V2DF])

;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF V1TI TI])

;; Iterator for memory move.  Handle TImode specially to allow
;; it to use gprs as well as vsx registers.
(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF V1TI])

;; Like VSX_M, but also allowing TImode when -mvsx-timode places TImode
;; values in VSX registers (TARGET_VSX_TIMODE).
(define_mode_iterator VSX_M2 [V16QI
			      V8HI
			      V4SI
			      V2DI
			      V4SF
			      V2DF
			      V1TI
			      (TI	"TARGET_VSX_TIMODE")])
58
;; Map into the appropriate load/store name based on the type
;; (suffix fragment selecting the element-size form of the VSX
;; load/store mnemonics, e.g. "vw4" vs. "vd2" for word vs. doubleword).
(define_mode_attr VSm  [(V16QI "vw4")
			(V8HI  "vw4")
			(V4SI  "vw4")
			(V4SF  "vw4")
			(V2DF  "vd2")
			(V2DI  "vd2")
			(DF    "d")
			(V1TI  "vd2")
			(TI    "vd2")])

;; Map into the appropriate suffix based on the type
;; ("sp" = single precision / word, "dp" = double precision / doubleword).
(define_mode_attr VSs	[(V16QI "sp")
			 (V8HI  "sp")
			 (V4SI  "sp")
			 (V4SF  "sp")
			 (V2DF  "dp")
			 (V2DI  "dp")
			 (DF    "dp")
			 (SF	"sp")
			 (V1TI  "dp")
			 (TI    "dp")])

;; Map the register class used (preferred constraint letter for each mode).
(define_mode_attr VSr	[(V16QI "v")
			 (V8HI  "v")
			 (V4SI  "v")
			 (V4SF  "wf")
			 (V2DI  "wd")
			 (V2DF  "wd")
			 (DI	"wi")
			 (DF    "ws")
			 (SF	"ww")
			 (V1TI  "v")
			 (TI    "wt")])

;; Map the register class used for float<->int conversions (floating point side)
;; VSr2 is the preferred register class, VSr3 is any register class that will
;; hold the data
(define_mode_attr VSr2	[(V2DF  "wd")
			 (V4SF  "wf")
			 (DF    "ws")
			 (SF	"ww")
			 (DI	"wi")])

(define_mode_attr VSr3	[(V2DF  "wa")
			 (V4SF  "wa")
			 (DF    "ws")
			 (SF	"ww")
			 (DI	"wi")])

;; Map the register class for sp<->dp float conversions, destination
(define_mode_attr VSr4	[(SF	"ws")
			 (DF	"f")
			 (V2DF  "wd")
			 (V4SF	"v")])

;; Map the register class for sp<->dp float conversions, source
(define_mode_attr VSr5	[(SF	"ws")
			 (DF	"f")
			 (V2DF  "v")
			 (V4SF	"wd")])
121
;; The VSX register class that a type can occupy, even if it is not the
;; preferred register class (VSr is the preferred register class that will get
;; allocated first).
(define_mode_attr VSa	[(V16QI "wa")
			 (V8HI  "wa")
			 (V4SI  "wa")
			 (V4SF  "wa")
			 (V2DI  "wa")
			 (V2DF  "wa")
			 (DI	"wi")
			 (DF    "ws")
			 (SF	"ww")
			 (V1TI	"wa")
			 (TI    "wt")])

;; Same size integer type for floating point data
;; (lower-case form for use inside pattern names).
(define_mode_attr VSi [(V4SF  "v4si")
		       (V2DF  "v2di")
		       (DF    "di")])

;; Upper-case form of VSi for use as a mode inside RTL expressions.
(define_mode_attr VSI [(V4SF  "V4SI")
		       (V2DF  "V2DI")
		       (DF    "DI")])

;; Word size for same size conversion
(define_mode_attr VSc [(V4SF "w")
		       (V2DF "d")
		       (DF   "d")])

;; Map into either s or v, depending on whether this is a scalar or vector
;; operation
(define_mode_attr VSv	[(V16QI "v")
			 (V8HI  "v")
			 (V4SI  "v")
			 (V4SF  "v")
			 (V2DI  "v")
			 (V2DF  "v")
			 (V1TI  "v")
			 (DF    "s")])
161
;; Appropriate type for add ops (and other simple FP ops)
;; Each VStype_*/VSfptype_* pair feeds the "type"/"fp_type" scheduling
;; attributes of the corresponding insn patterns below.
(define_mode_attr VStype_simple	[(V2DF "vecdouble")
				 (V4SF "vecfloat")
				 (DF   "fp")])

(define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
				   (V4SF "fp_addsub_s")
				   (DF   "fp_addsub_d")])

;; Appropriate type for multiply ops
(define_mode_attr VStype_mul	[(V2DF "vecdouble")
				 (V4SF "vecfloat")
				 (DF   "dmul")])

(define_mode_attr VSfptype_mul	[(V2DF "fp_mul_d")
				 (V4SF "fp_mul_s")
				 (DF   "fp_mul_d")])

;; Appropriate type for divide ops.
(define_mode_attr VStype_div	[(V2DF "vecdiv")
				 (V4SF "vecfdiv")
				 (DF   "ddiv")])

(define_mode_attr VSfptype_div	[(V2DF "fp_div_d")
				 (V4SF "fp_div_s")
				 (DF   "fp_div_d")])

;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
;; the scalar sqrt
(define_mode_attr VStype_sqrt	[(V2DF "dsqrt")
				 (V4SF "ssqrt")
				 (DF   "dsqrt")])

(define_mode_attr VSfptype_sqrt	[(V2DF "fp_sqrt_d")
				 (V4SF "fp_sqrt_s")
				 (DF   "fp_sqrt_d")])
198
;; Iterator and modes for sp<->dp conversions
;; Because scalar SF values are represented internally as double, use the
;; V4SF type to represent this than SF.
(define_mode_iterator VSX_SPDP [DF V4SF V2DF])

;; Result mode of the sp<->dp conversion for each source mode.
(define_mode_attr VS_spdp_res [(DF	"V4SF")
			       (V4SF	"V2DF")
			       (V2DF	"V4SF")])

;; Instruction mnemonic implementing the sp<->dp conversion.
(define_mode_attr VS_spdp_insn [(DF	"xscvdpsp")
				(V4SF	"xvcvspdp")
				(V2DF	"xvcvdpsp")])

;; Scheduling type for the sp<->dp conversion instruction.
(define_mode_attr VS_spdp_type [(DF	"fp")
				(V4SF	"vecdouble")
				(V2DF	"vecdouble")])

;; Map the scalar mode for a vector type
(define_mode_attr VS_scalar [(V1TI	"TI")
			     (V2DF	"DF")
			     (V2DI	"DI")
			     (V4SF	"SF")
			     (V4SI	"SI")
			     (V8HI	"HI")
			     (V16QI	"QI")])

;; Map to a double-sized vector mode
(define_mode_attr VS_double [(V4SI	"V8SI")
			     (V4SF	"V8SF")
			     (V2DI	"V4DI")
			     (V2DF	"V4DF")
			     (V1TI	"V2TI")])

;; Map register class for 64-bit element in 128-bit vector for direct moves
;; to/from gprs
(define_mode_attr VS_64dm [(V2DF	"wk")
			   (V2DI	"wj")])

;; Map register class for 64-bit element in 128-bit vector for normal register
;; to register moves
(define_mode_attr VS_64reg [(V2DF	"ws")
			    (V2DI	"wi")])
241
;; Constants for creating unspecs
;; (opaque operation codes used by the UNSPEC patterns in this file).
(define_c_enum "unspec"
  [UNSPEC_VSX_CONCAT
   UNSPEC_VSX_CVDPSXWS
   UNSPEC_VSX_CVDPUXWS
   UNSPEC_VSX_CVSPDP
   UNSPEC_VSX_CVSPDPN
   UNSPEC_VSX_CVDPSPN
   UNSPEC_VSX_CVSXWDP
   UNSPEC_VSX_CVUXWDP
   UNSPEC_VSX_CVSXDSP
   UNSPEC_VSX_CVUXDSP
   UNSPEC_VSX_CVSPSXDS
   UNSPEC_VSX_CVSPUXDS
   UNSPEC_VSX_TDIV
   UNSPEC_VSX_TSQRT
   UNSPEC_VSX_SET
   UNSPEC_VSX_ROUND_I
   UNSPEC_VSX_ROUND_IC
   UNSPEC_VSX_SLDWI
   UNSPEC_VSX_XXSPLTW
   UNSPEC_VSX_XXSPLTD
   UNSPEC_VSX_DIVSD
   UNSPEC_VSX_DIVUD
   UNSPEC_VSX_MULSD
   UNSPEC_VSX_XVCVSXDDP
   UNSPEC_VSX_XVCVUXDDP
   UNSPEC_VSX_XVCVDPSXDS
   UNSPEC_VSX_XVCVDPUXDS
  ])
272
273;; VSX moves
274
275;; The patterns for LE permuted loads and stores come before the general
276;; VSX moves so they match first.
;; Little-endian load: lxvd2x loads the two doublewords element-swapped on
;; LE, so split into a swapped load into a temporary followed by a permute
;; that restores element order.  The two patterns below share a name but
;; expand to distinct modes (VSX_LE vs. VSX_W), so the generated insns do
;; not collide.
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_LE 1 "memory_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "#"
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
  "
{
  /* Use a fresh pseudo for the intermediate swapped value when we can;
     after reload, reuse the destination register.  */
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  "
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

;; Same as above for the 32-bit element vectors: swap the two doubleword
;; halves (pairs of words) on load.
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_W 1 "memory_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "#"
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  "
{
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  "
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
324
;; LE permuted load for V8HI: swap the doubleword halves (groups of four
;; halfwords) twice — once in the load and once in the register permute.
(define_insn_and_split "*vsx_le_perm_load_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "memory_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "#"
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "
{
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  "
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

;; LE permuted load for V16QI: swap the doubleword halves (groups of eight
;; bytes) in both the load and the correcting permute.
(define_insn_and_split "*vsx_le_perm_load_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "memory_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "#"
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "
{
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  "
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])
390
;; LE permuted store: permute the source into element-swapped order, then
;; stxvd2x stores it so memory ends up in correct element order.  The "+"
;; constraint on operand 1 reflects that the post-reload split temporarily
;; clobbers (and then restores) the source register.
(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_LE 0 "memory_operand" "=Z")
        (match_operand:VSX_LE 1 "vsx_register_operand" "+<VSa>"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

;; Pre-reload split: permute into a fresh pseudo, then store swapped.
(define_split
  [(set (match_operand:VSX_LE 0 "memory_operand" "")
        (match_operand:VSX_LE 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_LE 0 "memory_operand" "")
        (match_operand:VSX_LE 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))]
  "")

;; LE permuted store for the 32-bit element vector modes.
(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
        (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_W 0 "memory_operand" "")
        (match_operand:VSX_W 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
	             (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
	             (const_int 0) (const_int 1)])))]
{
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_W 0 "memory_operand" "")
        (match_operand:VSX_W 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
	             (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
	             (const_int 0) (const_int 1)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
	             (const_int 0) (const_int 1)])))]
  "")
485
;; LE permuted store for V8HI (see the VSX_LE store pattern above for the
;; overall strategy: permute, store swapped, and after reload restore the
;; live source register with a third permute).
(define_insn "*vsx_le_perm_store_v8hi"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V8HI 0 "memory_operand" "")
        (match_operand:V8HI 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V8HI 0 "memory_operand" "")
        (match_operand:V8HI 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
  [(set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "")

;; LE permuted store for V16QI.
(define_insn "*vsx_le_perm_store_v16qi"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V16QI 0 "memory_operand" "")
        (match_operand:V16QI 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V16QI 0 "memory_operand" "")
        (match_operand:V16QI 1 "vsx_register_operand" ""))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
  [(set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "")
625
626
;; General 128-bit VSX move.  Output is computed by
;; rs6000_output_move_128bit based on which alternative matched;
;; the alternative lists for the two operands, "type" and "length"
;; must stay in one-to-one correspondence (17 alternatives each).
(define_insn "*vsx_mov<mode>"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?<VSa>,?<VSa>,wQ,?&r,??Y,??r,??r,<VSr>,?<VSa>,*r,v,wZ, v")
	(match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,<VSa>,Z,<VSa>,r,wQ,r,Y,r,j,j,j,W,v,wZ"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload")
   (set_attr "length" "4,4,4,4,4,4,12,12,12,12,16,4,4,*,16,4,4")])
638
639;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal
640;; use of TImode is for unions.  However for plain data movement, slightly
641;; favor the vector loads
;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal
;; use of TImode is for unions.  However for plain data movement, slightly
;; favor the vector loads
(define_insn "*vsx_movti_64bit"
  [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v,v,wZ,wQ,&r,Y,r,r,?r")
	(match_operand:TI 1 "input_operand" "wa,Z,wa,O,W,wZ,v,r,wQ,r,Y,r,n"))]
  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
   && (register_operand (operands[0], TImode)
       || register_operand (operands[1], TImode))"
{
  /* Output selection is centralized for all 128-bit moves.  */
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,load,store,load,*,*")
   (set_attr "length" "4,4,4,4,16,4,4,8,8,8,8,8,8")])
653
;; TImode move for 32-bit targets.  Alternatives 0-6 use the VSX/Altivec
;; unit; 7-12 move through GPRs, using the string instructions (stswi/lswi)
;; when -mstring is enabled and otherwise splitting into word moves ("#").
;;
;; Note: this output template is a braced C block, so the strings below are
;; ordinary C string literals; md-style escaped quotes (\"...\") must not be
;; used here (fixed: cases 7-12 previously used the escaped form).
(define_insn "*vsx_movti_32bit"
  [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r")
	(match_operand:TI 1 "input_operand"        "wa, Z,wa, O,W,wZ, v,r,r,    Q,    Y,    r,n"))]
  "! TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
   && (register_operand (operands[0], TImode)
       || register_operand (operands[1], TImode))"
{
  switch (which_alternative)
    {
    case 0:
      return "stxvd2x %x1,%y0";

    case 1:
      return "lxvd2x %x0,%y1";

    case 2:
      return "xxlor %x0,%x1,%x1";

    case 3:
      return "xxlxor %x0,%x0,%x0";

    case 4:
      return output_vec_const_move (operands);

    case 5:
      return "stvx %1,%y0";

    case 6:
      return "lvx %0,%y1";

    case 7:
      if (TARGET_STRING)
        return "stswi %1,%P0,16";
      /* Without -mstring, fall through to splitting into stores.  */

    case 8:
      return "#";

    case 9:
      /* If the address is not used in the output, we can use lsi.  Otherwise,
	 fall through to generating four loads.  */
      if (TARGET_STRING
          && ! reg_overlap_mentioned_p (operands[0], operands[1]))
	return "lswi %0,%P1,16";
      /* ... fall through ...  */

    case 10:
    case 11:
    case 12:
      return "#";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store_ux,store_ux,load_ux,load_ux, *, *")
   (set_attr "length" "     4,      4,        4,       4,         8,       4,      4,      16,      16,     16,     16,16,16")
   (set (attr "cell_micro") (if_then_else (match_test "TARGET_STRING")
   			                  (const_string "always")
					  (const_string "conditional")))])
712
;; Explicit  load/store expanders for the builtin functions
;; (these just emit the plain move patterns above; no extra code needed).
(define_expand "vsx_load_<mode>"
  [(set (match_operand:VSX_M 0 "vsx_register_operand" "")
	(match_operand:VSX_M 1 "memory_operand" ""))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "")

(define_expand "vsx_store_<mode>"
  [(set (match_operand:VSX_M 0 "memory_operand" "")
	(match_operand:VSX_M 1 "vsx_register_operand" ""))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "")
725
726
727;; VSX vector floating point arithmetic instructions.  The VSX scalar
728;; instructions are now combined with the insn for the traditional floating
729;; point unit.
;; Vector/scalar FP add.  First alternative uses the preferred class for
;; the mode (<VSr>), second falls back to any VSX register (<VSa>).
(define_insn "*vsx_add<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
		    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvadd<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

;; Vector/scalar FP subtract.
(define_insn "*vsx_sub<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
		     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsub<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
747
;; Vector/scalar FP multiply.  Use the multiply scheduling attributes
;; (VStype_mul/VSfptype_mul) so the DF case gets type "dmul" — previously
;; the "type" attribute incorrectly used <VStype_simple> while "fp_type"
;; already used the multiply variant.
(define_insn "*vsx_mul<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
		    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmul<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_mul>")
   (set_attr "fp_type" "<VSfptype_mul>")])
756
; Emulate vector with scalar for vec_mul in V2DImode: extract both
; doubleword elements of each operand, multiply them with the scalar
; muldi3 pattern, and concatenate the two products back into a vector.
(define_insn_and_split "vsx_mul_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_MULSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
  [(const_int 0)]
  "
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);	/* element 0 of op1, then reused */
  rtx op4 = gen_reg_rtx (DImode);	/* element of op2 */
  rtx op5 = gen_reg_rtx (DImode);	/* product of element 0 */
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  emit_insn (gen_muldi3 (op5, op3, op4));
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  emit_insn (gen_muldi3 (op3, op3, op4));
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
}"
  [(set_attr "type" "vecdouble")])
784
;; Vector/scalar FP divide.
(define_insn "*vsx_div<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
		   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvdiv<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_div>")
   (set_attr "fp_type" "<VSfptype_div>")])

; Emulate vector with scalar for vec_div in V2DImode: extract the two
; doubleword elements, divide with the signed scalar divdi3 pattern, and
; rebuild the vector.
(define_insn_and_split "vsx_div_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_DIVSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
  [(const_int 0)]
  "
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  emit_insn (gen_divdi3 (op5, op3, op4));
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  emit_insn (gen_divdi3 (op3, op3, op4));
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
}"
  [(set_attr "type" "vecdiv")])

;; Unsigned variant of the above, using udivdi3.
(define_insn_and_split "vsx_udiv_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_DIVUD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
  [(const_int 0)]
  "
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  emit_insn (gen_udivdi3 (op5, op3, op4));
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  emit_insn (gen_udivdi3 (op3, op3, op4));
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
}"
  [(set_attr "type" "vecdiv")])
848
;; *tdiv* instruction returning the FG flag
;; (software divide test: the insn sets a CCFP register and we extract the
;; FG bit as a gt comparison against zero).
(define_expand "vsx_tdiv<mode>3_fg"
  [(set (match_dup 3)
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
		      (match_operand:VSX_B 2 "vsx_register_operand" "")]
		     UNSPEC_VSX_TDIV))
   (set (match_operand:SI 0 "gpc_reg_operand" "")
	(gt:SI (match_dup 3)
	       (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

;; *tdiv* instruction returning the FE flag
;; (same as above but extracting the FE bit via an eq comparison).
(define_expand "vsx_tdiv<mode>3_fe"
  [(set (match_dup 3)
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
		      (match_operand:VSX_B 2 "vsx_register_operand" "")]
		     UNSPEC_VSX_TDIV))
   (set (match_operand:SI 0 "gpc_reg_operand" "")
	(eq:SI (match_dup 3)
	       (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

;; The underlying xstdivdp/xvtdivdp/xvtdivsp insn matched by the expanders
;; above (<VSv> selects the scalar "s" or vector "v" mnemonic form).
(define_insn "*vsx_tdiv<mode>3_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
		      (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
		   UNSPEC_VSX_TDIV))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tdiv<VSs> %0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
886
;; Vector reciprocal estimate (xvresp/xvredp).
(define_insn "vsx_fre<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
		      UNSPEC_FRES))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvre<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
895
;; Vector negate (xvnegsp/xvnegdp).
(define_insn "*vsx_neg<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvneg<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
903
;; Vector absolute value (xvabssp/xvabsdp).
(define_insn "*vsx_abs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvabs<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
911
;; Vector negative absolute value, -|x| (xvnabssp/xvnabsdp).
(define_insn "vsx_nabs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (neg:VSX_F
	 (abs:VSX_F
	  (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvnabs<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
921
;; Vector signed maximum (xvmaxsp/xvmaxdp).
(define_insn "vsx_smax<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
		    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmax<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
930
;; Vector signed minimum (xvminsp/xvmindp).
(define_insn "*vsx_smin<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
		    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmin<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
939
;; Vector square root (xvsqrtsp/xvsqrtdp).
(define_insn "*vsx_sqrt<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsqrt<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_sqrt>")
   (set_attr "fp_type" "<VSfptype_sqrt>")])
947
;; Vector reciprocal square root estimate (xvrsqrtesp/xvrsqrtedp).
(define_insn "*vsx_rsqrte<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
		      UNSPEC_RSQRT))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvrsqrte<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
956
;; *tsqrt* returning the fg flag
;; Expand a VSX square-root test into an UNSPEC_VSX_TSQRT setting a CCFP
;; pseudo, then extract the FG condition (result > 0) into an SImode GPR.
(define_expand "vsx_tsqrt<mode>2_fg"
  [(set (match_dup 3)
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
		     UNSPEC_VSX_TSQRT))
   (set (match_operand:SI 0 "gpc_reg_operand" "")
	(gt:SI (match_dup 3)
	       (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  /* Operand 3 is an internal CCFP pseudo holding the tsqrt result.  */
  operands[3] = gen_reg_rtx (CCFPmode);
})
969
;; *tsqrt* returning the fe flag
;; Same as vsx_tsqrt<mode>2_fg, but extract the FE condition
;; (result == 0) into an SImode GPR.
(define_expand "vsx_tsqrt<mode>2_fe"
  [(set (match_dup 3)
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
		     UNSPEC_VSX_TSQRT))
   (set (match_operand:SI 0 "gpc_reg_operand" "")
	(eq:SI (match_dup 3)
	       (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  /* Operand 3 is an internal CCFP pseudo holding the tsqrt result.  */
  operands[3] = gen_reg_rtx (CCFPmode);
})
982
;; The actual square-root test instruction (xstsqrtdp/xvtsqrtdp/xvtsqrtsp),
;; writing the result into a CR field.
(define_insn "*vsx_tsqrt<mode>2_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
		     UNSPEC_VSX_TSQRT))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tsqrt<VSs> %0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
991
992;; Fused vector multiply/add instructions. Support the classical Altivec
993;; versions of fma, which allows the target to be a separate register from the
994;; 3 inputs.  Under VSX, the target must be either the addend or the first
995;; multiply.
996
;; V4SF fused multiply-add.  Alternatives 1/3 tie operand 3 (the addend) to
;; the output and use the "a" form; alternatives 2/4 tie operand 2 and use
;; the "m" form.  The last alternative falls back to the Altivec vmaddfp,
;; which allows all four operands to be distinct registers.
(define_insn "*vsx_fmav4sf4"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
	(fma:V4SF
	  (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
	  (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
	  (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "@
   xvmaddasp %x0,%x1,%x2
   xvmaddmsp %x0,%x1,%x3
   xvmaddasp %x0,%x1,%x2
   xvmaddmsp %x0,%x1,%x3
   vmaddfp %0,%1,%2,%3"
  [(set_attr "type" "vecfloat")])
1011
;; V2DF fused multiply-add; the output must be tied to either the addend
;; (xvmaddadp) or the second multiplicand (xvmaddmdp).
(define_insn "*vsx_fmav2df4"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
	(fma:V2DF
	  (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
	  (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
	  (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "@
   xvmaddadp %x0,%x1,%x2
   xvmaddmdp %x0,%x1,%x3
   xvmaddadp %x0,%x1,%x2
   xvmaddmdp %x0,%x1,%x3"
  [(set_attr "type" "vecdouble")])
1025
;; Vector fused multiply-subtract, op1*op2 - op3 (xvmsuba/xvmsubm forms).
(define_insn "*vsx_fms<mode>4"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
	(fma:VSX_F
	  (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
	  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
	  (neg:VSX_F
	    (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "@
   xvmsuba<VSs> %x0,%x1,%x2
   xvmsubm<VSs> %x0,%x1,%x3
   xvmsuba<VSs> %x0,%x1,%x2
   xvmsubm<VSs> %x0,%x1,%x3"
  [(set_attr "type" "<VStype_mul>")])
1040
;; Vector negated fused multiply-add, -(op1*op2 + op3)
;; (xvnmadda/xvnmaddm forms).
(define_insn "*vsx_nfma<mode>4"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
	(neg:VSX_F
	 (fma:VSX_F
	  (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
	  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
	  (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "@
   xvnmadda<VSs> %x0,%x1,%x2
   xvnmaddm<VSs> %x0,%x1,%x3
   xvnmadda<VSs> %x0,%x1,%x2
   xvnmaddm<VSs> %x0,%x1,%x3"
  [(set_attr "type" "<VStype_mul>")
   (set_attr "fp_type" "<VSfptype_mul>")])
1056
;; V4SF negated fused multiply-subtract, -(op1*op2 - op3), with an Altivec
;; vnmsubfp fallback that allows four distinct registers.
(define_insn "*vsx_nfmsv4sf4"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
	(neg:V4SF
	 (fma:V4SF
	   (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
	   (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
	   (neg:V4SF
	     (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "@
   xvnmsubasp %x0,%x1,%x2
   xvnmsubmsp %x0,%x1,%x3
   xvnmsubasp %x0,%x1,%x2
   xvnmsubmsp %x0,%x1,%x3
   vnmsubfp %0,%1,%2,%3"
  [(set_attr "type" "vecfloat")])
1073
;; V2DF negated fused multiply-subtract, -(op1*op2 - op3).
(define_insn "*vsx_nfmsv2df4"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
	(neg:V2DF
	 (fma:V2DF
	   (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
	   (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
	   (neg:V2DF
	     (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "@
   xvnmsubadp %x0,%x1,%x2
   xvnmsubmdp %x0,%x1,%x3
   xvnmsubadp %x0,%x1,%x2
   xvnmsubmdp %x0,%x1,%x3"
  [(set_attr "type" "vecdouble")])
1089
;; Vector conditional expressions (no scalar version for these instructions)
;; Vector compare equal, producing an all-ones/all-zeros mask per element.
(define_insn "vsx_eq<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
		  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
1099
;; Vector compare greater-than, producing a per-element mask.
(define_insn "vsx_gt<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
		  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
1108
;; Vector compare greater-than-or-equal, producing a per-element mask.
(define_insn "*vsx_ge<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
		  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpge<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
1117
;; Compare vectors producing a vector result and a predicate, setting CR6 to
;; indicate a combined status
;; Record form of the equality compare: writes the mask into operand 0 and
;; the summary status into CR6 (hard reg 74).
(define_insn "*vsx_eq_<mode>_p"
  [(set (reg:CC 74)
	(unspec:CC
	 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
		 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
	 UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(eq:VSX_F (match_dup 1)
		  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<VSs>. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
1132
;; Record form of the greater-than compare: mask into operand 0,
;; summary status into CR6 (hard reg 74).
(define_insn "*vsx_gt_<mode>_p"
  [(set (reg:CC 74)
	(unspec:CC
	 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
		 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
	 UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(gt:VSX_F (match_dup 1)
		  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<VSs>. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
1145
;; Record form of the greater-than-or-equal compare: mask into operand 0,
;; summary status into CR6 (hard reg 74).
(define_insn "*vsx_ge_<mode>_p"
  [(set (reg:CC 74)
	(unspec:CC
	 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
		 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
	 UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(ge:VSX_F (match_dup 1)
		  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpge<VSs>. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
1158
;; Vector select
;; Per-bit select: operand 1 is the mask; xxsel takes its operands in
;; (false, true, mask) order, hence the %x3,%x2,%x1 ordering.
(define_insn "*vsx_xxsel<mode>"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(if_then_else:VSX_L
	 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
		(match_operand:VSX_L 4 "zero_constant" ""))
	 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
	 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsel %x0,%x3,%x2,%x1"
  [(set_attr "type" "vecperm")])
1170
;; Unsigned-comparison variant of the vector select (CCUNS mode on the
;; mask test); generates the identical xxsel instruction.
(define_insn "*vsx_xxsel<mode>_uns"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(if_then_else:VSX_L
	 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
		   (match_operand:VSX_L 4 "zero_constant" ""))
	 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
	 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsel %x0,%x3,%x2,%x1"
  [(set_attr "type" "vecperm")])
1181
;; Copy sign
;; xvcpsgn takes the sign source first, hence operand 2 before operand 1
;; in the output template.
(define_insn "vsx_copysign<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(unspec:VSX_F
	 [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
	  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
	 UNSPEC_COPYSIGN))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcpsgn<VSs> %x0,%x2,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
1193
1194;; For the conversions, limit the register class for the integer value to be
1195;; the fprs because we don't want to add the altivec registers to movdi/movsi.
1196;; For the unsigned tests, there isn't a generic double -> unsigned conversion
1197;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
1198;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md.
;; Signed integer to floating point conversion (xscvsxddp/xvcvsxd.../
;; xvcvsxw... depending on the mode attributes).
(define_insn "vsx_float<VSi><mode>2"
  [(set (match_operand:VSX_B 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
	(float:VSX_B (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>cvsx<VSc><VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
1206
;; Unsigned integer to floating point conversion.
(define_insn "vsx_floatuns<VSi><mode>2"
  [(set (match_operand:VSX_B 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
	(unsigned_float:VSX_B (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>cvux<VSc><VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
1214
;; Floating point to signed integer conversion with truncation.
(define_insn "vsx_fix_trunc<mode><VSi>2"
  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
	(fix:<VSI> (match_operand:VSX_B 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
1222
;; Floating point to unsigned integer conversion with truncation.
(define_insn "vsx_fixuns_trunc<mode><VSi>2"
  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
	(unsigned_fix:<VSI> (match_operand:VSX_B 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
1230
;; Math rounding functions
;; Round to nearest integral value ("i" suffix).
(define_insn "vsx_x<VSv>r<VSs>i"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
		      UNSPEC_VSX_ROUND_I))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<VSs>i %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
1240
;; Round to integral value using current rounding mode ("ic" suffix).
(define_insn "vsx_x<VSv>r<VSs>ic"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
		      UNSPEC_VSX_ROUND_IC))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<VSs>ic %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
1249
;; Round toward zero / truncate ("iz" suffix), expressed as a fix RTL.
(define_insn "vsx_btrunc<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<VSs>iz %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
1257
;; Round toward zero, unspec form covering the scalar variant too
;; (xsrdpiz for DF as well as the xvr...iz vector forms).
(define_insn "*vsx_b2trunc<mode>2"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
		      UNSPEC_FRIZ))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<VSs>iz %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
1266
;; Round toward minus infinity ("im" suffix) -- floor.
(define_insn "vsx_floor<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
		      UNSPEC_FRIM))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<VSs>im %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
1275
;; Round toward plus infinity ("ip" suffix) -- ceiling.
(define_insn "vsx_ceil<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
		      UNSPEC_FRIP))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<VSs>ip %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
1284
1285
1286;; VSX convert to/from double vector
1287
1288;; Convert between single and double precision
1289;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
1290;; scalar single precision instructions internally use the double format.
1291;; Prefer the altivec registers, since we likely will need to do a vperm
;; Generic single<->double conversion; the mnemonic, result mode and type
;; all come from the VS_spdp_* mode attributes.
(define_insn "vsx_<VS_spdp_insn>"
  [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
	(unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
			      UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "<VS_spdp_insn> %x0,%x1"
  [(set_attr "type" "<VS_spdp_type>")])
1299
;; xscvspdp, represent the scalar SF type as V4SF
;; Scalar single to double conversion, with the input viewed as V4SF.
(define_insn "vsx_xscvspdp"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
	(unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvspdp %x0,%x1"
  [(set_attr "type" "fp")])
1308
;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
;; format of scalars is actually DF.
(define_insn "vsx_xscvdpsp_scalar"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
	(unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
		     UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvdpsp %x0,%x1"
  [(set_attr "type" "fp")])
1318
;; Same as vsx_xscvspdp, but use SF as the type
(define_insn "vsx_xscvspdp_scalar2"
  [(set (match_operand:SF 0 "vsx_register_operand" "=f")
	(unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvspdp %x0,%x1"
  [(set_attr "type" "fp")])
1327
;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
;; Double to single, non-signalling form.
(define_insn "vsx_xscvdpspn"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww,?ww")
	(unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")]
		     UNSPEC_VSX_CVDPSPN))]
  "TARGET_XSCVDPSPN"
  "xscvdpspn %x0,%x1"
  [(set_attr "type" "fp")])
1336
;; Single to double, non-signalling form (ISA 2.07).
(define_insn "vsx_xscvspdpn"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?ws")
	(unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wf,wa")]
		   UNSPEC_VSX_CVSPDPN))]
  "TARGET_XSCVSPDPN"
  "xscvspdpn %x0,%x1"
  [(set_attr "type" "fp")])
1344
;; Non-signalling double-to-single with an SF scalar input (the internal
;; scalar SF representation is DF).
(define_insn "vsx_xscvdpspn_scalar"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,?wa")
	(unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww,ww")]
		     UNSPEC_VSX_CVDPSPN))]
  "TARGET_XSCVDPSPN"
  "xscvdpspn %x0,%x1"
  [(set_attr "type" "fp")])
1352
;; Used by direct move to move a SFmode value from GPR to VSX register
(define_insn "vsx_xscvspdpn_directmove"
  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
	(unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_CVSPDPN))]
  "TARGET_XSCVSPDPN"
  "xscvspdpn %x0,%x1"
  [(set_attr "type" "fp")])
1361
1362;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
1363
;; Convert V2DI signed integers to V2DF, then divide by 2**scale
;; (operand 2) by scaling the result in place.  Used by vec_ctf.
(define_expand "vsx_xvcvsxddp_scale"
  [(match_operand:V2DF 0 "vsx_register_operand" "")
   (match_operand:V2DI 1 "vsx_register_operand" "")
   (match_operand:QI 2 "immediate_operand" "")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  int scale = INTVAL(operands[2]);
  emit_insn (gen_vsx_xvcvsxddp (op0, op1));
  /* Negative scale divides the converted value by 2**scale.  */
  if (scale != 0)
    rs6000_scale_v2df (op0, op0, -scale);
  DONE;
})
1378
;; Vector convert signed doubleword to double precision.
(define_insn "vsx_xvcvsxddp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVSXDDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxddp %x0,%x1"
  [(set_attr "type" "vecdouble")])
1386
;; Convert V2DI unsigned integers to V2DF, then divide by 2**scale
;; (operand 2) by scaling the result in place.  Used by vec_ctf.
(define_expand "vsx_xvcvuxddp_scale"
  [(match_operand:V2DF 0 "vsx_register_operand" "")
   (match_operand:V2DI 1 "vsx_register_operand" "")
   (match_operand:QI 2 "immediate_operand" "")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  int scale = INTVAL(operands[2]);
  emit_insn (gen_vsx_xvcvuxddp (op0, op1));
  /* Negative scale divides the converted value by 2**scale.  */
  if (scale != 0)
    rs6000_scale_v2df (op0, op0, -scale);
  DONE;
})
1401
;; Vector convert unsigned doubleword to double precision.
(define_insn "vsx_xvcvuxddp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVUXDDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxddp %x0,%x1"
  [(set_attr "type" "vecdouble")])
1409
;; Multiply a V2DF vector by 2**scale (operand 2) and convert it to signed
;; V2DI.  Used by vec_cts.
(define_expand "vsx_xvcvdpsxds_scale"
  [(match_operand:V2DI 0 "vsx_register_operand" "")
   (match_operand:V2DF 1 "vsx_register_operand" "")
   (match_operand:QI 2 "immediate_operand" "")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp;
  int scale = INTVAL (operands[2]);
  /* With a zero scale, convert the input directly; previously an
     uninitialized temporary was converted in that case.  */
  if (scale == 0)
    tmp = op1;
  else
    {
      tmp = gen_reg_rtx (V2DFmode);
      rs6000_scale_v2df (tmp, op1, scale);
    }
  emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
  DONE;
})
1425
;; Vector convert double precision to signed doubleword.
(define_insn "vsx_xvcvdpsxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVDPSXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsxds %x0,%x1"
  [(set_attr "type" "vecdouble")])
1433
;; Multiply a V2DF vector by 2**scale (operand 2) and convert it to unsigned
;; V2DI.  Used by vec_ctu.
(define_expand "vsx_xvcvdpuxds_scale"
  [(match_operand:V2DI 0 "vsx_register_operand" "")
   (match_operand:V2DF 1 "vsx_register_operand" "")
   (match_operand:QI 2 "immediate_operand" "")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp;
  int scale = INTVAL (operands[2]);
  /* With a zero scale, convert the input directly; previously an
     uninitialized temporary was converted in that case.  */
  if (scale == 0)
    tmp = op1;
  else
    {
      tmp = gen_reg_rtx (V2DFmode);
      rs6000_scale_v2df (tmp, op1, scale);
    }
  emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
  DONE;
})
1449
;; Vector convert double precision to unsigned doubleword.
(define_insn "vsx_xvcvdpuxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVDPUXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpuxds %x0,%x1"
  [(set_attr "type" "vecdouble")])
1457
1458;; Convert from 64-bit to 32-bit types
1459;; Note, favor the Altivec registers since the usual use of these instructions
1460;; is in vector converts and we need to use the Altivec vperm instruction.
1461
;; Vector convert double precision to signed word.
(define_insn "vsx_xvcvdpsxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
	(unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
		     UNSPEC_VSX_CVDPSXWS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsxws %x0,%x1"
  [(set_attr "type" "vecdouble")])
1469
;; Vector convert double precision to unsigned word.
(define_insn "vsx_xvcvdpuxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
	(unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
		     UNSPEC_VSX_CVDPUXWS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpuxws %x0,%x1"
  [(set_attr "type" "vecdouble")])
1477
;; Vector convert signed doubleword to single precision (builtin use only;
;; the V4SI/V2DF modes here are just register-image placeholders).
(define_insn "vsx_xvcvsxdsp"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
	(unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
		     UNSPEC_VSX_CVSXDSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxdsp %x0,%x1"
  [(set_attr "type" "vecfloat")])
1485
;; Vector convert unsigned doubleword to single precision.  The template
;; previously emitted xvcvuxwdp (unsigned word -> double), which does not
;; match the pattern name or UNSPEC_VSX_CVUXDSP.
(define_insn "vsx_xvcvuxdsp"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
	(unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
		     UNSPEC_VSX_CVUXDSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxdsp %x0,%x1"
  [(set_attr "type" "vecdouble")])
1493
;; Convert from 32-bit to 64-bit types
;; Vector convert signed word to double precision.
(define_insn "vsx_xvcvsxwdp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
	(unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
		     UNSPEC_VSX_CVSXWDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])
1502
;; Vector convert unsigned word to double precision.
(define_insn "vsx_xvcvuxwdp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
	(unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
		     UNSPEC_VSX_CVUXWDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])
1510
;; Vector convert single precision to signed doubleword.
(define_insn "vsx_xvcvspsxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
	(unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
		     UNSPEC_VSX_CVSPSXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvspsxds %x0,%x1"
  [(set_attr "type" "vecdouble")])
1518
;; Vector convert single precision to unsigned doubleword.
(define_insn "vsx_xvcvspuxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
	(unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
		     UNSPEC_VSX_CVSPUXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvspuxds %x0,%x1"
  [(set_attr "type" "vecdouble")])
1526
;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
;; the xsrdpiz instruction does not truncate the value if the floating
;; point value is < LONG_MIN or > LONG_MAX.
;; Combine (float (fix x)) into a single round-toward-zero instruction;
;; only valid under unsafe-math/non-trapping since out-of-range values are
;; not truncated the way a real fix/float pair would behave.
(define_insn "*vsx_float_fix_<mode>2"
  [(set (match_operand:VSX_DF 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(float:VSX_DF
	 (fix:<VSI>
	  (match_operand:VSX_DF 1 "vsx_register_operand" "<VSr>,?<VSa>"))))]
  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
   && VECTOR_UNIT_VSX_P (<MODE>mode) && flag_unsafe_math_optimizations
   && !flag_trapping_math && TARGET_FRIZ"
  "x<VSv>r<VSs>iz %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])
1541
1542
1543;; Permute operations
1544
;; Build a V2DF/V2DI vector from two scalars
;; On little endian the operands are swapped so that element 0 of the
;; vector image still corresponds to operand 1.
(define_insn "vsx_concat_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(vec_concat:VSX_D
	 (match_operand:<VS_scalar> 1 "vsx_register_operand" "<VS_64reg>,<VSa>")
	 (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxpermdi %x0,%x1,%x2,0";
  else
    return "xxpermdi %x0,%x2,%x1,0";
}
  [(set_attr "type" "vecperm")])
1559
;; Special purpose concat using xxpermdi to glue two single precision values
;; together, relying on the fact that internally scalar floats are represented
;; as doubles.  This is used to initialize a V4SF vector with 4 floats
(define_insn "vsx_concat_v2sf"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
	(unspec:V2DF
	 [(match_operand:SF 1 "vsx_register_operand" "f,f")
	  (match_operand:SF 2 "vsx_register_operand" "f,f")]
	 UNSPEC_VSX_CONCAT))]
  "VECTOR_MEM_VSX_P (V2DFmode)"
{
  /* Swap operands on little endian so element 0 is operand 1.  */
  if (BYTES_BIG_ENDIAN)
    return "xxpermdi %x0,%x1,%x2,0";
  else
    return "xxpermdi %x0,%x2,%x1,0";
}
  [(set_attr "type" "vecperm")])
1577
;; xxpermdi for little endian loads and stores.  We need several of
;; these since the form of the PARALLEL differs by mode.
;; 2-element doubleword swap.
(define_insn "*vsx_xxpermdi2_le_<mode>"
  [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
        (vec_select:VSX_LE
          (match_operand:VSX_LE 1 "vsx_register_operand" "<VSa>")
          (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])
1588
;; Doubleword swap expressed as a 4-element word permutation.
(define_insn "*vsx_xxpermdi4_le_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
        (vec_select:VSX_W
          (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])
1598
;; Doubleword swap expressed as an 8-element halfword permutation.
(define_insn "*vsx_xxpermdi8_le_V8HI"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])
1610
;; Doubleword swap expressed as a 16-element byte permutation.
(define_insn "*vsx_xxpermdi16_le_V16QI"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])
1626
1627;; lxvd2x for little endian loads.  We need several of
1628;; these since the form of the PARALLEL differs by mode.
;; LE load whose doubleword swap cancels lxvd2x's implicit element swap.
(define_insn "*vsx_lxvd2x2_le_<mode>"
  [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
        (vec_select:VSX_LE
          (match_operand:VSX_LE 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])
1637
;; Same as above for 4-element word modes.
(define_insn "*vsx_lxvd2x4_le_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
        (vec_select:VSX_W
          (match_operand:VSX_W 1 "memory_operand" "Z")
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])
1647
;; Same as above for V8HI.
(define_insn "*vsx_lxvd2x8_le_V8HI"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])
1659
;; Same as above for V16QI.
(define_insn "*vsx_lxvd2x16_le_V16QI"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])
1675
1676;; stxvd2x for little endian stores.  We need several of
1677;; these since the form of the PARALLEL differs by mode.
;; Mirror of the lxvd2x patterns above: the vec_select swaps the two
;; doublewords of the register source, which is exactly the
;; transformation stxvd2x performs on a little-endian store.
1678(define_insn "*vsx_stxvd2x2_le_<mode>"
1679  [(set (match_operand:VSX_LE 0 "memory_operand" "=Z")
1680        (vec_select:VSX_LE
1681          (match_operand:VSX_LE 1 "vsx_register_operand" "<VSa>")
1682          (parallel [(const_int 1) (const_int 0)])))]
1683  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1684  "stxvd2x %x1,%y0"
1685  [(set_attr "type" "vecstore")])
1686
;; Little-endian stxvd2x for the 4x32-bit vector modes; the selector
;; (2 3 0 1) is the word-pair form of the doubleword swap.
1687(define_insn "*vsx_stxvd2x4_le_<mode>"
1688  [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
1689        (vec_select:VSX_W
1690          (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
1691          (parallel [(const_int 2) (const_int 3)
1692                     (const_int 0) (const_int 1)])))]
1693  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1694  "stxvd2x %x1,%y0"
1695  [(set_attr "type" "vecstore")])
1696
;; Little-endian stxvd2x for V8HI; the selector swaps the two
;; four-halfword groups (elements 4-7 then 0-3).
1697(define_insn "*vsx_stxvd2x8_le_V8HI"
1698  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1699        (vec_select:V8HI
1700          (match_operand:V8HI 1 "vsx_register_operand" "wa")
1701          (parallel [(const_int 4) (const_int 5)
1702                     (const_int 6) (const_int 7)
1703                     (const_int 0) (const_int 1)
1704                     (const_int 2) (const_int 3)])))]
1705  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
1706  "stxvd2x %x1,%y0"
1707  [(set_attr "type" "vecstore")])
1708
;; Little-endian stxvd2x for V16QI; the selector swaps the two
;; eight-byte groups (elements 8-15 then 0-7).
1709(define_insn "*vsx_stxvd2x16_le_V16QI"
1710  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1711        (vec_select:V16QI
1712          (match_operand:V16QI 1 "vsx_register_operand" "wa")
1713          (parallel [(const_int 8) (const_int 9)
1714                     (const_int 10) (const_int 11)
1715                     (const_int 12) (const_int 13)
1716                     (const_int 14) (const_int 15)
1717                     (const_int 0) (const_int 1)
1718                     (const_int 2) (const_int 3)
1719                     (const_int 4) (const_int 5)
1720                     (const_int 6) (const_int 7)])))]
1721  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
1722  "stxvd2x %x1,%y0"
1723  [(set_attr "type" "vecstore")])
1724
1725;; Convert a TImode value into V1TImode
;; Builtin expander: operand 0 = destination vector, operand 1 = the
;; source vector, operand 2 = the TImode element value, operand 3 =
;; the element index.  A V1TI vector has exactly one element, so only
;; index 0 is meaningful; inserting reduces to moving the TImode value
;; into the destination through a lowpart subreg.  The previous code
;; took the lowpart of operands[1] (a no-op subreg of the old vector),
;; discarding the inserted value entirely; operand 2 is correct.
1726(define_expand "vsx_set_v1ti"
1727  [(match_operand:V1TI 0 "nonimmediate_operand" "")
1728   (match_operand:V1TI 1 "nonimmediate_operand" "")
1729   (match_operand:TI 2 "input_operand" "")
1730   (match_operand:QI 3 "u5bit_cint_operand" "")]
1731  "VECTOR_MEM_VSX_P (V1TImode)"
1732{
1733  if (operands[3] != const0_rtx)
1734    gcc_unreachable ();
1735
1736  emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
1737  DONE;
1738})
1739
1740;; Set the element of a V2DI/V2DF mode
;; xxpermdi merges one doubleword from each source: immediate 1 puts
;; the new scalar (operand 2, in doubleword 0 of its register) into
;; doubleword 0 of the result; immediate 0 puts it into doubleword 1.
;; idx_first maps the element number to the register-layout doubleword
;; for the current endianness.
1741(define_insn "vsx_set_<mode>"
1742  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?<VSa>")
1743	(unspec:VSX_D
1744	 [(match_operand:VSX_D 1 "vsx_register_operand" "wd,<VSa>")
1745	  (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>")
1746	  (match_operand:QI 3 "u5bit_cint_operand" "i,i")]
1747	 UNSPEC_VSX_SET))]
1748  "VECTOR_MEM_VSX_P (<MODE>mode)"
1749{
1750  int idx_first = BYTES_BIG_ENDIAN ? 0 : 1;
1751  if (INTVAL (operands[3]) == idx_first)
1752    return \"xxpermdi %x0,%x2,%x1,1\";
1753  else if (INTVAL (operands[3]) == 1 - idx_first)
1754    return \"xxpermdi %x0,%x1,%x2,0\";
1755  else
1756    gcc_unreachable ();
1757}
1758  [(set_attr "type" "vecperm")])
1759
1760;; Extract a DF/DI element from V2DF/V2DI
;; xxpermdi with DM = 2*idx rotates the requested doubleword of
;; operand 1 into doubleword 0 of the result, where a 64-bit scalar
;; lives.  On little endian the element numbering is reversed, so the
;; immediate is remapped with 3 - DM.
1761(define_insn "vsx_extract_<mode>"
1762  [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,d,?wa")
1763	(vec_select:<VS_scalar> (match_operand:VSX_D 1 "vsx_register_operand" "wd,wd,wa")
1764		       (parallel
1765			[(match_operand:QI 2 "u5bit_cint_operand" "i,i,i")])))]
1766  "VECTOR_MEM_VSX_P (<MODE>mode)"
1767{
1768  int fldDM;
1769  gcc_assert (UINTVAL (operands[2]) <= 1);
1770  fldDM = INTVAL (operands[2]) << 1;
1771  if (!BYTES_BIG_ENDIAN)
1772    fldDM = 3 - fldDM;
1773  operands[3] = GEN_INT (fldDM);
1774  return \"xxpermdi %x0,%x1,%x1,%3\";
1775}
1776  [(set_attr "type" "vecperm")])
1777
1778;; Optimize extracting element 0 from memory
;; Element 0 lives at the vector's base address, so a plain 64-bit
;; scalar indexed load (lxsdx / update form) fetches it directly
;; without loading the whole vector and permuting.  The type attr
;; distinguishes the update-indexed addressing form.
1779(define_insn "*vsx_extract_<mode>_zero"
1780  [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,d,?wa")
1781	(vec_select:<VS_scalar>
1782	 (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z,Z,Z")
1783	 (parallel [(const_int 0)])))]
1784  "VECTOR_MEM_VSX_P (<MODE>mode)"
1785  "lxsd%U1x %x0,%y1"
1786  [(set (attr "type")
1787      (if_then_else
1788	(match_test "update_indexed_address_mem (operands[1], VOIDmode)")
1789	(const_string "fpload_ux")
1790	(const_string "fpload")))
1791   (set_attr "length" "4")])
1792
1793;; Extract a SF element from V4SF
;; Element 0 (constraint "O" alternative) only needs the SP->DP scalar
;; convert xscvspdp, which reads word 0.  Any other element is first
;; rotated into word 0 with xxsldwi, then converted; the split emits
;; that two-insn sequence.  On little endian the element number is
;; remapped with 3 - n to account for reversed element order.
1794(define_insn_and_split "vsx_extract_v4sf"
1795  [(set (match_operand:SF 0 "vsx_register_operand" "=f,f")
1796	(vec_select:SF
1797	 (match_operand:V4SF 1 "vsx_register_operand" "<VSa>,<VSa>")
1798	 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "O,i")])))
1799   (clobber (match_scratch:V4SF 3 "=X,0"))]
1800  "VECTOR_UNIT_VSX_P (V4SFmode)"
1801  "@
1802   xscvspdp %x0,%x1
1803   #"
1804  ""
1805  [(const_int 0)]
1806  "
1807{
1808  rtx op0 = operands[0];
1809  rtx op1 = operands[1];
1810  rtx op2 = operands[2];
1811  rtx op3 = operands[3];
1812  rtx tmp;
1813  HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
1814
1815  if (ele == 0)
1816    tmp = op1;
1817  else
1818    {
1819      /* Rotate the wanted word into word 0 first.  */
1820      if (GET_CODE (op3) == SCRATCH)
1821	op3 = gen_reg_rtx (V4SFmode);
1821      emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
1822      tmp = op3;
1823    }
1824  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
1825  DONE;
1826}"
1827  [(set_attr "length" "4,8")
1828   (set_attr "type" "fp")])
1829
1830;; Expand the builtin form of xxpermdi to canonical rtl.
;; The builtin's 2-bit mask (operand 3) is decomposed into the two
;; doubleword selectors of the canonical vec_select/vec_concat form.
;; Only V2DF has its own _1 pattern; every other mode is funneled
;; through V2DI lowpart subregs.
1831(define_expand "vsx_xxpermdi_<mode>"
1832  [(match_operand:VSX_L 0 "vsx_register_operand" "")
1833   (match_operand:VSX_L 1 "vsx_register_operand" "")
1834   (match_operand:VSX_L 2 "vsx_register_operand" "")
1835   (match_operand:QI 3 "u5bit_cint_operand" "")]
1836  "VECTOR_MEM_VSX_P (<MODE>mode)"
1837{
1838  rtx target = operands[0];
1839  rtx op0 = operands[1];
1840  rtx op1 = operands[2];
1841  int mask = INTVAL (operands[3]);
1842  rtx perm0 = GEN_INT ((mask >> 1) & 1);
1843  rtx perm1 = GEN_INT ((mask & 1) + 2);
1844  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
1845
1846  if (<MODE>mode == V2DFmode)
1847    gen = gen_vsx_xxpermdi2_v2df_1;
1848  else
1849    {
1850      gen = gen_vsx_xxpermdi2_v2di_1;
1851      if (<MODE>mode != V2DImode)
1852	{
1853	  target = gen_lowpart (V2DImode, target);
1854	  op0 = gen_lowpart (V2DImode, op0);
1855	  op1 = gen_lowpart (V2DImode, op1);
1856	}
1857    }
1858  /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
1859     transformation we don't want; it is necessary for
1860     rs6000_expand_vec_perm_const_1 but not for this use.  So we
1861     prepare for that by reversing the transformation here.  */
1862  if (BYTES_BIG_ENDIAN)
1863    emit_insn (gen (target, op0, op1, perm0, perm1));
1864  else
1865    {
1866      rtx p0 = GEN_INT (3 - INTVAL (perm1));
1867      rtx p1 = GEN_INT (3 - INTVAL (perm0));
1868      emit_insn (gen (target, op1, op0, p0, p1));
1869    }
1870  DONE;
1871})
1872
;; Canonical xxpermdi: select one doubleword from each of the two
;; concatenated sources.  Operand 3 picks from operand 1 (0-1) and
;; operand 4 from operand 2 (2-3); together they form the 2-bit DM
;; immediate.  See the block comment below for the little-endian
;; operand/selector adjustment.
1873(define_insn "vsx_xxpermdi2_<mode>_1"
1874  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
1875	(vec_select:VSX_D
1876	  (vec_concat:<VS_double>
1877	    (match_operand:VSX_D 1 "vsx_register_operand" "wd")
1878	    (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
1879	  (parallel [(match_operand 3 "const_0_to_1_operand" "")
1880		     (match_operand 4 "const_2_to_3_operand" "")])))]
1881  "VECTOR_MEM_VSX_P (<MODE>mode)"
1882{
1883  int op3, op4, mask;
1884
1885  /* For little endian, swap operands and invert/swap selectors
1886     to get the correct xxpermdi.  The operand swap sets up the
1887     inputs as a little endian array.  The selectors are swapped
1888     because they are defined to use big endian ordering.  The
1889     selectors are inverted to get the correct doublewords for
1890     little endian ordering.  */
1891  if (BYTES_BIG_ENDIAN)
1892    {
1893      op3 = INTVAL (operands[3]);
1894      op4 = INTVAL (operands[4]);
1895    }
1896  else
1897    {
1898      op3 = 3 - INTVAL (operands[4]);
1899      op4 = 3 - INTVAL (operands[3]);
1900    }
1901
1902  mask = (op3 << 1) | (op4 - 2);
1903  operands[3] = GEN_INT (mask);
1904
1905  if (BYTES_BIG_ENDIAN)
1906    return "xxpermdi %x0,%x1,%x2,%3";
1907  else
1908    return "xxpermdi %x0,%x2,%x1,%3";
1909}
1910  [(set_attr "type" "vecperm")])
1911
;; Standard vec_perm_const pattern for the 64-bit element vector
;; modes; all the work is done in rs6000_expand_vec_perm_const,
;; which FAILs if the constant permutation cannot be matched.
1912(define_expand "vec_perm_const<mode>"
1913  [(match_operand:VSX_D 0 "vsx_register_operand" "")
1914   (match_operand:VSX_D 1 "vsx_register_operand" "")
1915   (match_operand:VSX_D 2 "vsx_register_operand" "")
1916   (match_operand:V2DI  3 "" "")]
1917  "VECTOR_MEM_VSX_P (<MODE>mode)"
1918{
1919  if (rs6000_expand_vec_perm_const (operands))
1920    DONE;
1921  else
1922    FAIL;
1923})
1924
1925;; Expanders for builtins
;; vec_mergel: select the low doubleword of each input.  For LE with
;; -maltivec=be the element order and operand order are both flipped
;; so the builtin still sees big-endian element semantics.
1926(define_expand "vsx_mergel_<mode>"
1927  [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
1928   (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
1929   (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
1930  "VECTOR_MEM_VSX_P (<MODE>mode)"
1931{
1932  rtvec v;
1933  rtx x;
1934
1935  /* Special handling for LE with -maltivec=be.  */
1936  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
1937    {
1938      v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
1939      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
1940    }
1941  else
1942    {
1943      v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
1944      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
1945    }
1946
1947  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
1948  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1949  DONE;
1950})
1951
;; vec_mergeh: select the high doubleword of each input; the mirror
;; of vsx_mergel_<mode> above, with the same -maltivec=be handling.
1952(define_expand "vsx_mergeh_<mode>"
1953  [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
1954   (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
1955   (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
1956  "VECTOR_MEM_VSX_P (<MODE>mode)"
1957{
1958  rtvec v;
1959  rtx x;
1960
1961  /* Special handling for LE with -maltivec=be.  */
1962  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
1963    {
1964      v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
1965      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
1966    }
1967    else
1968    {
1969      v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
1970      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
1971    }
1972
1973  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
1974  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1975  DONE;
1976})
1977
1978;; V2DF/V2DI splat
;; Register sources splat doubleword 0 into both halves with
;; xxpermdi DM=0; memory sources use the lxvdsx load-and-splat
;; instruction directly.
1979(define_insn "vsx_splat_<mode>"
1980  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,wd,wd,?<VSa>,?<VSa>,?<VSa>")
1981	(vec_duplicate:VSX_D
1982	 (match_operand:<VS_scalar> 1 "splat_input_operand" "<VS_64reg>,f,Z,<VSa>,<VSa>,Z")))]
1983  "VECTOR_MEM_VSX_P (<MODE>mode)"
1984  "@
1985   xxpermdi %x0,%x1,%x1,0
1986   xxpermdi %x0,%x1,%x1,0
1987   lxvdsx %x0,%y1
1988   xxpermdi %x0,%x1,%x1,0
1989   xxpermdi %x0,%x1,%x1,0
1990   lxvdsx %x0,%y1"
1991  [(set_attr "type" "vecperm,vecperm,vecload,vecperm,vecperm,vecload")])
1992
1993;; V4SF/V4SI splat
;; Splat word element N to all four words.  xxspltw numbers words in
;; big-endian order, so on little endian the element index is
;; remapped with 3 - N before emitting the immediate.
1994(define_insn "vsx_xxspltw_<mode>"
1995  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
1996	(vec_duplicate:VSX_W
1997	 (vec_select:<VS_scalar>
1998	  (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
1999	  (parallel
2000	   [(match_operand:QI 2 "u5bit_cint_operand" "i,i")]))))]
2001  "VECTOR_MEM_VSX_P (<MODE>mode)"
2002{
2003  if (!BYTES_BIG_ENDIAN)
2004    operands[2] = GEN_INT (3 - INTVAL (operands[2]));
2005
2006  return "xxspltw %x0,%x1,%2";
2007}
2008  [(set_attr "type" "vecperm")])
2009
;; Direct (unspec) form of xxspltw: the immediate is emitted
;; unchanged, with no endian remapping — presumably for callers that
;; have already chosen the hardware word number; confirm at call
;; sites before reusing.
2010(define_insn "vsx_xxspltw_<mode>_direct"
2011  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2012        (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2013                       (match_operand:QI 2 "u5bit_cint_operand" "i,i")]
2014                      UNSPEC_VSX_XXSPLTW))]
2015  "VECTOR_MEM_VSX_P (<MODE>mode)"
2016  "xxspltw %x0,%x1,%2"
2017  [(set_attr "type" "vecperm")])
2018
2019;; V2DF/V2DI splat for use by vec_splat builtin
;; xxpermdi DM=0 splats register doubleword 0, DM=3 splats
;; doubleword 1; the element-order test maps the builtin's logical
;; element number onto the right register doubleword.
2020(define_insn "vsx_xxspltd_<mode>"
2021  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2022        (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
2023	               (match_operand:QI 2 "u5bit_cint_operand" "i")]
2024                      UNSPEC_VSX_XXSPLTD))]
2025  "VECTOR_MEM_VSX_P (<MODE>mode)"
2026{
2027  if ((VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 0)
2028      || (!VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 1))
2029    return "xxpermdi %x0,%x1,%x1,0";
2030  else
2031    return "xxpermdi %x0,%x1,%x1,3";
2032}
2033  [(set_attr "type" "vecperm")])
2034
2035;; V4SF/V4SI interleave
;; Interleave the high words of the two inputs (0 4 1 5 in BE element
;; numbering).  On little endian the same RTL is realized by the
;; opposite instruction with the operands swapped.
2036(define_insn "vsx_xxmrghw_<mode>"
2037  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2038        (vec_select:VSX_W
2039	  (vec_concat:<VS_double>
2040	    (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2041	    (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
2042	  (parallel [(const_int 0) (const_int 4)
2043		     (const_int 1) (const_int 5)])))]
2044  "VECTOR_MEM_VSX_P (<MODE>mode)"
2045{
2046  if (BYTES_BIG_ENDIAN)
2047    return "xxmrghw %x0,%x1,%x2";
2048  else
2049    return "xxmrglw %x0,%x2,%x1";
2050}
2051  [(set_attr "type" "vecperm")])
2052
;; Interleave the low words of the two inputs (2 6 3 7 in BE element
;; numbering); mirror of vsx_xxmrghw_<mode> above, with the same
;; endian-swapped emission on little endian.
;; Note: operand 2's constraint previously carried a stray "?"
;; disparagement ("wf,?<VSa>"); "?" belongs only on the output
;; alternative, matching operand 1 here and vsx_xxmrghw_<mode>.
2053(define_insn "vsx_xxmrglw_<mode>"
2054  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2055	(vec_select:VSX_W
2056	  (vec_concat:<VS_double>
2057	    (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2058	    (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
2059	  (parallel [(const_int 2) (const_int 6)
2060		     (const_int 3) (const_int 7)])))]
2061  "VECTOR_MEM_VSX_P (<MODE>mode)"
2062{
2063  if (BYTES_BIG_ENDIAN)
2064    return "xxmrglw %x0,%x1,%x2";
2065  else
2066    return "xxmrghw %x0,%x2,%x1";
2067}
2068  [(set_attr "type" "vecperm")])
2069
2070;; Shift left double by word immediate
;; Direct builtin mapping to xxsldwi: concatenate operands 1 and 2
;; and take four consecutive words starting at word offset operand 3.
;; Unspec form, so no endian adjustment is applied here.
2071(define_insn "vsx_xxsldwi_<mode>"
2072  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
2073	(unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
2074		       (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
2075		       (match_operand:QI 3 "u5bit_cint_operand" "i")]
2076		      UNSPEC_VSX_SLDWI))]
2077  "VECTOR_MEM_VSX_P (<MODE>mode)"
2078  "xxsldwi %x0,%x1,%x2,%3"
2079  [(set_attr "type" "vecperm")])
2080
2081
2082;; Vector reduction insns and splitters
2083
;; Reduce a V2DF with plus/smin/smax: swap the two doublewords with
;; xxsldwi (shift by 2 words), then combine with the vector op, so
;; both result elements hold the reduction.  The first two
;; alternatives reuse the earlyclobber output as the temporary; the
;; others use the scratch.
2084(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df"
2085  [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
2086	(VEC_reduc:V2DF
2087	 (vec_concat:V2DF
2088	  (vec_select:DF
2089	   (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
2090	   (parallel [(const_int 1)]))
2091	  (vec_select:DF
2092	   (match_dup 1)
2093	   (parallel [(const_int 0)])))
2094	 (match_dup 1)))
2095   (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
2096  "VECTOR_UNIT_VSX_P (V2DFmode)"
2097  "#"
2098  ""
2099  [(const_int 0)]
2100  "
2101{
2102  rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
2103	     ? gen_reg_rtx (V2DFmode)
2104	     : operands[2];
2105  emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
2106  emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
2107  DONE;
2108}"
2109  [(set_attr "length" "8")
2110   (set_attr "type" "veccomplex")])
2111
;; Reduce a V4SF in two rotate/combine rounds: rotate by 2 words and
;; combine, then rotate by 3 words and combine again.  The UNSPEC_REDUC
;; operand keeps the pattern from matching anything else.  Before
;; register allocation fresh pseudos are used; afterwards the
;; scratches stand in (tmp4 reuses tmp2's register).
2112(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf"
2113  [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
2114	(VEC_reduc:V4SF
2115	 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
2116	 (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
2117   (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
2118   (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
2119  "VECTOR_UNIT_VSX_P (V4SFmode)"
2120  "#"
2121  ""
2122  [(const_int 0)]
2123  "
2124{
2125  rtx op0 = operands[0];
2126  rtx op1 = operands[1];
2127  rtx tmp2, tmp3, tmp4;
2128
2129  if (can_create_pseudo_p ())
2130    {
2131      tmp2 = gen_reg_rtx (V4SFmode);
2132      tmp3 = gen_reg_rtx (V4SFmode);
2133      tmp4 = gen_reg_rtx (V4SFmode);
2134    }
2135  else
2136    {
2137      tmp2 = operands[2];
2138      tmp3 = operands[3];
2139      tmp4 = tmp2;
2140    }
2141
2142  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
2143  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
2144  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
2145  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
2146  DONE;
2147}"
2148  [(set_attr "length" "16")
2149   (set_attr "type" "veccomplex")])
2150
2151;; Combiner patterns with the vector reduction patterns that knows we can get
2152;; to the top element of the V2DF array without doing an extract.
2153
;; V2DF reduction whose scalar result is taken from element 1: the
;; high doubleword of the input register already is a DF scalar
;; (gen_highpart), so only the other element needs an extract before
;; the scalar combine.
2154(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
2155  [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
2156	(vec_select:DF
2157	 (VEC_reduc:V2DF
2158	  (vec_concat:V2DF
2159	   (vec_select:DF
2160	    (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
2161	    (parallel [(const_int 1)]))
2162	   (vec_select:DF
2163	    (match_dup 1)
2164	    (parallel [(const_int 0)])))
2165	  (match_dup 1))
2166	 (parallel [(const_int 1)])))
2167   (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
2168  "VECTOR_UNIT_VSX_P (V2DFmode)"
2169  "#"
2170  ""
2171  [(const_int 0)]
2172  "
2173{
2174  rtx hi = gen_highpart (DFmode, operands[1]);
2175  rtx lo = (GET_CODE (operands[2]) == SCRATCH)
2176	    ? gen_reg_rtx (DFmode)
2177	    : operands[2];
2178
2179  emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
2180  emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
2181  DONE;
2182}"
2183  [(set_attr "length" "8")
2184   (set_attr "type" "veccomplex")])
2185
;; V4SF reduction whose scalar result is extracted as element 3: the
;; same two rotate/combine rounds as the vector form above, followed
;; by xscvspdp to produce the SF scalar from the reduced vector.
2186(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
2187  [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
2188	(vec_select:SF
2189	 (VEC_reduc:V4SF
2190	  (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
2191	  (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
2192	 (parallel [(const_int 3)])))
2193   (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
2194   (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
2195   (clobber (match_scratch:V4SF 4 "=0,0"))]
2196  "VECTOR_UNIT_VSX_P (V4SFmode)"
2197  "#"
2198  ""
2199  [(const_int 0)]
2200  "
2201{
2202  rtx op0 = operands[0];
2203  rtx op1 = operands[1];
2204  rtx tmp2, tmp3, tmp4, tmp5;
2205
2206  if (can_create_pseudo_p ())
2207    {
2208      tmp2 = gen_reg_rtx (V4SFmode);
2209      tmp3 = gen_reg_rtx (V4SFmode);
2210      tmp4 = gen_reg_rtx (V4SFmode);
2211      tmp5 = gen_reg_rtx (V4SFmode);
2212    }
2213  else
2214    {
2215      tmp2 = operands[2];
2216      tmp3 = operands[3];
2217      tmp4 = tmp2;
2218      tmp5 = operands[4];
2219    }
2220
2221  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
2222  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
2223  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
2224  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
2225  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
2226  DONE;
2227}"
2228  [(set_attr "length" "20")
2229   (set_attr "type" "veccomplex")])
2230
2231
2232;; Power8 Vector fusion.  The fused ops must be physically adjacent.
;; Peephole: a load-immediate into a base register followed by an
;; indexed vector load using that register (base + index) is
;; re-emitted as the adjacent li/lx..x pair that Power8 fuses.
;; This variant matches the immediate register as the first addend.
2233(define_peephole
2234  [(set (match_operand:P 0 "base_reg_operand" "")
2235	(match_operand:P 1 "short_cint_operand" ""))
2236   (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
2237	(mem:VSX_M2 (plus:P (match_dup 0)
2238			    (match_operand:P 3 "int_reg_operand" ""))))]
2239  "TARGET_VSX && TARGET_P8_FUSION"
2240  "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
2241  [(set_attr "length" "8")
2242   (set_attr "type" "vecload")])
2243
;; Same Power8 load-fusion peephole as above, matching the commuted
;; address form (other register first, immediate register second);
;; the emitted li/lx..x sequence is identical.
2244(define_peephole
2245  [(set (match_operand:P 0 "base_reg_operand" "")
2246	(match_operand:P 1 "short_cint_operand" ""))
2247   (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
2248	(mem:VSX_M2 (plus:P (match_operand:P 3 "int_reg_operand" "")
2249			    (match_dup 0))))]
2250  "TARGET_VSX && TARGET_P8_FUSION"
2251  "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
2252  [(set_attr "length" "8")
2253   (set_attr "type" "vecload")])
2254