1;; VSX patterns.
2;; Copyright (C) 2009-2018 Free Software Foundation, Inc.
3;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
4
5;; This file is part of GCC.
6
7;; GCC is free software; you can redistribute it and/or modify it
8;; under the terms of the GNU General Public License as published
9;; by the Free Software Foundation; either version 3, or (at your
10;; option) any later version.
11
12;; GCC is distributed in the hope that it will be useful, but WITHOUT
13;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15;; License for more details.
16
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3.  If not see
19;; <http://www.gnu.org/licenses/>.
20
21;; Iterator for comparison types
22(define_code_iterator CMP_TEST [eq lt gt unordered])
23
24;; Mode attribute for vector floate and floato conversions
25(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])
26
27;; Iterator for both scalar and vector floating point types supported by VSX
28(define_mode_iterator VSX_B [DF V4SF V2DF])
29
30;; Iterator for the 2 64-bit vector types
31(define_mode_iterator VSX_D [V2DF V2DI])
32
33;; Mode iterator to handle swapping words on little endian for the 128-bit
34;; types that goes in a single vector register.
35(define_mode_iterator VSX_LE_128 [(KF   "FLOAT128_VECTOR_P (KFmode)")
36				  (TF   "FLOAT128_VECTOR_P (TFmode)")
37				  TI
38				  V1TI])
39
40;; Iterator for 128-bit integer types that go in a single vector register.
41(define_mode_iterator VSX_TI [TI V1TI])
42
43;; Iterator for the 2 32-bit vector types
44(define_mode_iterator VSX_W [V4SF V4SI])
45
46;; Iterator for the DF types
47(define_mode_iterator VSX_DF [V2DF DF])
48
49;; Iterator for vector floating point types supported by VSX
50(define_mode_iterator VSX_F [V4SF V2DF])
51
52;; Iterator for logical types supported by VSX
53(define_mode_iterator VSX_L [V16QI
54			     V8HI
55			     V4SI
56			     V2DI
57			     V4SF
58			     V2DF
59			     V1TI
60			     TI
61			     (KF	"FLOAT128_VECTOR_P (KFmode)")
62			     (TF	"FLOAT128_VECTOR_P (TFmode)")])
63
64;; Iterator for memory moves.
65(define_mode_iterator VSX_M [V16QI
66			     V8HI
67			     V4SI
68			     V2DI
69			     V4SF
70			     V2DF
71			     V1TI
72			     (KF	"FLOAT128_VECTOR_P (KFmode)")
73			     (TF	"FLOAT128_VECTOR_P (TFmode)")
74			     TI])
75
76(define_mode_attr VSX_XXBR  [(V8HI  "h")
77			     (V4SI  "w")
78			     (V4SF  "w")
79			     (V2DF  "d")
80			     (V2DI  "d")
81			     (V1TI  "q")])
82
83;; Map into the appropriate load/store name based on the type
84(define_mode_attr VSm  [(V16QI "vw4")
85			(V8HI  "vw4")
86			(V4SI  "vw4")
87			(V4SF  "vw4")
88			(V2DF  "vd2")
89			(V2DI  "vd2")
90			(DF    "d")
91			(TF    "vd2")
92			(KF    "vd2")
93			(V1TI  "vd2")
94			(TI    "vd2")])
95
96;; Map into the appropriate suffix based on the type
97(define_mode_attr VSs	[(V16QI "sp")
98			 (V8HI  "sp")
99			 (V4SI  "sp")
100			 (V4SF  "sp")
101			 (V2DF  "dp")
102			 (V2DI  "dp")
103			 (DF    "dp")
104			 (SF	"sp")
105			 (TF    "dp")
106			 (KF    "dp")
107			 (V1TI  "dp")
108			 (TI    "dp")])
109
110;; Map the register class used
111(define_mode_attr VSr	[(V16QI "v")
112			 (V8HI  "v")
113			 (V4SI  "v")
114			 (V4SF  "wf")
115			 (V2DI  "wd")
116			 (V2DF  "wd")
117			 (DI	"wi")
118			 (DF    "ws")
119			 (SF	"ww")
120			 (TF	"wp")
121			 (KF	"wq")
122			 (V1TI  "v")
123			 (TI    "wt")])
124
125;; Map the register class used for float<->int conversions (floating point side)
126;; VSr2 is the preferred register class, VSr3 is any register class that will
127;; hold the data
128(define_mode_attr VSr2	[(V2DF  "wd")
129			 (V4SF  "wf")
130			 (DF    "ws")
131			 (SF	"ww")
132			 (DI	"wi")
133			 (KF	"wq")
134			 (TF	"wp")])
135
136(define_mode_attr VSr3	[(V2DF  "wa")
137			 (V4SF  "wa")
138			 (DF    "ws")
139			 (SF	"ww")
140			 (DI	"wi")
141			 (KF	"wq")
142			 (TF	"wp")])
143
144;; Map the register class for sp<->dp float conversions, destination
145(define_mode_attr VSr4	[(SF	"ws")
146			 (DF	"f")
147			 (V2DF  "wd")
148			 (V4SF	"v")])
149
150;; Map the register class for sp<->dp float conversions, source
151(define_mode_attr VSr5	[(SF	"ws")
152			 (DF	"f")
153			 (V2DF  "v")
154			 (V4SF	"wd")])
155
156;; The VSX register class that a type can occupy, even if it is not the
157;; preferred register class (VSr is the preferred register class that will get
158;; allocated first).
159(define_mode_attr VSa	[(V16QI "wa")
160			 (V8HI  "wa")
161			 (V4SI  "wa")
162			 (V4SF  "wa")
163			 (V2DI  "wa")
164			 (V2DF  "wa")
165			 (DI	"wi")
166			 (DF    "ws")
167			 (SF	"ww")
168			 (V1TI	"wa")
169			 (TI    "wt")
170			 (TF	"wp")
171			 (KF	"wq")])
172
173;; A mode attribute to disparage use of GPR registers, except for scalar
174;; integer modes.
175(define_mode_attr ??r	[(V16QI	"??r")
176			 (V8HI	"??r")
177			 (V4SI	"??r")
178			 (V4SF	"??r")
179			 (V2DI	"??r")
180			 (V2DF	"??r")
181			 (V1TI	"??r")
182			 (KF	"??r")
183			 (TF	"??r")
184			 (TI	"r")])
185
186;; Same size integer type for floating point data
187(define_mode_attr VSi [(V4SF  "v4si")
188		       (V2DF  "v2di")
189		       (DF    "di")])
190
191(define_mode_attr VSI [(V4SF  "V4SI")
192		       (V2DF  "V2DI")
193		       (DF    "DI")])
194
195;; Word size for same size conversion
196(define_mode_attr VSc [(V4SF "w")
197		       (V2DF "d")
198		       (DF   "d")])
199
200;; Map into either s or v, depending on whether this is a scalar or vector
201;; operation
202(define_mode_attr VSv	[(V16QI "v")
203			 (V8HI  "v")
204			 (V4SI  "v")
205			 (V4SF  "v")
206			 (V2DI  "v")
207			 (V2DF  "v")
208			 (V1TI  "v")
209			 (DF    "s")
210			 (KF	"v")])
211
212;; Appropriate type for add ops (and other simple FP ops)
213(define_mode_attr VStype_simple	[(V2DF "vecdouble")
214				 (V4SF "vecfloat")
215				 (DF   "fp")])
216
217(define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
218				   (V4SF "fp_addsub_s")
219				   (DF   "fp_addsub_d")])
220
221;; Appropriate type for multiply ops
222(define_mode_attr VStype_mul	[(V2DF "vecdouble")
223				 (V4SF "vecfloat")
224				 (DF   "dmul")])
225
226(define_mode_attr VSfptype_mul	[(V2DF "fp_mul_d")
227				 (V4SF "fp_mul_s")
228				 (DF   "fp_mul_d")])
229
230;; Appropriate type for divide ops.
231(define_mode_attr VStype_div	[(V2DF "vecdiv")
232				 (V4SF "vecfdiv")
233				 (DF   "ddiv")])
234
235(define_mode_attr VSfptype_div	[(V2DF "fp_div_d")
236				 (V4SF "fp_div_s")
237				 (DF   "fp_div_d")])
238
239;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
240;; the scalar sqrt
241(define_mode_attr VStype_sqrt	[(V2DF "dsqrt")
242				 (V4SF "ssqrt")
243				 (DF   "dsqrt")])
244
245(define_mode_attr VSfptype_sqrt	[(V2DF "fp_sqrt_d")
246				 (V4SF "fp_sqrt_s")
247				 (DF   "fp_sqrt_d")])
248
249;; Iterator and modes for sp<->dp conversions
250;; Because scalar SF values are represented internally as double, use the
251;; V4SF type to represent this than SF.
252(define_mode_iterator VSX_SPDP [DF V4SF V2DF])
253
254(define_mode_attr VS_spdp_res [(DF	"V4SF")
255			       (V4SF	"V2DF")
256			       (V2DF	"V4SF")])
257
258(define_mode_attr VS_spdp_insn [(DF	"xscvdpsp")
259				(V4SF	"xvcvspdp")
260				(V2DF	"xvcvdpsp")])
261
262(define_mode_attr VS_spdp_type [(DF	"fp")
263				(V4SF	"vecdouble")
264				(V2DF	"vecdouble")])
265
266;; Map the scalar mode for a vector type
267(define_mode_attr VS_scalar [(V1TI	"TI")
268			     (V2DF	"DF")
269			     (V2DI	"DI")
270			     (V4SF	"SF")
271			     (V4SI	"SI")
272			     (V8HI	"HI")
273			     (V16QI	"QI")])
274
275;; Map to a double-sized vector mode
276(define_mode_attr VS_double [(V4SI	"V8SI")
277			     (V4SF	"V8SF")
278			     (V2DI	"V4DI")
279			     (V2DF	"V4DF")
280			     (V1TI	"V2TI")])
281
282;; Map register class for 64-bit element in 128-bit vector for direct moves
283;; to/from gprs
284(define_mode_attr VS_64dm [(V2DF	"wk")
285			   (V2DI	"wj")])
286
287;; Map register class for 64-bit element in 128-bit vector for normal register
288;; to register moves
289(define_mode_attr VS_64reg [(V2DF	"ws")
290			    (V2DI	"wi")])
291
292;; Iterators for loading constants with xxspltib
293(define_mode_iterator VSINT_84  [V4SI V2DI DI SI])
294(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
295
296;; Vector reverse byte modes
297(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
298
299;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
300;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
301;; done on ISA 2.07 and not just ISA 3.0.
302(define_mode_iterator VSX_EXTRACT_I  [V16QI V8HI V4SI])
303(define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
304
305(define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
306		  		     (V8HI "h")
307				     (V4SI "w")])
308
309;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
310;; insert to validate the operand number.
311(define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
312					 (V8HI  "const_0_to_7_operand")
313					 (V4SI  "const_0_to_3_operand")])
314
315;; Mode attribute to give the constraint for vector extract and insert
316;; operations.
317(define_mode_attr VSX_EX [(V16QI "v")
318			  (V8HI  "v")
319			  (V4SI  "wa")])
320
321;; Mode iterator for binary floating types other than double to
322;; optimize convert to that floating point type from an extract
323;; of an integer type
324(define_mode_iterator VSX_EXTRACT_FL [SF
325				      (IF "FLOAT128_2REG_P (IFmode)")
326				      (KF "TARGET_FLOAT128_HW")
327				      (TF "FLOAT128_2REG_P (TFmode)
328					   || (FLOAT128_IEEE_P (TFmode)
329					       && TARGET_FLOAT128_HW)")])
330
331;; Mode iterator for binary floating types that have a direct conversion
332;; from 64-bit integer to floating point
333(define_mode_iterator FL_CONV [SF
334			       DF
335			       (KF "TARGET_FLOAT128_HW")
336			       (TF "TARGET_FLOAT128_HW
337				    && FLOAT128_IEEE_P (TFmode)")])
338
339;; Iterator for the 2 short vector types to do a splat from an integer
340(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
341
342;; Mode attribute to give the count for the splat instruction to splat
343;; the value in the 64-bit integer slot
344(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])
345
346;; Mode attribute to give the suffix for the splat instruction
347(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
348
349;; Constants for creating unspecs
350(define_c_enum "unspec"
351  [UNSPEC_VSX_CONCAT
352   UNSPEC_VSX_CVDPSXWS
353   UNSPEC_VSX_CVDPUXWS
354   UNSPEC_VSX_CVSPDP
355   UNSPEC_VSX_CVHPSP
356   UNSPEC_VSX_CVSPDPN
357   UNSPEC_VSX_CVDPSPN
358   UNSPEC_VSX_CVSXWDP
359   UNSPEC_VSX_CVUXWDP
360   UNSPEC_VSX_CVSXDSP
361   UNSPEC_VSX_CVUXDSP
362   UNSPEC_VSX_CVSPSXDS
363   UNSPEC_VSX_CVSPUXDS
364   UNSPEC_VSX_CVSXWSP
365   UNSPEC_VSX_CVUXWSP
366   UNSPEC_VSX_FLOAT2
367   UNSPEC_VSX_UNS_FLOAT2
368   UNSPEC_VSX_FLOATE
369   UNSPEC_VSX_UNS_FLOATE
370   UNSPEC_VSX_FLOATO
371   UNSPEC_VSX_UNS_FLOATO
372   UNSPEC_VSX_TDIV
373   UNSPEC_VSX_TSQRT
374   UNSPEC_VSX_SET
375   UNSPEC_VSX_ROUND_I
376   UNSPEC_VSX_ROUND_IC
377   UNSPEC_VSX_SLDWI
378   UNSPEC_VSX_XXPERM
379
380   UNSPEC_VSX_XXSPLTW
381   UNSPEC_VSX_XXSPLTD
382   UNSPEC_VSX_DIVSD
383   UNSPEC_VSX_DIVUD
384   UNSPEC_VSX_MULSD
385   UNSPEC_VSX_XVCVSXDDP
386   UNSPEC_VSX_XVCVUXDDP
387   UNSPEC_VSX_XVCVDPSXDS
388   UNSPEC_VSX_XVCDPSP
389   UNSPEC_VSX_XVCVDPUXDS
390   UNSPEC_VSX_SIGN_EXTEND
391   UNSPEC_VSX_XVCVSPSXWS
392   UNSPEC_VSX_XVCVSPSXDS
393   UNSPEC_VSX_VSLO
394   UNSPEC_VSX_EXTRACT
395   UNSPEC_VSX_SXEXPDP
396   UNSPEC_VSX_SXSIG
397   UNSPEC_VSX_SIEXPDP
398   UNSPEC_VSX_SIEXPQP
399   UNSPEC_VSX_SCMPEXPDP
400   UNSPEC_VSX_STSTDC
401   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
402   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
403   UNSPEC_VSX_VXEXP
404   UNSPEC_VSX_VXSIG
405   UNSPEC_VSX_VIEXP
406   UNSPEC_VSX_VTSTDC
407   UNSPEC_VSX_VEC_INIT
408   UNSPEC_VSX_VSIGNED2
409
410   UNSPEC_LXVL
411   UNSPEC_LXVLL
412   UNSPEC_LVSL_REG
413   UNSPEC_LVSR_REG
414   UNSPEC_STXVL
415   UNSPEC_STXVLL
416   UNSPEC_XL_LEN_R
417   UNSPEC_XST_LEN_R
418
419   UNSPEC_VCLZLSBB
420   UNSPEC_VCTZLSBB
421   UNSPEC_VEXTUBLX
422   UNSPEC_VEXTUHLX
423   UNSPEC_VEXTUWLX
424   UNSPEC_VEXTUBRX
425   UNSPEC_VEXTUHRX
426   UNSPEC_VEXTUWRX
427   UNSPEC_VCMPNEB
428   UNSPEC_VCMPNEZB
429   UNSPEC_VCMPNEH
430   UNSPEC_VCMPNEZH
431   UNSPEC_VCMPNEW
432   UNSPEC_VCMPNEZW
433   UNSPEC_XXEXTRACTUW
434   UNSPEC_XXINSERTW
435   UNSPEC_VSX_FIRST_MATCH_INDEX
436   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
437   UNSPEC_VSX_FIRST_MISMATCH_INDEX
438   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
439  ])
440
441;; VSX moves
442
443;; The patterns for LE permuted loads and stores come before the general
444;; VSX moves so they match first.
445(define_insn_and_split "*vsx_le_perm_load_<mode>"
446  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
447        (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
448  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
449  "#"
450  "&& 1"
451  [(set (match_dup 2)
452        (vec_select:<MODE>
453          (match_dup 1)
454          (parallel [(const_int 1) (const_int 0)])))
455   (set (match_dup 0)
456        (vec_select:<MODE>
457          (match_dup 2)
458          (parallel [(const_int 1) (const_int 0)])))]
459{
460  rtx mem = operands[1];
461
462  /* Don't apply the swap optimization if we've already performed register
463     allocation and the hard register destination is not in the altivec
464     range.  */
465  if ((MEM_ALIGN (mem) >= 128)
466      && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
467	  || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
468    {
469      rtx mem_address = XEXP (mem, 0);
470      enum machine_mode mode = GET_MODE (mem);
471
472      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
473        {
474	  /* Replace the source memory address with masked address.  */
475          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
476	  emit_insn (lvx_set_expr);
477	  DONE;
478        }
479      else if (rs6000_quadword_masked_address_p (mem_address))
480        {
481	  /* This rtl is already in the form that matches lvx
482	     instruction, so leave it alone.  */
483	  DONE;
484        }
485      /* Otherwise, fall through to transform into a swapping load.  */
486    }
487  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
488                                       : operands[0];
489}
490  [(set_attr "type" "vecload")
491   (set_attr "length" "8")])
492
493(define_insn_and_split "*vsx_le_perm_load_<mode>"
494  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
495        (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
496  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
497  "#"
498  "&& 1"
499  [(set (match_dup 2)
500        (vec_select:<MODE>
501          (match_dup 1)
502          (parallel [(const_int 2) (const_int 3)
503                     (const_int 0) (const_int 1)])))
504   (set (match_dup 0)
505        (vec_select:<MODE>
506          (match_dup 2)
507          (parallel [(const_int 2) (const_int 3)
508                     (const_int 0) (const_int 1)])))]
509{
510  rtx mem = operands[1];
511
512  /* Don't apply the swap optimization if we've already performed register
513     allocation and the hard register destination is not in the altivec
514     range.  */
515  if ((MEM_ALIGN (mem) >= 128)
516      && ((REGNO(operands[0]) >= FIRST_PSEUDO_REGISTER)
517	  || ALTIVEC_REGNO_P (REGNO(operands[0]))))
518    {
519      rtx mem_address = XEXP (mem, 0);
520      enum machine_mode mode = GET_MODE (mem);
521
522      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
523        {
524	  /* Replace the source memory address with masked address.  */
525          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
526	  emit_insn (lvx_set_expr);
527	  DONE;
528        }
529      else if (rs6000_quadword_masked_address_p (mem_address))
530        {
531	  /* This rtl is already in the form that matches lvx
532	     instruction, so leave it alone.  */
533	  DONE;
534        }
535      /* Otherwise, fall through to transform into a swapping load.  */
536    }
537  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
538                                       : operands[0];
539}
540  [(set_attr "type" "vecload")
541   (set_attr "length" "8")])
542
543(define_insn_and_split "*vsx_le_perm_load_v8hi"
544  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
545        (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
546  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
547  "#"
548  "&& 1"
549  [(set (match_dup 2)
550        (vec_select:V8HI
551          (match_dup 1)
552          (parallel [(const_int 4) (const_int 5)
553                     (const_int 6) (const_int 7)
554                     (const_int 0) (const_int 1)
555                     (const_int 2) (const_int 3)])))
556   (set (match_dup 0)
557        (vec_select:V8HI
558          (match_dup 2)
559          (parallel [(const_int 4) (const_int 5)
560                     (const_int 6) (const_int 7)
561                     (const_int 0) (const_int 1)
562                     (const_int 2) (const_int 3)])))]
563{
564  rtx mem = operands[1];
565
566  /* Don't apply the swap optimization if we've already performed register
567     allocation and the hard register destination is not in the altivec
568     range.  */
569  if ((MEM_ALIGN (mem) >= 128)
570      && ((REGNO(operands[0]) >= FIRST_PSEUDO_REGISTER)
571	  || ALTIVEC_REGNO_P (REGNO(operands[0]))))
572    {
573      rtx mem_address = XEXP (mem, 0);
574      enum machine_mode mode = GET_MODE (mem);
575
576      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
577        {
578	  /* Replace the source memory address with masked address.  */
579	  rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
580	  emit_insn (lvx_set_expr);
581	  DONE;
582        }
583      else if (rs6000_quadword_masked_address_p (mem_address))
584        {
585	  /* This rtl is already in the form that matches lvx
586	     instruction, so leave it alone.  */
587	  DONE;
588        }
589      /* Otherwise, fall through to transform into a swapping load.  */
590    }
591  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
592                                       : operands[0];
593}
594  [(set_attr "type" "vecload")
595   (set_attr "length" "8")])
596
597(define_insn_and_split "*vsx_le_perm_load_v16qi"
598  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
599        (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
600  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
601  "#"
602  "&& 1"
603  [(set (match_dup 2)
604        (vec_select:V16QI
605          (match_dup 1)
606          (parallel [(const_int 8) (const_int 9)
607                     (const_int 10) (const_int 11)
608                     (const_int 12) (const_int 13)
609                     (const_int 14) (const_int 15)
610                     (const_int 0) (const_int 1)
611                     (const_int 2) (const_int 3)
612                     (const_int 4) (const_int 5)
613                     (const_int 6) (const_int 7)])))
614   (set (match_dup 0)
615        (vec_select:V16QI
616          (match_dup 2)
617          (parallel [(const_int 8) (const_int 9)
618                     (const_int 10) (const_int 11)
619                     (const_int 12) (const_int 13)
620                     (const_int 14) (const_int 15)
621                     (const_int 0) (const_int 1)
622                     (const_int 2) (const_int 3)
623                     (const_int 4) (const_int 5)
624                     (const_int 6) (const_int 7)])))]
625{
626  rtx mem = operands[1];
627
628  /* Don't apply the swap optimization if we've already performed register
629     allocation and the hard register destination is not in the altivec
630     range.  */
631  if ((MEM_ALIGN (mem) >= 128)
632      && ((REGNO(operands[0]) >= FIRST_PSEUDO_REGISTER)
633	  || ALTIVEC_REGNO_P (REGNO(operands[0]))))
634    {
635      rtx mem_address = XEXP (mem, 0);
636      enum machine_mode mode = GET_MODE (mem);
637
638      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
639        {
640	  /* Replace the source memory address with masked address.  */
641	  rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
642	  emit_insn (lvx_set_expr);
643	  DONE;
644        }
645      else if (rs6000_quadword_masked_address_p (mem_address))
646        {
647	  /* This rtl is already in the form that matches lvx
648	     instruction, so leave it alone.  */
649	  DONE;
650        }
651      /* Otherwise, fall through to transform into a swapping load.  */
652    }
653  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
654                                       : operands[0];
655}
656  [(set_attr "type" "vecload")
657   (set_attr "length" "8")])
658
659(define_insn "*vsx_le_perm_store_<mode>"
660  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
661        (match_operand:VSX_D 1 "vsx_register_operand" "+<VSa>"))]
662  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
663  "#"
664  [(set_attr "type" "vecstore")
665   (set_attr "length" "12")])
666
667(define_split
668  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
669        (match_operand:VSX_D 1 "vsx_register_operand"))]
670  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
671  [(set (match_dup 2)
672        (vec_select:<MODE>
673          (match_dup 1)
674          (parallel [(const_int 1) (const_int 0)])))
675   (set (match_dup 0)
676        (vec_select:<MODE>
677          (match_dup 2)
678          (parallel [(const_int 1) (const_int 0)])))]
679{
680  rtx mem = operands[0];
681
682  /* Don't apply the swap optimization if we've already performed register
683     allocation and the hard register source is not in the altivec range.  */
684  if ((MEM_ALIGN (mem) >= 128)
685      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
686          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
687    {
688      rtx mem_address = XEXP (mem, 0);
689      enum machine_mode mode = GET_MODE (mem);
690      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
691	{
692	  rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
693	  emit_insn (stvx_set_expr);
694	  DONE;
695	}
696      else if (rs6000_quadword_masked_address_p (mem_address))
697	{
698	  /* This rtl is already in the form that matches stvx instruction,
699	     so leave it alone.  */
700	  DONE;
701	}
702      /* Otherwise, fall through to transform into a swapping store.  */
703    }
704
705  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
706                                       : operands[1];
707})
708
709;; The post-reload split requires that we re-permute the source
710;; register in case it is still live.
711(define_split
712  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
713        (match_operand:VSX_D 1 "vsx_register_operand"))]
714  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
715  [(set (match_dup 1)
716        (vec_select:<MODE>
717          (match_dup 1)
718          (parallel [(const_int 1) (const_int 0)])))
719   (set (match_dup 0)
720        (vec_select:<MODE>
721          (match_dup 1)
722          (parallel [(const_int 1) (const_int 0)])))
723   (set (match_dup 1)
724        (vec_select:<MODE>
725          (match_dup 1)
726          (parallel [(const_int 1) (const_int 0)])))]
727  "")
728
729(define_insn "*vsx_le_perm_store_<mode>"
730  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
731        (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
732  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
733  "#"
734  [(set_attr "type" "vecstore")
735   (set_attr "length" "12")])
736
737(define_split
738  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
739        (match_operand:VSX_W 1 "vsx_register_operand"))]
740  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
741  [(set (match_dup 2)
742        (vec_select:<MODE>
743          (match_dup 1)
744          (parallel [(const_int 2) (const_int 3)
745	             (const_int 0) (const_int 1)])))
746   (set (match_dup 0)
747        (vec_select:<MODE>
748          (match_dup 2)
749          (parallel [(const_int 2) (const_int 3)
750	             (const_int 0) (const_int 1)])))]
751{
752  rtx mem = operands[0];
753
754  /* Don't apply the swap optimization if we've already performed register
755     allocation and the hard register source is not in the altivec range.  */
756  if ((MEM_ALIGN (mem) >= 128)
757      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
758          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
759    {
760      rtx mem_address = XEXP (mem, 0);
761      enum machine_mode mode = GET_MODE (mem);
762      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
763	{
764	  rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
765	  emit_insn (stvx_set_expr);
766	  DONE;
767	}
768      else if (rs6000_quadword_masked_address_p (mem_address))
769	{
770	  /* This rtl is already in the form that matches stvx instruction,
771	     so leave it alone.  */
772	  DONE;
773	}
774      /* Otherwise, fall through to transform into a swapping store.  */
775    }
776
777  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
778                                       : operands[1];
779})
780
781;; The post-reload split requires that we re-permute the source
782;; register in case it is still live.
783(define_split
784  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
785        (match_operand:VSX_W 1 "vsx_register_operand"))]
786  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
787  [(set (match_dup 1)
788        (vec_select:<MODE>
789          (match_dup 1)
790          (parallel [(const_int 2) (const_int 3)
791	             (const_int 0) (const_int 1)])))
792   (set (match_dup 0)
793        (vec_select:<MODE>
794          (match_dup 1)
795          (parallel [(const_int 2) (const_int 3)
796	             (const_int 0) (const_int 1)])))
797   (set (match_dup 1)
798        (vec_select:<MODE>
799          (match_dup 1)
800          (parallel [(const_int 2) (const_int 3)
801	             (const_int 0) (const_int 1)])))]
802  "")
803
804(define_insn "*vsx_le_perm_store_v8hi"
805  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
806        (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
807  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
808  "#"
809  [(set_attr "type" "vecstore")
810   (set_attr "length" "12")])
811
812(define_split
813  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
814        (match_operand:V8HI 1 "vsx_register_operand"))]
815  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
816  [(set (match_dup 2)
817        (vec_select:V8HI
818          (match_dup 1)
819          (parallel [(const_int 4) (const_int 5)
820                     (const_int 6) (const_int 7)
821                     (const_int 0) (const_int 1)
822                     (const_int 2) (const_int 3)])))
823   (set (match_dup 0)
824        (vec_select:V8HI
825          (match_dup 2)
826          (parallel [(const_int 4) (const_int 5)
827                     (const_int 6) (const_int 7)
828                     (const_int 0) (const_int 1)
829                     (const_int 2) (const_int 3)])))]
830{
831  rtx mem = operands[0];
832
833  /* Don't apply the swap optimization if we've already performed register
834     allocation and the hard register source is not in the altivec range.  */
835  if ((MEM_ALIGN (mem) >= 128)
836      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
837          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
838    {
839      rtx mem_address = XEXP (mem, 0);
840      enum machine_mode mode = GET_MODE (mem);
841      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
842	{
843	  rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
844	  emit_insn (stvx_set_expr);
845	  DONE;
846	}
847      else if (rs6000_quadword_masked_address_p (mem_address))
848	{
849	  /* This rtl is already in the form that matches stvx instruction,
850	     so leave it alone.  */
851	  DONE;
852	}
853      /* Otherwise, fall through to transform into a swapping store.  */
854    }
855
856  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
857                                       : operands[1];
858})
859
860;; The post-reload split requires that we re-permute the source
861;; register in case it is still live.
862(define_split
863  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
864        (match_operand:V8HI 1 "vsx_register_operand"))]
865  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
866  [(set (match_dup 1)
867        (vec_select:V8HI
868          (match_dup 1)
869          (parallel [(const_int 4) (const_int 5)
870                     (const_int 6) (const_int 7)
871                     (const_int 0) (const_int 1)
872                     (const_int 2) (const_int 3)])))
873   (set (match_dup 0)
874        (vec_select:V8HI
875          (match_dup 1)
876          (parallel [(const_int 4) (const_int 5)
877                     (const_int 6) (const_int 7)
878                     (const_int 0) (const_int 1)
879                     (const_int 2) (const_int 3)])))
880   (set (match_dup 1)
881        (vec_select:V8HI
882          (match_dup 1)
883          (parallel [(const_int 4) (const_int 5)
884                     (const_int 6) (const_int 7)
885                     (const_int 0) (const_int 1)
886                     (const_int 2) (const_int 3)])))]
887  "")
888
889(define_insn "*vsx_le_perm_store_v16qi"
890  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
891        (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
892  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
893  "#"
894  [(set_attr "type" "vecstore")
895   (set_attr "length" "12")])
896
897(define_split
898  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
899        (match_operand:V16QI 1 "vsx_register_operand"))]
900  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
901  [(set (match_dup 2)
902        (vec_select:V16QI
903          (match_dup 1)
904          (parallel [(const_int 8) (const_int 9)
905                     (const_int 10) (const_int 11)
906                     (const_int 12) (const_int 13)
907                     (const_int 14) (const_int 15)
908                     (const_int 0) (const_int 1)
909                     (const_int 2) (const_int 3)
910                     (const_int 4) (const_int 5)
911                     (const_int 6) (const_int 7)])))
912   (set (match_dup 0)
913        (vec_select:V16QI
914          (match_dup 2)
915          (parallel [(const_int 8) (const_int 9)
916                     (const_int 10) (const_int 11)
917                     (const_int 12) (const_int 13)
918                     (const_int 14) (const_int 15)
919                     (const_int 0) (const_int 1)
920                     (const_int 2) (const_int 3)
921                     (const_int 4) (const_int 5)
922                     (const_int 6) (const_int 7)])))]
923{
924  rtx mem = operands[0];
925
926  /* Don't apply the swap optimization if we've already performed register
927     allocation and the hard register source is not in the altivec range.  */
928  if ((MEM_ALIGN (mem) >= 128)
929      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
930          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
931    {
932      rtx mem_address = XEXP (mem, 0);
933      enum machine_mode mode = GET_MODE (mem);
934      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
935	{
936	  rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
937	  emit_insn (stvx_set_expr);
938	  DONE;
939	}
940      else if (rs6000_quadword_masked_address_p (mem_address))
941	{
942	  /* This rtl is already in the form that matches stvx instruction,
943	     so leave it alone.  */
944	  DONE;
945	}
946      /* Otherwise, fall through to transform into a swapping store.  */
947    }
948
949  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
950                                       : operands[1];
951})
952
953;; The post-reload split requires that we re-permute the source
954;; register in case it is still live.
955(define_split
956  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
957        (match_operand:V16QI 1 "vsx_register_operand"))]
958  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
959  [(set (match_dup 1)
960        (vec_select:V16QI
961          (match_dup 1)
962          (parallel [(const_int 8) (const_int 9)
963                     (const_int 10) (const_int 11)
964                     (const_int 12) (const_int 13)
965                     (const_int 14) (const_int 15)
966                     (const_int 0) (const_int 1)
967                     (const_int 2) (const_int 3)
968                     (const_int 4) (const_int 5)
969                     (const_int 6) (const_int 7)])))
970   (set (match_dup 0)
971        (vec_select:V16QI
972          (match_dup 1)
973          (parallel [(const_int 8) (const_int 9)
974                     (const_int 10) (const_int 11)
975                     (const_int 12) (const_int 13)
976                     (const_int 14) (const_int 15)
977                     (const_int 0) (const_int 1)
978                     (const_int 2) (const_int 3)
979                     (const_int 4) (const_int 5)
980                     (const_int 6) (const_int 7)])))
981   (set (match_dup 1)
982        (vec_select:V16QI
983          (match_dup 1)
984          (parallel [(const_int 8) (const_int 9)
985                     (const_int 10) (const_int 11)
986                     (const_int 12) (const_int 13)
987                     (const_int 14) (const_int 15)
988                     (const_int 0) (const_int 1)
989                     (const_int 2) (const_int 3)
990                     (const_int 4) (const_int 5)
991                     (const_int 6) (const_int 7)])))]
992  "")
993
994;; Little endian word swapping for 128-bit types that are either scalars or the
995;; special V1TI container class, which it is not appropriate to use vec_select
996;; for the type.
997(define_insn "*vsx_le_permute_<mode>"
998  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z,&r,&r,Q")
999	(rotate:VSX_TI
1000	 (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>,r,Q,r")
1001	 (const_int 64)))]
1002  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
1003  "@
1004   xxpermdi %x0,%x1,%x1,2
1005   lxvd2x %x0,%y1
1006   stxvd2x %x1,%y0
1007   mr %0,%L1\;mr %L0,%1
1008   ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
1009   std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
1010  [(set_attr "length" "4,4,4,8,8,8")
1011   (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])
1012
1013(define_insn_and_split "*vsx_le_undo_permute_<mode>"
1014  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
1015	(rotate:VSX_TI
1016	 (rotate:VSX_TI
1017	  (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
1018	  (const_int 64))
1019	 (const_int 64)))]
1020  "!BYTES_BIG_ENDIAN && TARGET_VSX"
1021  "@
1022   #
1023   xxlor %x0,%x1"
1024  ""
1025  [(set (match_dup 0) (match_dup 1))]
1026{
1027  if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
1028    {
1029      emit_note (NOTE_INSN_DELETED);
1030      DONE;
1031    }
1032}
1033  [(set_attr "length" "0,4")
1034   (set_attr "type" "veclogical")])
1035
1036(define_insn_and_split "*vsx_le_perm_load_<mode>"
1037  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,r")
1038        (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
1039  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
1040  "@
1041   #
1042   #"
1043  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
1044  [(const_int 0)]
1045{
1046  rtx tmp = (can_create_pseudo_p ()
1047	     ? gen_reg_rtx_and_attrs (operands[0])
1048	     : operands[0]);
1049  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1050  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1051  DONE;
1052}
1053  [(set_attr "type" "vecload,load")
1054   (set_attr "length" "8,8")])
1055
1056(define_insn "*vsx_le_perm_store_<mode>"
1057  [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
1058        (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>,r"))]
1059  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
1060  "@
1061   #
1062   #"
1063  [(set_attr "type" "vecstore,store")
1064   (set_attr "length" "12,8")])
1065
1066(define_split
1067  [(set (match_operand:VSX_LE_128 0 "memory_operand")
1068        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1069  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
1070  [(const_int 0)]
1071{
1072  rtx tmp = (can_create_pseudo_p ()
1073	     ? gen_reg_rtx_and_attrs (operands[0])
1074	     : operands[0]);
1075  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1076  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1077  DONE;
1078})
1079
1080;; Peepholes to catch loads and stores for TImode if TImode landed in
1081;; GPR registers on a little endian system.
1082(define_peephole2
1083  [(set (match_operand:VSX_TI 0 "int_reg_operand")
1084	(rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
1085		       (const_int 64)))
1086   (set (match_operand:VSX_TI 2 "int_reg_operand")
1087	(rotate:VSX_TI (match_dup 0)
1088		       (const_int 64)))]
1089  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1090   && (rtx_equal_p (operands[0], operands[2])
1091       || peep2_reg_dead_p (2, operands[0]))"
1092   [(set (match_dup 2) (match_dup 1))])
1093
1094(define_peephole2
1095  [(set (match_operand:VSX_TI 0 "int_reg_operand")
1096	(rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
1097		       (const_int 64)))
1098   (set (match_operand:VSX_TI 2 "memory_operand")
1099	(rotate:VSX_TI (match_dup 0)
1100		       (const_int 64)))]
1101  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1102   && peep2_reg_dead_p (2, operands[0])"
1103   [(set (match_dup 2) (match_dup 1))])
1104
1105;; Peephole to catch memory to memory transfers for TImode if TImode landed in
1106;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
1107;; floating point are handled by the more generic swap elimination pass.
1108(define_peephole2
1109  [(set (match_operand:TI 0 "vsx_register_operand")
1110	(rotate:TI (match_operand:TI 1 "vsx_register_operand")
1111		   (const_int 64)))
1112   (set (match_operand:TI 2 "vsx_register_operand")
1113	(rotate:TI (match_dup 0)
1114		   (const_int 64)))]
1115  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1116   && (rtx_equal_p (operands[0], operands[2])
1117       || peep2_reg_dead_p (2, operands[0]))"
1118   [(set (match_dup 2) (match_dup 1))])
1119
1120;; The post-reload split requires that we re-permute the source
1121;; register in case it is still live.
1122(define_split
1123  [(set (match_operand:VSX_LE_128 0 "memory_operand")
1124        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1125  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
1126  [(const_int 0)]
1127{
1128  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1129  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
1130  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1131  DONE;
1132})
1133
1134;; Vector constants that can be generated with XXSPLTIB that was added in ISA
1135;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
1136(define_insn "xxspltib_v16qi"
1137  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1138	(vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
1139  "TARGET_P9_VECTOR"
1140{
1141  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
1142  return "xxspltib %x0,%2";
1143}
1144  [(set_attr "type" "vecperm")])
1145
1146(define_insn "xxspltib_<mode>_nosplit"
1147  [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
1148	(match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
1149  "TARGET_P9_VECTOR"
1150{
1151  rtx op1 = operands[1];
1152  int value = 256;
1153  int num_insns = -1;
1154
1155  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1156      || num_insns != 1)
1157    gcc_unreachable ();
1158
1159  operands[2] = GEN_INT (value & 0xff);
1160  return "xxspltib %x0,%2";
1161}
1162  [(set_attr "type" "vecperm")])
1163
1164(define_insn_and_split "*xxspltib_<mode>_split"
1165  [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
1166	(match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
1167  "TARGET_P9_VECTOR"
1168  "#"
1169  "&& 1"
1170  [(const_int 0)]
1171{
1172  int value = 256;
1173  int num_insns = -1;
1174  rtx op0 = operands[0];
1175  rtx op1 = operands[1];
1176  rtx tmp = ((can_create_pseudo_p ())
1177	     ? gen_reg_rtx (V16QImode)
1178	     : gen_lowpart (V16QImode, op0));
1179
1180  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1181      || num_insns != 2)
1182    gcc_unreachable ();
1183
1184  emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));
1185
1186  if (<MODE>mode == V2DImode)
1187    emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));
1188
1189  else if (<MODE>mode == V4SImode)
1190    emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));
1191
1192  else if (<MODE>mode == V8HImode)
1193    emit_insn (gen_altivec_vupkhsb  (op0, tmp));
1194
1195  else
1196    gcc_unreachable ();
1197
1198  DONE;
1199}
1200  [(set_attr "type" "vecperm")
1201   (set_attr "length" "8")])
1202
1203
1204;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTISB
1205;; or Altivec VSPLITW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
1206;; all 1's, since the machine does not have to wait for the previous
1207;; instruction using the register being set (such as a store waiting on a slow
1208;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
1209
1210;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
1211;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
1212;;              VSX 0/-1   GPR 0/-1   VMX const GPR const  LVX (VMX)   STVX (VMX)
1213(define_insn "*vsx_mov<mode>_64bit"
1214  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1215               "=ZwO,      <VSa>,     <VSa>,     r,         we,        ?wQ,
1216                ?&r,       ??r,       ??Y,       <??r>,     wo,        v,
1217                ?<VSa>,    *r,        v,         ??r,       wZ,        v")
1218
1219	(match_operand:VSX_M 1 "input_operand"
1220               "<VSa>,     ZwO,       <VSa>,     we,        r,         r,
1221                wQ,        Y,         r,         r,         wE,        jwM,
1222                ?jwM,      jwM,       W,         W,         v,         wZ"))]
1223
1224  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1225   && (register_operand (operands[0], <MODE>mode)
1226       || register_operand (operands[1], <MODE>mode))"
1227{
1228  return rs6000_output_move_128bit (operands);
1229}
1230  [(set_attr "type"
1231               "vecstore,  vecload,   vecsimple, mffgpr,    mftgpr,    load,
1232                store,     load,      store,     *,         vecsimple, vecsimple,
1233                vecsimple, *,         *,         *,         vecstore,  vecload")
1234
1235   (set_attr "length"
1236               "4,         4,         4,         8,         4,         8,
1237                8,         8,         8,         8,         4,         4,
1238                4,         8,         20,        20,        4,         4")])
1239
1240;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
1241;;              XXSPLTIB   VSPLTISW   VSX 0/-1   GPR 0/-1   VMX const  GPR const
1242;;              LVX (VMX)  STVX (VMX)
1243(define_insn "*vsx_mov<mode>_32bit"
1244  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1245               "=ZwO,      <VSa>,     <VSa>,     ??r,       ??Y,       <??r>,
1246                wo,        v,         ?<VSa>,    *r,        v,         ??r,
1247                wZ,        v")
1248
1249	(match_operand:VSX_M 1 "input_operand"
1250               "<VSa>,     ZwO,       <VSa>,     Y,         r,         r,
1251                wE,        jwM,       ?jwM,      jwM,       W,         W,
1252                v,         wZ"))]
1253
1254  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1255   && (register_operand (operands[0], <MODE>mode)
1256       || register_operand (operands[1], <MODE>mode))"
1257{
1258  return rs6000_output_move_128bit (operands);
1259}
1260  [(set_attr "type"
1261               "vecstore,  vecload,   vecsimple, load,      store,    *,
1262                vecsimple, vecsimple, vecsimple, *,         *,        *,
1263                vecstore,  vecload")
1264
1265   (set_attr "length"
1266               "4,         4,         4,         16,        16,        16,
1267                4,         4,         4,         16,        20,        32,
1268                4,         4")])
1269
1270;; Explicit  load/store expanders for the builtin functions
1271(define_expand "vsx_load_<mode>"
1272  [(set (match_operand:VSX_M 0 "vsx_register_operand")
1273	(match_operand:VSX_M 1 "memory_operand"))]
1274  "VECTOR_MEM_VSX_P (<MODE>mode)"
1275{
1276  /* Expand to swaps if needed, prior to swap optimization.  */
1277  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1278    {
1279      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1280      DONE;
1281    }
1282})
1283
1284(define_expand "vsx_store_<mode>"
1285  [(set (match_operand:VSX_M 0 "memory_operand")
1286	(match_operand:VSX_M 1 "vsx_register_operand"))]
1287  "VECTOR_MEM_VSX_P (<MODE>mode)"
1288{
1289  /* Expand to swaps if needed, prior to swap optimization.  */
1290  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
1291    {
1292      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1293      DONE;
1294    }
1295})
1296
1297;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
1298;; when you really want their element-reversing behavior.
1299(define_insn "vsx_ld_elemrev_v2di"
1300  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1301        (vec_select:V2DI
1302	  (match_operand:V2DI 1 "memory_operand" "Z")
1303	  (parallel [(const_int 1) (const_int 0)])))]
1304  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1305  "lxvd2x %x0,%y1"
1306  [(set_attr "type" "vecload")])
1307
1308(define_insn "vsx_ld_elemrev_v1ti"
1309  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
1310        (vec_select:V1TI
1311	  (match_operand:V1TI 1 "memory_operand" "Z")
1312	  (parallel [(const_int 0)])))]
1313  "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
1314{
1315   return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
1316}
1317  [(set_attr "type" "vecload")])
1318
1319(define_insn "vsx_ld_elemrev_v2df"
1320  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1321        (vec_select:V2DF
1322	  (match_operand:V2DF 1 "memory_operand" "Z")
1323	  (parallel [(const_int 1) (const_int 0)])))]
1324  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1325  "lxvd2x %x0,%y1"
1326  [(set_attr "type" "vecload")])
1327
1328(define_insn "vsx_ld_elemrev_v4si"
1329  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1330        (vec_select:V4SI
1331	  (match_operand:V4SI 1 "memory_operand" "Z")
1332	  (parallel [(const_int 3) (const_int 2)
1333	             (const_int 1) (const_int 0)])))]
1334  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1335  "lxvw4x %x0,%y1"
1336  [(set_attr "type" "vecload")])
1337
1338(define_insn "vsx_ld_elemrev_v4sf"
1339  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1340        (vec_select:V4SF
1341	  (match_operand:V4SF 1 "memory_operand" "Z")
1342	  (parallel [(const_int 3) (const_int 2)
1343	             (const_int 1) (const_int 0)])))]
1344  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1345  "lxvw4x %x0,%y1"
1346  [(set_attr "type" "vecload")])
1347
1348(define_expand "vsx_ld_elemrev_v8hi"
1349  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1350        (vec_select:V8HI
1351	  (match_operand:V8HI 1 "memory_operand" "Z")
1352	  (parallel [(const_int 7) (const_int 6)
1353	             (const_int 5) (const_int 4)
1354		     (const_int 3) (const_int 2)
1355	             (const_int 1) (const_int 0)])))]
1356  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1357{
1358  if (!TARGET_P9_VECTOR)
1359    {
1360      rtx tmp = gen_reg_rtx (V4SImode);
1361      rtx subreg, subreg2, perm[16], pcv;
1362      /* 2 is leftmost element in register */
1363      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1364      int i;
1365
1366      subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
1367      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1368      subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);
1369
1370      for (i = 0; i < 16; ++i)
1371      	perm[i] = GEN_INT (reorder[i]);
1372
1373      pcv = force_reg (V16QImode,
1374                       gen_rtx_CONST_VECTOR (V16QImode,
1375                                             gen_rtvec_v (16, perm)));
1376      emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
1377                                                subreg2, pcv));
1378      DONE;
1379    }
1380})
1381
1382(define_insn "*vsx_ld_elemrev_v8hi_internal"
1383  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1384        (vec_select:V8HI
1385          (match_operand:V8HI 1 "memory_operand" "Z")
1386          (parallel [(const_int 7) (const_int 6)
1387                     (const_int 5) (const_int 4)
1388                     (const_int 3) (const_int 2)
1389                     (const_int 1) (const_int 0)])))]
1390  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1391  "lxvh8x %x0,%y1"
1392  [(set_attr "type" "vecload")])
1393
1394(define_expand "vsx_ld_elemrev_v16qi"
1395  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1396        (vec_select:V16QI
1397          (match_operand:V16QI 1 "memory_operand" "Z")
1398          (parallel [(const_int 15) (const_int 14)
1399                     (const_int 13) (const_int 12)
1400                     (const_int 11) (const_int 10)
1401                     (const_int  9) (const_int  8)
1402                     (const_int  7) (const_int  6)
1403                     (const_int  5) (const_int  4)
1404                     (const_int  3) (const_int  2)
1405                     (const_int  1) (const_int  0)])))]
1406  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1407{
1408  if (!TARGET_P9_VECTOR)
1409    {
1410      rtx tmp = gen_reg_rtx (V4SImode);
1411      rtx subreg, subreg2, perm[16], pcv;
1412      /* 3 is leftmost element in register */
1413      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1414      int i;
1415
1416      subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
1417      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1418      subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);
1419
1420      for (i = 0; i < 16; ++i)
1421        perm[i] = GEN_INT (reorder[i]);
1422
1423      pcv = force_reg (V16QImode,
1424                       gen_rtx_CONST_VECTOR (V16QImode,
1425                                             gen_rtvec_v (16, perm)));
1426      emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
1427                                                 subreg2, pcv));
1428      DONE;
1429    }
1430})
1431
1432(define_insn "*vsx_ld_elemrev_v16qi_internal"
1433  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1434        (vec_select:V16QI
1435          (match_operand:V16QI 1 "memory_operand" "Z")
1436          (parallel [(const_int 15) (const_int 14)
1437                     (const_int 13) (const_int 12)
1438                     (const_int 11) (const_int 10)
1439                     (const_int  9) (const_int  8)
1440                     (const_int  7) (const_int  6)
1441                     (const_int  5) (const_int  4)
1442                     (const_int  3) (const_int  2)
1443                     (const_int  1) (const_int  0)])))]
1444  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1445  "lxvb16x %x0,%y1"
1446  [(set_attr "type" "vecload")])
1447
1448(define_insn "vsx_st_elemrev_v1ti"
1449  [(set (match_operand:V1TI 0 "memory_operand" "=Z")
1450        (vec_select:V1TI
1451          (match_operand:V1TI 1 "vsx_register_operand" "+wa")
1452          (parallel [(const_int 0)])))
1453   (clobber (match_dup 1))]
1454  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1455{
1456  return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
1457}
1458  [(set_attr "type" "vecstore")])
1459
1460(define_insn "vsx_st_elemrev_v2df"
1461  [(set (match_operand:V2DF 0 "memory_operand" "=Z")
1462        (vec_select:V2DF
1463          (match_operand:V2DF 1 "vsx_register_operand" "wa")
1464          (parallel [(const_int 1) (const_int 0)])))]
1465  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1466  "stxvd2x %x1,%y0"
1467  [(set_attr "type" "vecstore")])
1468
1469(define_insn "vsx_st_elemrev_v2di"
1470  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1471        (vec_select:V2DI
1472          (match_operand:V2DI 1 "vsx_register_operand" "wa")
1473          (parallel [(const_int 1) (const_int 0)])))]
1474  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1475  "stxvd2x %x1,%y0"
1476  [(set_attr "type" "vecstore")])
1477
1478(define_insn "vsx_st_elemrev_v4sf"
1479  [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1480        (vec_select:V4SF
1481          (match_operand:V4SF 1 "vsx_register_operand" "wa")
1482          (parallel [(const_int 3) (const_int 2)
1483                     (const_int 1) (const_int 0)])))]
1484  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1485  "stxvw4x %x1,%y0"
1486  [(set_attr "type" "vecstore")])
1487
1488(define_insn "vsx_st_elemrev_v4si"
1489  [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1490        (vec_select:V4SI
1491	  (match_operand:V4SI 1 "vsx_register_operand" "wa")
1492	  (parallel [(const_int 3) (const_int 2)
1493	             (const_int 1) (const_int 0)])))]
1494  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1495  "stxvw4x %x1,%y0"
1496  [(set_attr "type" "vecstore")])
1497
1498(define_expand "vsx_st_elemrev_v8hi"
1499  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1500        (vec_select:V8HI
1501          (match_operand:V8HI 1 "vsx_register_operand" "wa")
1502          (parallel [(const_int 7) (const_int 6)
1503                     (const_int 5) (const_int 4)
1504                     (const_int 3) (const_int 2)
1505                     (const_int 1) (const_int 0)])))]
1506  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1507{
1508  if (!TARGET_P9_VECTOR)
1509    {
1510      rtx mem_subreg, subreg, perm[16], pcv;
1511      rtx tmp = gen_reg_rtx (V8HImode);
1512      /* 2 is leftmost element in register */
1513      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1514      int i;
1515
1516      for (i = 0; i < 16; ++i)
1517      	perm[i] = GEN_INT (reorder[i]);
1518
1519      pcv = force_reg (V16QImode,
1520                       gen_rtx_CONST_VECTOR (V16QImode,
1521                                             gen_rtvec_v (16, perm)));
1522      emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
1523                                                operands[1], pcv));
1524      subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
1525      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
1526      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1527      DONE;
1528    }
1529})
1530
1531(define_insn "*vsx_st_elemrev_v2di_internal"
1532  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1533        (vec_select:V2DI
1534          (match_operand:V2DI 1 "vsx_register_operand" "wa")
1535          (parallel [(const_int 1) (const_int 0)])))]
1536  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1537  "stxvd2x %x1,%y0"
1538  [(set_attr "type" "vecstore")])
1539
1540(define_insn "*vsx_st_elemrev_v8hi_internal"
1541  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1542        (vec_select:V8HI
1543          (match_operand:V8HI 1 "vsx_register_operand" "wa")
1544          (parallel [(const_int 7) (const_int 6)
1545                     (const_int 5) (const_int 4)
1546                     (const_int 3) (const_int 2)
1547                     (const_int 1) (const_int 0)])))]
1548  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1549  "stxvh8x %x1,%y0"
1550  [(set_attr "type" "vecstore")])
1551
1552(define_expand "vsx_st_elemrev_v16qi"
1553  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1554        (vec_select:V16QI
1555          (match_operand:V16QI 1 "vsx_register_operand" "wa")
1556          (parallel [(const_int 15) (const_int 14)
1557                     (const_int 13) (const_int 12)
1558                     (const_int 11) (const_int 10)
1559                     (const_int  9) (const_int  8)
1560                     (const_int  7) (const_int  6)
1561                     (const_int  5) (const_int  4)
1562                     (const_int  3) (const_int  2)
1563                     (const_int  1) (const_int  0)])))]
1564  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1565{
1566  if (!TARGET_P9_VECTOR)
1567    {
1568      rtx mem_subreg, subreg, perm[16], pcv;
1569      rtx tmp = gen_reg_rtx (V16QImode);
1570      /* 3 is leftmost element in register */
1571      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1572      int i;
1573
1574      for (i = 0; i < 16; ++i)
1575      	perm[i] = GEN_INT (reorder[i]);
1576
1577      pcv = force_reg (V16QImode,
1578                       gen_rtx_CONST_VECTOR (V16QImode,
1579                                             gen_rtvec_v (16, perm)));
1580      emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
1581                                                 operands[1], pcv));
1582      subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
1583      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
1584      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1585      DONE;
1586    }
1587})
1588
1589(define_insn "*vsx_st_elemrev_v16qi_internal"
1590  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1591        (vec_select:V16QI
1592          (match_operand:V16QI 1 "vsx_register_operand" "wa")
1593          (parallel [(const_int 15) (const_int 14)
1594                     (const_int 13) (const_int 12)
1595                     (const_int 11) (const_int 10)
1596                     (const_int  9) (const_int  8)
1597                     (const_int  7) (const_int  6)
1598                     (const_int  5) (const_int  4)
1599                     (const_int  3) (const_int  2)
1600                     (const_int  1) (const_int  0)])))]
1601  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1602  "stxvb16x %x1,%y0"
1603  [(set_attr "type" "vecstore")])
1604
1605
1606;; VSX vector floating point arithmetic instructions.  The VSX scalar
1607;; instructions are now combined with the insn for the traditional floating
1608;; point unit.
1609(define_insn "*vsx_add<mode>3"
1610  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1611        (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1612		    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1613  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1614  "xvadd<VSs> %x0,%x1,%x2"
1615  [(set_attr "type" "<VStype_simple>")
1616   (set_attr "fp_type" "<VSfptype_simple>")])
1617
1618(define_insn "*vsx_sub<mode>3"
1619  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1620        (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1621		     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1622  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1623  "xvsub<VSs> %x0,%x1,%x2"
1624  [(set_attr "type" "<VStype_simple>")
1625   (set_attr "fp_type" "<VSfptype_simple>")])
1626
1627(define_insn "*vsx_mul<mode>3"
1628  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1629        (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1630		    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1631  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1632  "xvmul<VSs> %x0,%x1,%x2"
1633  [(set_attr "type" "<VStype_simple>")
1634   (set_attr "fp_type" "<VSfptype_mul>")])
1635
1636; Emulate vector with scalar for vec_mul in V2DImode
1637(define_insn_and_split "vsx_mul_v2di"
1638  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1639        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1640                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1641                     UNSPEC_VSX_MULSD))]
1642  "VECTOR_MEM_VSX_P (V2DImode)"
1643  "#"
1644  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1645  [(const_int 0)]
1646{
1647  rtx op0 = operands[0];
1648  rtx op1 = operands[1];
1649  rtx op2 = operands[2];
1650  rtx op3 = gen_reg_rtx (DImode);
1651  rtx op4 = gen_reg_rtx (DImode);
1652  rtx op5 = gen_reg_rtx (DImode);
1653  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1654  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1655  if (TARGET_POWERPC64)
1656    emit_insn (gen_muldi3 (op5, op3, op4));
1657  else
1658    {
1659      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1660      emit_move_insn (op5, ret);
1661    }
1662  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1663  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1664  if (TARGET_POWERPC64)
1665    emit_insn (gen_muldi3 (op3, op3, op4));
1666  else
1667    {
1668      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1669      emit_move_insn (op3, ret);
1670    }
1671  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1672  DONE;
1673}
1674  [(set_attr "type" "mul")])
1675
1676(define_insn "*vsx_div<mode>3"
1677  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1678        (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1679		   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1680  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1681  "xvdiv<VSs> %x0,%x1,%x2"
1682  [(set_attr "type" "<VStype_div>")
1683   (set_attr "fp_type" "<VSfptype_div>")])
1684
1685; Emulate vector with scalar for vec_div in V2DImode
1686(define_insn_and_split "vsx_div_v2di"
1687  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1688        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1689                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1690                     UNSPEC_VSX_DIVSD))]
1691  "VECTOR_MEM_VSX_P (V2DImode)"
1692  "#"
1693  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1694  [(const_int 0)]
1695{
1696  rtx op0 = operands[0];
1697  rtx op1 = operands[1];
1698  rtx op2 = operands[2];
1699  rtx op3 = gen_reg_rtx (DImode);
1700  rtx op4 = gen_reg_rtx (DImode);
1701  rtx op5 = gen_reg_rtx (DImode);
1702  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1703  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1704  if (TARGET_POWERPC64)
1705    emit_insn (gen_divdi3 (op5, op3, op4));
1706  else
1707    {
1708      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1709      rtx target = emit_library_call_value (libfunc,
1710					    op5, LCT_NORMAL, DImode,
1711					    op3, DImode,
1712					    op4, DImode);
1713      emit_move_insn (op5, target);
1714    }
1715  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1716  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1717  if (TARGET_POWERPC64)
1718    emit_insn (gen_divdi3 (op3, op3, op4));
1719  else
1720    {
1721      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1722      rtx target = emit_library_call_value (libfunc,
1723					    op3, LCT_NORMAL, DImode,
1724					    op3, DImode,
1725					    op4, DImode);
1726      emit_move_insn (op3, target);
1727    }
1728  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1729  DONE;
1730}
1731  [(set_attr "type" "div")])
1732
1733(define_insn_and_split "vsx_udiv_v2di"
1734  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1735        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1736                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1737                     UNSPEC_VSX_DIVUD))]
1738  "VECTOR_MEM_VSX_P (V2DImode)"
1739  "#"
1740  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1741  [(const_int 0)]
1742{
1743  rtx op0 = operands[0];
1744  rtx op1 = operands[1];
1745  rtx op2 = operands[2];
1746  rtx op3 = gen_reg_rtx (DImode);
1747  rtx op4 = gen_reg_rtx (DImode);
1748  rtx op5 = gen_reg_rtx (DImode);
1749  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1750  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1751  if (TARGET_POWERPC64)
1752    emit_insn (gen_udivdi3 (op5, op3, op4));
1753  else
1754    {
1755      rtx libfunc = optab_libfunc (udiv_optab, DImode);
1756      rtx target = emit_library_call_value (libfunc,
1757					    op5, LCT_NORMAL, DImode,
1758					    op3, DImode,
1759					    op4, DImode);
1760      emit_move_insn (op5, target);
1761    }
1762  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1763  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1764  if (TARGET_POWERPC64)
1765    emit_insn (gen_udivdi3 (op3, op3, op4));
1766  else
1767    {
1768      rtx libfunc = optab_libfunc (udiv_optab, DImode);
1769      rtx target = emit_library_call_value (libfunc,
1770					    op3, LCT_NORMAL, DImode,
1771					    op3, DImode,
1772					    op4, DImode);
1773      emit_move_insn (op3, target);
1774    }
1775  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1776  DONE;
1777}
1778  [(set_attr "type" "div")])
1779
1780;; *tdiv* instruction returning the FG flag
1781(define_expand "vsx_tdiv<mode>3_fg"
1782  [(set (match_dup 3)
1783	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1784		      (match_operand:VSX_B 2 "vsx_register_operand")]
1785		     UNSPEC_VSX_TDIV))
1786   (set (match_operand:SI 0 "gpc_reg_operand")
1787	(gt:SI (match_dup 3)
1788	       (const_int 0)))]
1789  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1790{
1791  operands[3] = gen_reg_rtx (CCFPmode);
1792})
1793
1794;; *tdiv* instruction returning the FE flag
1795(define_expand "vsx_tdiv<mode>3_fe"
1796  [(set (match_dup 3)
1797	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1798		      (match_operand:VSX_B 2 "vsx_register_operand")]
1799		     UNSPEC_VSX_TDIV))
1800   (set (match_operand:SI 0 "gpc_reg_operand")
1801	(eq:SI (match_dup 3)
1802	       (const_int 0)))]
1803  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1804{
1805  operands[3] = gen_reg_rtx (CCFPmode);
1806})
1807
1808(define_insn "*vsx_tdiv<mode>3_internal"
1809  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1810	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
1811		      (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
1812		   UNSPEC_VSX_TDIV))]
1813  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1814  "x<VSv>tdiv<VSs> %0,%x1,%x2"
1815  [(set_attr "type" "<VStype_simple>")
1816   (set_attr "fp_type" "<VSfptype_simple>")])
1817
1818(define_insn "vsx_fre<mode>2"
1819  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1820	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1821		      UNSPEC_FRES))]
1822  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1823  "xvre<VSs> %x0,%x1"
1824  [(set_attr "type" "<VStype_simple>")
1825   (set_attr "fp_type" "<VSfptype_simple>")])
1826
1827(define_insn "*vsx_neg<mode>2"
1828  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1829        (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1830  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1831  "xvneg<VSs> %x0,%x1"
1832  [(set_attr "type" "<VStype_simple>")
1833   (set_attr "fp_type" "<VSfptype_simple>")])
1834
1835(define_insn "*vsx_abs<mode>2"
1836  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1837        (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1838  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1839  "xvabs<VSs> %x0,%x1"
1840  [(set_attr "type" "<VStype_simple>")
1841   (set_attr "fp_type" "<VSfptype_simple>")])
1842
1843(define_insn "vsx_nabs<mode>2"
1844  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1845        (neg:VSX_F
1846	 (abs:VSX_F
1847	  (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
1848  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1849  "xvnabs<VSs> %x0,%x1"
1850  [(set_attr "type" "<VStype_simple>")
1851   (set_attr "fp_type" "<VSfptype_simple>")])
1852
1853(define_insn "vsx_smax<mode>3"
1854  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1855        (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1856		    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1857  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1858  "xvmax<VSs> %x0,%x1,%x2"
1859  [(set_attr "type" "<VStype_simple>")
1860   (set_attr "fp_type" "<VSfptype_simple>")])
1861
1862(define_insn "*vsx_smin<mode>3"
1863  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1864        (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1865		    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1866  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1867  "xvmin<VSs> %x0,%x1,%x2"
1868  [(set_attr "type" "<VStype_simple>")
1869   (set_attr "fp_type" "<VSfptype_simple>")])
1870
1871(define_insn "*vsx_sqrt<mode>2"
1872  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1873        (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1874  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1875  "xvsqrt<VSs> %x0,%x1"
1876  [(set_attr "type" "<VStype_sqrt>")
1877   (set_attr "fp_type" "<VSfptype_sqrt>")])
1878
1879(define_insn "*vsx_rsqrte<mode>2"
1880  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1881	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1882		      UNSPEC_RSQRT))]
1883  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1884  "xvrsqrte<VSs> %x0,%x1"
1885  [(set_attr "type" "<VStype_simple>")
1886   (set_attr "fp_type" "<VSfptype_simple>")])
1887
1888;; *tsqrt* returning the fg flag
1889(define_expand "vsx_tsqrt<mode>2_fg"
1890  [(set (match_dup 2)
1891	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1892		     UNSPEC_VSX_TSQRT))
1893   (set (match_operand:SI 0 "gpc_reg_operand")
1894	(gt:SI (match_dup 2)
1895	       (const_int 0)))]
1896  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1897{
1898  operands[2] = gen_reg_rtx (CCFPmode);
1899})
1900
1901;; *tsqrt* returning the fe flag
1902(define_expand "vsx_tsqrt<mode>2_fe"
1903  [(set (match_dup 2)
1904	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1905		     UNSPEC_VSX_TSQRT))
1906   (set (match_operand:SI 0 "gpc_reg_operand")
1907	(eq:SI (match_dup 2)
1908	       (const_int 0)))]
1909  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1910{
1911  operands[2] = gen_reg_rtx (CCFPmode);
1912})
1913
1914(define_insn "*vsx_tsqrt<mode>2_internal"
1915  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1916	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1917		     UNSPEC_VSX_TSQRT))]
1918  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1919  "x<VSv>tsqrt<VSs> %0,%x1"
1920  [(set_attr "type" "<VStype_simple>")
1921   (set_attr "fp_type" "<VSfptype_simple>")])
1922
1923;; Fused vector multiply/add instructions. Support the classical Altivec
1924;; versions of fma, which allows the target to be a separate register from the
1925;; 3 inputs.  Under VSX, the target must be either the addend or the first
1926;; multiply.
1927
1928(define_insn "*vsx_fmav4sf4"
1929  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1930	(fma:V4SF
1931	  (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1932	  (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1933	  (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
1934  "VECTOR_UNIT_VSX_P (V4SFmode)"
1935  "@
1936   xvmaddasp %x0,%x1,%x2
1937   xvmaddmsp %x0,%x1,%x3
1938   xvmaddasp %x0,%x1,%x2
1939   xvmaddmsp %x0,%x1,%x3
1940   vmaddfp %0,%1,%2,%3"
1941  [(set_attr "type" "vecfloat")])
1942
1943(define_insn "*vsx_fmav2df4"
1944  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1945	(fma:V2DF
1946	  (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1947	  (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1948	  (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
1949  "VECTOR_UNIT_VSX_P (V2DFmode)"
1950  "@
1951   xvmaddadp %x0,%x1,%x2
1952   xvmaddmdp %x0,%x1,%x3
1953   xvmaddadp %x0,%x1,%x2
1954   xvmaddmdp %x0,%x1,%x3"
1955  [(set_attr "type" "vecdouble")])
1956
1957(define_insn "*vsx_fms<mode>4"
1958  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1959	(fma:VSX_F
1960	  (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
1961	  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1962	  (neg:VSX_F
1963	    (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1964  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1965  "@
1966   xvmsuba<VSs> %x0,%x1,%x2
1967   xvmsubm<VSs> %x0,%x1,%x3
1968   xvmsuba<VSs> %x0,%x1,%x2
1969   xvmsubm<VSs> %x0,%x1,%x3"
1970  [(set_attr "type" "<VStype_mul>")])
1971
1972(define_insn "*vsx_nfma<mode>4"
1973  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1974	(neg:VSX_F
1975	 (fma:VSX_F
1976	  (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
1977	  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1978	  (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1979  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1980  "@
1981   xvnmadda<VSs> %x0,%x1,%x2
1982   xvnmaddm<VSs> %x0,%x1,%x3
1983   xvnmadda<VSs> %x0,%x1,%x2
1984   xvnmaddm<VSs> %x0,%x1,%x3"
1985  [(set_attr "type" "<VStype_mul>")
1986   (set_attr "fp_type" "<VSfptype_mul>")])
1987
1988(define_insn "*vsx_nfmsv4sf4"
1989  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1990	(neg:V4SF
1991	 (fma:V4SF
1992	   (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1993	   (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1994	   (neg:V4SF
1995	     (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
1996  "VECTOR_UNIT_VSX_P (V4SFmode)"
1997  "@
1998   xvnmsubasp %x0,%x1,%x2
1999   xvnmsubmsp %x0,%x1,%x3
2000   xvnmsubasp %x0,%x1,%x2
2001   xvnmsubmsp %x0,%x1,%x3
2002   vnmsubfp %0,%1,%2,%3"
2003  [(set_attr "type" "vecfloat")])
2004
2005(define_insn "*vsx_nfmsv2df4"
2006  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
2007	(neg:V2DF
2008	 (fma:V2DF
2009	   (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
2010	   (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
2011	   (neg:V2DF
2012	     (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
2013  "VECTOR_UNIT_VSX_P (V2DFmode)"
2014  "@
2015   xvnmsubadp %x0,%x1,%x2
2016   xvnmsubmdp %x0,%x1,%x3
2017   xvnmsubadp %x0,%x1,%x2
2018   xvnmsubmdp %x0,%x1,%x3"
2019  [(set_attr "type" "vecdouble")])
2020
2021;; Vector conditional expressions (no scalar version for these instructions)
2022(define_insn "vsx_eq<mode>"
2023  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2024	(eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2025		  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2026  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2027  "xvcmpeq<VSs> %x0,%x1,%x2"
2028  [(set_attr "type" "<VStype_simple>")
2029   (set_attr "fp_type" "<VSfptype_simple>")])
2030
2031(define_insn "vsx_gt<mode>"
2032  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2033	(gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2034		  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2035  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2036  "xvcmpgt<VSs> %x0,%x1,%x2"
2037  [(set_attr "type" "<VStype_simple>")
2038   (set_attr "fp_type" "<VSfptype_simple>")])
2039
2040(define_insn "*vsx_ge<mode>"
2041  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2042	(ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2043		  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
2044  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2045  "xvcmpge<VSs> %x0,%x1,%x2"
2046  [(set_attr "type" "<VStype_simple>")
2047   (set_attr "fp_type" "<VSfptype_simple>")])
2048
2049;; Compare vectors producing a vector result and a predicate, setting CR6 to
2050;; indicate a combined status
2051(define_insn "*vsx_eq_<mode>_p"
2052  [(set (reg:CC CR6_REGNO)
2053	(unspec:CC
2054	 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2055		 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2056	 UNSPEC_PREDICATE))
2057   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2058	(eq:VSX_F (match_dup 1)
2059		  (match_dup 2)))]
2060  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2061  "xvcmpeq<VSs>. %x0,%x1,%x2"
2062  [(set_attr "type" "<VStype_simple>")])
2063
2064(define_insn "*vsx_gt_<mode>_p"
2065  [(set (reg:CC CR6_REGNO)
2066	(unspec:CC
2067	 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2068		 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2069	 UNSPEC_PREDICATE))
2070   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2071	(gt:VSX_F (match_dup 1)
2072		  (match_dup 2)))]
2073  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2074  "xvcmpgt<VSs>. %x0,%x1,%x2"
2075  [(set_attr "type" "<VStype_simple>")])
2076
2077(define_insn "*vsx_ge_<mode>_p"
2078  [(set (reg:CC CR6_REGNO)
2079	(unspec:CC
2080	 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
2081		 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
2082	 UNSPEC_PREDICATE))
2083   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2084	(ge:VSX_F (match_dup 1)
2085		  (match_dup 2)))]
2086  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2087  "xvcmpge<VSs>. %x0,%x1,%x2"
2088  [(set_attr "type" "<VStype_simple>")])
2089
2090;; Vector select
2091(define_insn "*vsx_xxsel<mode>"
2092  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2093	(if_then_else:VSX_L
2094	 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
2095		(match_operand:VSX_L 4 "zero_constant" ""))
2096	 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
2097	 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
2098  "VECTOR_MEM_VSX_P (<MODE>mode)"
2099  "xxsel %x0,%x3,%x2,%x1"
2100  [(set_attr "type" "vecmove")])
2101
2102(define_insn "*vsx_xxsel<mode>_uns"
2103  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2104	(if_then_else:VSX_L
2105	 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
2106		   (match_operand:VSX_L 4 "zero_constant" ""))
2107	 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
2108	 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
2109  "VECTOR_MEM_VSX_P (<MODE>mode)"
2110  "xxsel %x0,%x3,%x2,%x1"
2111  [(set_attr "type" "vecmove")])
2112
2113;; Copy sign
2114(define_insn "vsx_copysign<mode>3"
2115  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2116	(unspec:VSX_F
2117	 [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
2118	  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
2119	 UNSPEC_COPYSIGN))]
2120  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2121  "xvcpsgn<VSs> %x0,%x2,%x1"
2122  [(set_attr "type" "<VStype_simple>")
2123   (set_attr "fp_type" "<VSfptype_simple>")])
2124
2125;; For the conversions, limit the register class for the integer value to be
2126;; the fprs because we don't want to add the altivec registers to movdi/movsi.
2127;; For the unsigned tests, there isn't a generic double -> unsigned conversion
2128;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
2129;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
2130;; in allowing virtual registers.
2131(define_insn "vsx_float<VSi><mode>2"
2132  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
2133	(float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
2134  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2135  "xvcvsx<VSc><VSs> %x0,%x1"
2136  [(set_attr "type" "<VStype_simple>")
2137   (set_attr "fp_type" "<VSfptype_simple>")])
2138
2139(define_insn "vsx_floatuns<VSi><mode>2"
2140  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
2141	(unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
2142  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2143  "xvcvux<VSc><VSs> %x0,%x1"
2144  [(set_attr "type" "<VStype_simple>")
2145   (set_attr "fp_type" "<VSfptype_simple>")])
2146
2147(define_insn "vsx_fix_trunc<mode><VSi>2"
2148  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
2149	(fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
2150  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2151  "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
2152  [(set_attr "type" "<VStype_simple>")
2153   (set_attr "fp_type" "<VSfptype_simple>")])
2154
2155(define_insn "vsx_fixuns_trunc<mode><VSi>2"
2156  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
2157	(unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
2158  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2159  "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
2160  [(set_attr "type" "<VStype_simple>")
2161   (set_attr "fp_type" "<VSfptype_simple>")])
2162
2163;; Math rounding functions
2164(define_insn "vsx_x<VSv>r<VSs>i"
2165  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2166	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2167		      UNSPEC_VSX_ROUND_I))]
2168  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2169  "x<VSv>r<VSs>i %x0,%x1"
2170  [(set_attr "type" "<VStype_simple>")
2171   (set_attr "fp_type" "<VSfptype_simple>")])
2172
2173(define_insn "vsx_x<VSv>r<VSs>ic"
2174  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2175	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2176		      UNSPEC_VSX_ROUND_IC))]
2177  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2178  "x<VSv>r<VSs>ic %x0,%x1"
2179  [(set_attr "type" "<VStype_simple>")
2180   (set_attr "fp_type" "<VSfptype_simple>")])
2181
2182(define_insn "vsx_btrunc<mode>2"
2183  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2184	(fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
2185  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2186  "xvr<VSs>iz %x0,%x1"
2187  [(set_attr "type" "<VStype_simple>")
2188   (set_attr "fp_type" "<VSfptype_simple>")])
2189
2190(define_insn "*vsx_b2trunc<mode>2"
2191  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2192	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
2193		      UNSPEC_FRIZ))]
2194  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2195  "x<VSv>r<VSs>iz %x0,%x1"
2196  [(set_attr "type" "<VStype_simple>")
2197   (set_attr "fp_type" "<VSfptype_simple>")])
2198
2199(define_insn "vsx_floor<mode>2"
2200  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2201	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
2202		      UNSPEC_FRIM))]
2203  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2204  "xvr<VSs>im %x0,%x1"
2205  [(set_attr "type" "<VStype_simple>")
2206   (set_attr "fp_type" "<VSfptype_simple>")])
2207
2208(define_insn "vsx_ceil<mode>2"
2209  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
2210	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
2211		      UNSPEC_FRIP))]
2212  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2213  "xvr<VSs>ip %x0,%x1"
2214  [(set_attr "type" "<VStype_simple>")
2215   (set_attr "fp_type" "<VSfptype_simple>")])
2216
2217
2218;; VSX convert to/from double vector
2219
2220;; Convert between single and double precision
2221;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
2222;; scalar single precision instructions internally use the double format.
2223;; Prefer the altivec registers, since we likely will need to do a vperm
2224(define_insn "vsx_<VS_spdp_insn>"
2225  [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
2226	(unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
2227			      UNSPEC_VSX_CVSPDP))]
2228  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2229  "<VS_spdp_insn> %x0,%x1"
2230  [(set_attr "type" "<VS_spdp_type>")])
2231
2232;; xscvspdp, represent the scalar SF type as V4SF
2233(define_insn "vsx_xscvspdp"
2234  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2235	(unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2236		   UNSPEC_VSX_CVSPDP))]
2237  "VECTOR_UNIT_VSX_P (V4SFmode)"
2238  "xscvspdp %x0,%x1"
2239  [(set_attr "type" "fp")])
2240
2241;; Same as vsx_xscvspdp, but use SF as the type
2242(define_insn "vsx_xscvspdp_scalar2"
2243  [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
2244	(unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2245		   UNSPEC_VSX_CVSPDP))]
2246  "VECTOR_UNIT_VSX_P (V4SFmode)"
2247  "xscvspdp %x0,%x1"
2248  [(set_attr "type" "fp")])
2249
2250;; Generate xvcvhpsp instruction
2251(define_insn "vsx_xvcvhpsp"
2252  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2253	(unspec:V4SF [(match_operand: V16QI 1 "vsx_register_operand" "wa")]
2254		     UNSPEC_VSX_CVHPSP))]
2255  "TARGET_P9_VECTOR"
2256  "xvcvhpsp %x0,%x1"
2257  [(set_attr "type" "vecfloat")])
2258
2259;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
2260;; format of scalars is actually DF.
2261(define_insn "vsx_xscvdpsp_scalar"
2262  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2263	(unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
2264		     UNSPEC_VSX_CVSPDP))]
2265  "VECTOR_UNIT_VSX_P (V4SFmode)"
2266  "xscvdpsp %x0,%x1"
2267  [(set_attr "type" "fp")])
2268
2269;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
2270(define_insn "vsx_xscvdpspn"
2271  [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww")
2272	(unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "ws")]
2273		     UNSPEC_VSX_CVDPSPN))]
2274  "TARGET_XSCVDPSPN"
2275  "xscvdpspn %x0,%x1"
2276  [(set_attr "type" "fp")])
2277
2278(define_insn "vsx_xscvspdpn"
2279  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2280	(unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2281		   UNSPEC_VSX_CVSPDPN))]
2282  "TARGET_XSCVSPDPN"
2283  "xscvspdpn %x0,%x1"
2284  [(set_attr "type" "fp")])
2285
2286(define_insn "vsx_xscvdpspn_scalar"
2287  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2288	(unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
2289		     UNSPEC_VSX_CVDPSPN))]
2290  "TARGET_XSCVDPSPN"
2291  "xscvdpspn %x0,%x1"
2292  [(set_attr "type" "fp")])
2293
2294;; Used by direct move to move a SFmode value from GPR to VSX register
2295(define_insn "vsx_xscvspdpn_directmove"
2296  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2297	(unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2298		   UNSPEC_VSX_CVSPDPN))]
2299  "TARGET_XSCVSPDPN"
2300  "xscvspdpn %x0,%x1"
2301  [(set_attr "type" "fp")])
2302
2303;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
2304
2305(define_expand "vsx_xvcvsxddp_scale"
2306  [(match_operand:V2DF 0 "vsx_register_operand")
2307   (match_operand:V2DI 1 "vsx_register_operand")
2308   (match_operand:QI 2 "immediate_operand")]
2309  "VECTOR_UNIT_VSX_P (V2DFmode)"
2310{
2311  rtx op0 = operands[0];
2312  rtx op1 = operands[1];
2313  int scale = INTVAL(operands[2]);
2314  emit_insn (gen_vsx_xvcvsxddp (op0, op1));
2315  if (scale != 0)
2316    rs6000_scale_v2df (op0, op0, -scale);
2317  DONE;
2318})
2319
2320(define_insn "vsx_xvcvsxddp"
2321  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2322        (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2323                     UNSPEC_VSX_XVCVSXDDP))]
2324  "VECTOR_UNIT_VSX_P (V2DFmode)"
2325  "xvcvsxddp %x0,%x1"
2326  [(set_attr "type" "vecdouble")])
2327
2328(define_expand "vsx_xvcvuxddp_scale"
2329  [(match_operand:V2DF 0 "vsx_register_operand")
2330   (match_operand:V2DI 1 "vsx_register_operand")
2331   (match_operand:QI 2 "immediate_operand")]
2332  "VECTOR_UNIT_VSX_P (V2DFmode)"
2333{
2334  rtx op0 = operands[0];
2335  rtx op1 = operands[1];
2336  int scale = INTVAL(operands[2]);
2337  emit_insn (gen_vsx_xvcvuxddp (op0, op1));
2338  if (scale != 0)
2339    rs6000_scale_v2df (op0, op0, -scale);
2340  DONE;
2341})
2342
2343(define_insn "vsx_xvcvuxddp"
2344  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2345        (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2346                     UNSPEC_VSX_XVCVUXDDP))]
2347  "VECTOR_UNIT_VSX_P (V2DFmode)"
2348  "xvcvuxddp %x0,%x1"
2349  [(set_attr "type" "vecdouble")])
2350
2351(define_expand "vsx_xvcvdpsxds_scale"
2352  [(match_operand:V2DI 0 "vsx_register_operand")
2353   (match_operand:V2DF 1 "vsx_register_operand")
2354   (match_operand:QI 2 "immediate_operand")]
2355  "VECTOR_UNIT_VSX_P (V2DFmode)"
2356{
2357  rtx op0 = operands[0];
2358  rtx op1 = operands[1];
2359  rtx tmp;
2360  int scale = INTVAL (operands[2]);
2361  if (scale == 0)
2362    tmp = op1;
2363  else
2364    {
2365      tmp  = gen_reg_rtx (V2DFmode);
2366      rs6000_scale_v2df (tmp, op1, scale);
2367    }
2368  emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
2369  DONE;
2370})
2371
2372;; convert vector of 64-bit floating point numbers to vector of
2373;; 64-bit signed integer
2374(define_insn "vsx_xvcvdpsxds"
2375  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2376        (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2377                     UNSPEC_VSX_XVCVDPSXDS))]
2378  "VECTOR_UNIT_VSX_P (V2DFmode)"
2379  "xvcvdpsxds %x0,%x1"
2380  [(set_attr "type" "vecdouble")])
2381
2382;; convert vector of 32-bit floating point numbers to vector of
2383;; 32-bit signed integer
2384(define_insn "vsx_xvcvspsxws"
2385  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2386	(unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2387		     UNSPEC_VSX_XVCVSPSXWS))]
2388  "VECTOR_UNIT_VSX_P (V4SFmode)"
2389  "xvcvspsxws %x0,%x1"
2390  [(set_attr "type" "vecfloat")])
2391
2392;; convert vector of 64-bit floating point numbers to vector of
2393;; 64-bit unsigned integer
2394(define_expand "vsx_xvcvdpuxds_scale"
2395  [(match_operand:V2DI 0 "vsx_register_operand")
2396   (match_operand:V2DF 1 "vsx_register_operand")
2397   (match_operand:QI 2 "immediate_operand")]
2398  "VECTOR_UNIT_VSX_P (V2DFmode)"
2399{
2400  rtx op0 = operands[0];
2401  rtx op1 = operands[1];
2402  rtx tmp;
2403  int scale = INTVAL (operands[2]);
2404  if (scale == 0)
2405    tmp = op1;
2406  else
2407    {
2408      tmp = gen_reg_rtx (V2DFmode);
2409      rs6000_scale_v2df (tmp, op1, scale);
2410    }
2411  emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
2412  DONE;
2413})
2414
2415;; convert vector of 32-bit floating point numbers to vector of
2416;; 32-bit unsigned integer
2417(define_insn "vsx_xvcvspuxws"
2418  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2419	(unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2420		     UNSPEC_VSX_XVCVSPSXWS))]
2421  "VECTOR_UNIT_VSX_P (V4SFmode)"
2422  "xvcvspuxws %x0,%x1"
2423  [(set_attr "type" "vecfloat")])
2424
2425(define_insn "vsx_xvcvdpuxds"
2426  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2427        (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
2428                     UNSPEC_VSX_XVCVDPUXDS))]
2429  "VECTOR_UNIT_VSX_P (V2DFmode)"
2430  "xvcvdpuxds %x0,%x1"
2431  [(set_attr "type" "vecdouble")])
2432
2433;; Convert from 64-bit to 32-bit types
2434;; Note, favor the Altivec registers since the usual use of these instructions
2435;; is in vector converts and we need to use the Altivec vperm instruction.
2436
2437(define_insn "vsx_xvcvdpsxws"
2438  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2439	(unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
2440		     UNSPEC_VSX_CVDPSXWS))]
2441  "VECTOR_UNIT_VSX_P (V2DFmode)"
2442  "xvcvdpsxws %x0,%x1"
2443  [(set_attr "type" "vecdouble")])
2444
2445(define_insn "vsx_xvcvdpuxws"
2446  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2447	(unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
2448		     UNSPEC_VSX_CVDPUXWS))]
2449  "VECTOR_UNIT_VSX_P (V2DFmode)"
2450  "xvcvdpuxws %x0,%x1"
2451  [(set_attr "type" "vecdouble")])
2452
2453(define_insn "vsx_xvcvsxdsp"
2454  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2455	(unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
2456		     UNSPEC_VSX_CVSXDSP))]
2457  "VECTOR_UNIT_VSX_P (V2DFmode)"
2458  "xvcvsxdsp %x0,%x1"
2459  [(set_attr "type" "vecfloat")])
2460
2461(define_insn "vsx_xvcvuxdsp"
2462  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2463	(unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
2464		     UNSPEC_VSX_CVUXDSP))]
2465  "VECTOR_UNIT_VSX_P (V2DFmode)"
2466  "xvcvuxdsp %x0,%x1"
2467  [(set_attr "type" "vecdouble")])
2468
2469(define_insn "vsx_xvcdpsp"
2470  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
2471	(unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
2472		     UNSPEC_VSX_XVCDPSP))]
2473  "VECTOR_UNIT_VSX_P (V2DFmode)"
2474  "xvcvdpsp %x0,%x1"
2475  [(set_attr "type" "vecdouble")])
2476
2477;; Convert from 32-bit to 64-bit types
2478;; Provide both vector and scalar targets
2479(define_insn "vsx_xvcvsxwdp"
2480  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2481	(unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
2482		     UNSPEC_VSX_CVSXWDP))]
2483  "VECTOR_UNIT_VSX_P (V2DFmode)"
2484  "xvcvsxwdp %x0,%x1"
2485  [(set_attr "type" "vecdouble")])
2486
2487(define_insn "vsx_xvcvsxwdp_df"
2488  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2489	(unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2490		   UNSPEC_VSX_CVSXWDP))]
2491  "TARGET_VSX"
2492  "xvcvsxwdp %x0,%x1"
2493  [(set_attr "type" "vecdouble")])
2494
2495(define_insn "vsx_xvcvuxwdp"
2496  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2497	(unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
2498		     UNSPEC_VSX_CVUXWDP))]
2499  "VECTOR_UNIT_VSX_P (V2DFmode)"
2500  "xvcvuxwdp %x0,%x1"
2501  [(set_attr "type" "vecdouble")])
2502
2503(define_insn "vsx_xvcvuxwdp_df"
2504  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
2505	(unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2506		   UNSPEC_VSX_CVUXWDP))]
2507  "TARGET_VSX"
2508  "xvcvuxwdp %x0,%x1"
2509  [(set_attr "type" "vecdouble")])
2510
2511(define_insn "vsx_xvcvspsxds"
2512  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2513	(unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
2514		     UNSPEC_VSX_CVSPSXDS))]
2515  "VECTOR_UNIT_VSX_P (V2DFmode)"
2516  "xvcvspsxds %x0,%x1"
2517  [(set_attr "type" "vecdouble")])
2518
2519(define_insn "vsx_xvcvspuxds"
2520  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2521	(unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
2522		     UNSPEC_VSX_CVSPUXDS))]
2523  "VECTOR_UNIT_VSX_P (V2DFmode)"
2524  "xvcvspuxds %x0,%x1"
2525  [(set_attr "type" "vecdouble")])
2526
2527(define_insn "vsx_xvcvsxwsp"
2528  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2529	(unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2530		     UNSPEC_VSX_CVSXWSP))]
2531  "VECTOR_UNIT_VSX_P (V4SFmode)"
2532  "xvcvsxwsp %x0,%x1"
2533  [(set_attr "type" "vecfloat")])
2534
2535(define_insn "vsx_xvcvuxwsp"
2536  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2537	(unspec:V4SF[(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2538		    UNSPEC_VSX_CVUXWSP))]
2539  "VECTOR_UNIT_VSX_P (V4SFmode)"
2540  "xvcvuxwsp %x0,%x1"
2541  [(set_attr "type" "vecfloat")])
2542
2543;; Generate float2 double
2544;; convert two double to float
2545(define_expand "float2_v2df"
2546  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2547   (use (match_operand:V2DF 1 "register_operand" "wa"))
2548   (use (match_operand:V2DF 2 "register_operand" "wa"))]
2549 "VECTOR_UNIT_VSX_P (V4SFmode)"
2550{
2551  rtx rtx_src1, rtx_src2, rtx_dst;
2552
2553  rtx_dst = operands[0];
2554  rtx_src1 = operands[1];
2555  rtx_src2 = operands[2];
2556
2557  rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
2558  DONE;
2559})
2560
2561;; Generate float2
2562;; convert two long long signed ints to float
2563(define_expand "float2_v2di"
2564  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2565   (use (match_operand:V2DI 1 "register_operand" "wa"))
2566   (use (match_operand:V2DI 2 "register_operand" "wa"))]
2567 "VECTOR_UNIT_VSX_P (V4SFmode)"
2568{
2569  rtx rtx_src1, rtx_src2, rtx_dst;
2570
2571  rtx_dst = operands[0];
2572  rtx_src1 = operands[1];
2573  rtx_src2 = operands[2];
2574
2575  rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2576  DONE;
2577})
2578
2579;; Generate uns_float2
2580;; convert two long long unsigned ints to float
2581(define_expand "uns_float2_v2di"
2582  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2583   (use (match_operand:V2DI 1 "register_operand" "wa"))
2584   (use (match_operand:V2DI 2 "register_operand" "wa"))]
2585 "VECTOR_UNIT_VSX_P (V4SFmode)"
2586{
2587  rtx rtx_src1, rtx_src2, rtx_dst;
2588
2589  rtx_dst = operands[0];
2590  rtx_src1 = operands[1];
2591  rtx_src2 = operands[2];
2592
2593  rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2594  DONE;
2595})
2596
2597;; Generate floate
2598;; convert  double or long long signed to float
2599;; (Only even words are valid, BE numbering)
2600(define_expand "floate<mode>"
2601  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2602   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2603  "VECTOR_UNIT_VSX_P (V4SFmode)"
2604{
2605  if (VECTOR_ELT_ORDER_BIG)
2606    {
2607      /* Shift left one word to put even word correct location */
2608      rtx rtx_tmp;
2609      rtx rtx_val = GEN_INT (4);
2610
2611      rtx_tmp = gen_reg_rtx (V4SFmode);
2612      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2613      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2614		 rtx_tmp, rtx_tmp, rtx_val));
2615    }
2616  else
2617    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2618
2619  DONE;
2620})
2621
2622;; Generate uns_floate
2623;; convert long long unsigned to float
2624;; (Only even words are valid, BE numbering)
2625(define_expand "unsfloatev2di"
2626  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2627   (use (match_operand:V2DI 1 "register_operand" "wa"))]
2628  "VECTOR_UNIT_VSX_P (V4SFmode)"
2629{
2630  if (VECTOR_ELT_ORDER_BIG)
2631    {
2632      /* Shift left one word to put even word correct location */
2633      rtx rtx_tmp;
2634      rtx rtx_val = GEN_INT (4);
2635
2636      rtx_tmp = gen_reg_rtx (V4SFmode);
2637      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2638      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2639		 rtx_tmp, rtx_tmp, rtx_val));
2640    }
2641  else
2642    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2643
2644  DONE;
2645})
2646
2647;; Generate floato
2648;; convert double or long long signed to float
2649;; Only odd words are valid, BE numbering)
2650(define_expand "floato<mode>"
2651  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2652   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2653  "VECTOR_UNIT_VSX_P (V4SFmode)"
2654{
2655  if (VECTOR_ELT_ORDER_BIG)
2656    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2657  else
2658    {
2659      /* Shift left one word to put odd word correct location */
2660      rtx rtx_tmp;
2661      rtx rtx_val = GEN_INT (4);
2662
2663      rtx_tmp = gen_reg_rtx (V4SFmode);
2664      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2665      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2666		 rtx_tmp, rtx_tmp, rtx_val));
2667    }
2668  DONE;
2669})
2670
2671;; Generate uns_floato
2672;; convert long long unsigned to float
2673;; (Only odd words are valid, BE numbering)
2674(define_expand "unsfloatov2di"
2675 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2676  (use (match_operand:V2DI 1 "register_operand" "wa"))]
2677 "VECTOR_UNIT_VSX_P (V4SFmode)"
2678{
2679  if (VECTOR_ELT_ORDER_BIG)
2680    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2681  else
2682    {
2683      /* Shift left one word to put odd word correct location */
2684      rtx rtx_tmp;
2685      rtx rtx_val = GEN_INT (4);
2686
2687      rtx_tmp = gen_reg_rtx (V4SFmode);
2688      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2689      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2690		 rtx_tmp, rtx_tmp, rtx_val));
2691    }
2692  DONE;
2693})
2694
2695;; Generate vsigned2
2696;; convert two double float vectors to a vector of single precision ints
2697(define_expand "vsigned2_v2df"
2698  [(match_operand:V4SI 0 "register_operand" "=wa")
2699   (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2700		 (match_operand:V2DF 2 "register_operand" "wa")]
2701  UNSPEC_VSX_VSIGNED2)]
2702  "TARGET_VSX"
2703{
2704  rtx rtx_src1, rtx_src2, rtx_dst;
2705  bool signed_convert=true;
2706
2707  rtx_dst = operands[0];
2708  rtx_src1 = operands[1];
2709  rtx_src2 = operands[2];
2710
2711  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2712  DONE;
2713})
2714
2715;; Generate vsignedo_v2df
2716;; signed double float to int convert odd word
2717(define_expand "vsignedo_v2df"
2718  [(set (match_operand:V4SI 0 "register_operand" "=wa")
2719	(match_operand:V2DF 1 "register_operand" "wa"))]
2720  "TARGET_VSX"
2721{
2722  if (VECTOR_ELT_ORDER_BIG)
2723    {
2724      rtx rtx_tmp;
2725      rtx rtx_val = GEN_INT (12);
2726      rtx_tmp = gen_reg_rtx (V4SImode);
2727
2728      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2729
2730      /* Big endian word numbering for words in operand is 0 1 2 3.
2731	 take (operand[1] operand[1]) and shift left one word
2732	 0 1 2 3    0 1 2 3  =>  1 2 3 0
2733	 Words 1 and 3 are now are now where they need to be for result.  */
2734
2735      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2736		 rtx_tmp, rtx_val));
2737    }
2738  else
2739    /* Little endian word numbering for operand is 3 2 1 0.
2740       Result words 3 and 1 are where they need to be.  */
2741    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2742
2743  DONE;
2744}
2745  [(set_attr "type" "veccomplex")])
2746
2747;; Generate vsignede_v2df
2748;; signed double float to int even word
2749(define_expand "vsignede_v2df"
2750  [(set (match_operand:V4SI 0 "register_operand" "=v")
2751	(match_operand:V2DF 1 "register_operand" "v"))]
2752  "TARGET_VSX"
2753{
2754  if (VECTOR_ELT_ORDER_BIG)
2755    /* Big endian word numbering for words in operand is 0 1
2756       Result words 0 is where they need to be.  */
2757    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2758
2759  else
2760    {
2761      rtx rtx_tmp;
2762      rtx rtx_val = GEN_INT (12);
2763      rtx_tmp = gen_reg_rtx (V4SImode);
2764
2765      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2766
2767      /* Little endian word numbering for operand is 3 2 1 0.
2768	 take (operand[1] operand[1]) and shift left three words
2769	 0 1 2 3   0 1 2 3  =>  3 0 1 2
2770	 Words 0 and 2 are now where they need to be for the result.  */
2771      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2772		 rtx_tmp, rtx_val));
2773    }
2774  DONE;
2775}
2776  [(set_attr "type" "veccomplex")])
2777
2778;; Generate unsigned2
2779;; convert two double float vectors to a vector of single precision
2780;; unsigned ints
2781(define_expand "vunsigned2_v2df"
2782[(match_operand:V4SI 0 "register_operand" "=v")
2783 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2784	       (match_operand:V2DF 2 "register_operand" "v")]
2785	      UNSPEC_VSX_VSIGNED2)]
2786 "TARGET_VSX"
2787{
2788  rtx rtx_src1, rtx_src2, rtx_dst;
2789  bool signed_convert=false;
2790
2791  rtx_dst = operands[0];
2792  rtx_src1 = operands[1];
2793  rtx_src2 = operands[2];
2794
2795  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2796  DONE;
2797})
2798
2799;; Generate vunsignedo_v2df
2800;; unsigned double float to int convert odd word
2801(define_expand "vunsignedo_v2df"
2802  [(set (match_operand:V4SI 0 "register_operand" "=v")
2803	(match_operand:V2DF 1 "register_operand" "v"))]
2804  "TARGET_VSX"
2805{
2806  if (VECTOR_ELT_ORDER_BIG)
2807    {
2808      rtx rtx_tmp;
2809      rtx rtx_val = GEN_INT (12);
2810      rtx_tmp = gen_reg_rtx (V4SImode);
2811
2812      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2813
2814      /* Big endian word numbering for words in operand is 0 1 2 3.
2815	 take (operand[1] operand[1]) and shift left one word
2816	 0 1 2 3    0 1 2 3  =>  1 2 3 0
2817	 Words 1 and 3 are now are now where they need to be for result.  */
2818
2819      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2820		 rtx_tmp, rtx_val));
2821    }
2822  else
2823    /* Little endian word numbering for operand is 3 2 1 0.
2824       Result words 3 and 1 are where they need to be.  */
2825    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2826
2827  DONE;
2828}
2829  [(set_attr "type" "veccomplex")])
2830
2831;; Generate vunsignede_v2df
2832;; unsigned double float to int even word
2833(define_expand "vunsignede_v2df"
2834  [(set (match_operand:V4SI 0 "register_operand" "=v")
2835	(match_operand:V2DF 1 "register_operand" "v"))]
2836  "TARGET_VSX"
2837{
2838  if (VECTOR_ELT_ORDER_BIG)
2839    /* Big endian word numbering for words in operand is 0 1
2840       Result words 0 is where they need to be.  */
2841    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2842
2843  else
2844    {
2845      rtx rtx_tmp;
2846      rtx rtx_val = GEN_INT (12);
2847      rtx_tmp = gen_reg_rtx (V4SImode);
2848
2849      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2850
2851      /* Little endian word numbering for operand is 3 2 1 0.
2852	 take (operand[1] operand[1]) and shift left three words
2853	 0 1 2 3   0 1 2 3  =>  3 0 1 2
2854	 Words 0 and 2 are now where they need to be for the result.  */
2855      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2856		 rtx_tmp, rtx_val));
2857    }
2858  DONE;
2859}
2860  [(set_attr "type" "veccomplex")])
2861
2862;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
2863;; since the xvrdpiz instruction does not truncate the value if the floating
2864;; point value is < LONG_MIN or > LONG_MAX.
2865(define_insn "*vsx_float_fix_v2df2"
2866  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
2867	(float:V2DF
2868	 (fix:V2DI
2869	  (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
2870  "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
2871   && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2872   && !flag_trapping_math && TARGET_FRIZ"
2873  "xvrdpiz %x0,%x1"
2874  [(set_attr "type" "vecdouble")
2875   (set_attr "fp_type" "fp_addsub_d")])
2876
2877
2878;; Permute operations
2879
2880;; Build a V2DF/V2DI vector from two scalars
2881(define_insn "vsx_concat_<mode>"
2882  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2883	(vec_concat:VSX_D
2884	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
2885	 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
2886  "VECTOR_MEM_VSX_P (<MODE>mode)"
2887{
2888  if (which_alternative == 0)
2889    return (BYTES_BIG_ENDIAN
2890	    ? "xxpermdi %x0,%x1,%x2,0"
2891	    : "xxpermdi %x0,%x2,%x1,0");
2892
2893  else if (which_alternative == 1)
2894    return (BYTES_BIG_ENDIAN
2895	    ? "mtvsrdd %x0,%1,%2"
2896	    : "mtvsrdd %x0,%2,%1");
2897
2898  else
2899    gcc_unreachable ();
2900}
2901  [(set_attr "type" "vecperm")])
2902
2903;; Combiner patterns to allow creating XXPERMDI's to access either double
2904;; word element in a vector register.
2905(define_insn "*vsx_concat_<mode>_1"
2906  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2907	(vec_concat:VSX_D
2908	 (vec_select:<VS_scalar>
2909	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2910	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2911	 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
2912  "VECTOR_MEM_VSX_P (<MODE>mode)"
2913{
2914  HOST_WIDE_INT dword = INTVAL (operands[2]);
2915  if (BYTES_BIG_ENDIAN)
2916    {
2917      operands[4] = GEN_INT (2*dword);
2918      return "xxpermdi %x0,%x1,%x3,%4";
2919    }
2920  else
2921    {
2922      operands[4] = GEN_INT (!dword);
2923      return "xxpermdi %x0,%x3,%x1,%4";
2924    }
2925}
2926  [(set_attr "type" "vecperm")])
2927
2928(define_insn "*vsx_concat_<mode>_2"
2929  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2930	(vec_concat:VSX_D
2931	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
2932	 (vec_select:<VS_scalar>
2933	  (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
2934	  (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
2935  "VECTOR_MEM_VSX_P (<MODE>mode)"
2936{
2937  HOST_WIDE_INT dword = INTVAL (operands[3]);
2938  if (BYTES_BIG_ENDIAN)
2939    {
2940      operands[4] = GEN_INT (dword);
2941      return "xxpermdi %x0,%x1,%x2,%4";
2942    }
2943  else
2944    {
2945      operands[4] = GEN_INT (2 * !dword);
2946      return "xxpermdi %x0,%x2,%x1,%4";
2947    }
2948}
2949  [(set_attr "type" "vecperm")])
2950
2951(define_insn "*vsx_concat_<mode>_3"
2952  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2953	(vec_concat:VSX_D
2954	 (vec_select:<VS_scalar>
2955	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2956	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2957	 (vec_select:<VS_scalar>
2958	  (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
2959	  (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
2960  "VECTOR_MEM_VSX_P (<MODE>mode)"
2961{
2962  HOST_WIDE_INT dword1 = INTVAL (operands[2]);
2963  HOST_WIDE_INT dword2 = INTVAL (operands[4]);
2964  if (BYTES_BIG_ENDIAN)
2965    {
2966      operands[5] = GEN_INT ((2 * dword1) + dword2);
2967      return "xxpermdi %x0,%x1,%x3,%5";
2968    }
2969  else
2970    {
2971      operands[5] = GEN_INT ((2 * !dword2) + !dword1);
2972      return "xxpermdi %x0,%x3,%x1,%5";
2973    }
2974}
2975  [(set_attr "type" "vecperm")])
2976
2977;; Special purpose concat using xxpermdi to glue two single precision values
2978;; together, relying on the fact that internally scalar floats are represented
2979;; as doubles.  This is used to initialize a V4SF vector with 4 floats
2980(define_insn "vsx_concat_v2sf"
2981  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2982	(unspec:V2DF
2983	 [(match_operand:SF 1 "vsx_register_operand" "ww")
2984	  (match_operand:SF 2 "vsx_register_operand" "ww")]
2985	 UNSPEC_VSX_CONCAT))]
2986  "VECTOR_MEM_VSX_P (V2DFmode)"
2987{
2988  if (BYTES_BIG_ENDIAN)
2989    return "xxpermdi %x0,%x1,%x2,0";
2990  else
2991    return "xxpermdi %x0,%x2,%x1,0";
2992}
2993  [(set_attr "type" "vecperm")])
2994
2995;; V4SImode initialization splitter
2996(define_insn_and_split "vsx_init_v4si"
2997  [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
2998	(unspec:V4SI
2999	 [(match_operand:SI 1 "reg_or_cint_operand" "rn")
3000	  (match_operand:SI 2 "reg_or_cint_operand" "rn")
3001	  (match_operand:SI 3 "reg_or_cint_operand" "rn")
3002	  (match_operand:SI 4 "reg_or_cint_operand" "rn")]
3003	 UNSPEC_VSX_VEC_INIT))
3004   (clobber (match_scratch:DI 5 "=&r"))
3005   (clobber (match_scratch:DI 6 "=&r"))]
3006   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3007   "#"
3008   "&& reload_completed"
3009   [(const_int 0)]
3010{
3011  rs6000_split_v4si_init (operands);
3012  DONE;
3013})
3014
3015;; xxpermdi for little endian loads and stores.  We need several of
3016;; these since the form of the PARALLEL differs by mode.
3017(define_insn "*vsx_xxpermdi2_le_<mode>"
3018  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
3019        (vec_select:VSX_D
3020          (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
3021          (parallel [(const_int 1) (const_int 0)])))]
3022  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
3023  "xxpermdi %x0,%x1,%x1,2"
3024  [(set_attr "type" "vecperm")])
3025
3026(define_insn "*vsx_xxpermdi4_le_<mode>"
3027  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3028        (vec_select:VSX_W
3029          (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3030          (parallel [(const_int 2) (const_int 3)
3031                     (const_int 0) (const_int 1)])))]
3032  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
3033  "xxpermdi %x0,%x1,%x1,2"
3034  [(set_attr "type" "vecperm")])
3035
3036(define_insn "*vsx_xxpermdi8_le_V8HI"
3037  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3038        (vec_select:V8HI
3039          (match_operand:V8HI 1 "vsx_register_operand" "wa")
3040          (parallel [(const_int 4) (const_int 5)
3041                     (const_int 6) (const_int 7)
3042                     (const_int 0) (const_int 1)
3043                     (const_int 2) (const_int 3)])))]
3044  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
3045  "xxpermdi %x0,%x1,%x1,2"
3046  [(set_attr "type" "vecperm")])
3047
3048(define_insn "*vsx_xxpermdi16_le_V16QI"
3049  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3050        (vec_select:V16QI
3051          (match_operand:V16QI 1 "vsx_register_operand" "wa")
3052          (parallel [(const_int 8) (const_int 9)
3053                     (const_int 10) (const_int 11)
3054                     (const_int 12) (const_int 13)
3055                     (const_int 14) (const_int 15)
3056                     (const_int 0) (const_int 1)
3057                     (const_int 2) (const_int 3)
3058                     (const_int 4) (const_int 5)
3059                     (const_int 6) (const_int 7)])))]
3060  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
3061  "xxpermdi %x0,%x1,%x1,2"
3062  [(set_attr "type" "vecperm")])
3063
3064;; lxvd2x for little endian loads.  We need several of
3065;; these since the form of the PARALLEL differs by mode.
3066(define_insn "*vsx_lxvd2x2_le_<mode>"
3067  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
3068        (vec_select:VSX_D
3069          (match_operand:VSX_D 1 "memory_operand" "Z")
3070          (parallel [(const_int 1) (const_int 0)])))]
3071  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3072  "lxvd2x %x0,%y1"
3073  [(set_attr "type" "vecload")])
3074
3075(define_insn "*vsx_lxvd2x4_le_<mode>"
3076  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
3077        (vec_select:VSX_W
3078          (match_operand:VSX_W 1 "memory_operand" "Z")
3079          (parallel [(const_int 2) (const_int 3)
3080                     (const_int 0) (const_int 1)])))]
3081  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3082  "lxvd2x %x0,%y1"
3083  [(set_attr "type" "vecload")])
3084
3085(define_insn "*vsx_lxvd2x8_le_V8HI"
3086  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3087        (vec_select:V8HI
3088          (match_operand:V8HI 1 "memory_operand" "Z")
3089          (parallel [(const_int 4) (const_int 5)
3090                     (const_int 6) (const_int 7)
3091                     (const_int 0) (const_int 1)
3092                     (const_int 2) (const_int 3)])))]
3093  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3094  "lxvd2x %x0,%y1"
3095  [(set_attr "type" "vecload")])
3096
3097(define_insn "*vsx_lxvd2x16_le_V16QI"
3098  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3099        (vec_select:V16QI
3100          (match_operand:V16QI 1 "memory_operand" "Z")
3101          (parallel [(const_int 8) (const_int 9)
3102                     (const_int 10) (const_int 11)
3103                     (const_int 12) (const_int 13)
3104                     (const_int 14) (const_int 15)
3105                     (const_int 0) (const_int 1)
3106                     (const_int 2) (const_int 3)
3107                     (const_int 4) (const_int 5)
3108                     (const_int 6) (const_int 7)])))]
3109  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3110  "lxvd2x %x0,%y1"
3111  [(set_attr "type" "vecload")])
3112
3113;; stxvd2x for little endian stores.  We need several of
3114;; these since the form of the PARALLEL differs by mode.
3115(define_insn "*vsx_stxvd2x2_le_<mode>"
3116  [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3117        (vec_select:VSX_D
3118          (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
3119          (parallel [(const_int 1) (const_int 0)])))]
3120  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3121  "stxvd2x %x1,%y0"
3122  [(set_attr "type" "vecstore")])
3123
3124(define_insn "*vsx_stxvd2x4_le_<mode>"
3125  [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3126        (vec_select:VSX_W
3127          (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
3128          (parallel [(const_int 2) (const_int 3)
3129                     (const_int 0) (const_int 1)])))]
3130  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3131  "stxvd2x %x1,%y0"
3132  [(set_attr "type" "vecstore")])
3133
3134(define_insn "*vsx_stxvd2x8_le_V8HI"
3135  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3136        (vec_select:V8HI
3137          (match_operand:V8HI 1 "vsx_register_operand" "wa")
3138          (parallel [(const_int 4) (const_int 5)
3139                     (const_int 6) (const_int 7)
3140                     (const_int 0) (const_int 1)
3141                     (const_int 2) (const_int 3)])))]
3142  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3143  "stxvd2x %x1,%y0"
3144  [(set_attr "type" "vecstore")])
3145
3146(define_insn "*vsx_stxvd2x16_le_V16QI"
3147  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3148        (vec_select:V16QI
3149          (match_operand:V16QI 1 "vsx_register_operand" "wa")
3150          (parallel [(const_int 8) (const_int 9)
3151                     (const_int 10) (const_int 11)
3152                     (const_int 12) (const_int 13)
3153                     (const_int 14) (const_int 15)
3154                     (const_int 0) (const_int 1)
3155                     (const_int 2) (const_int 3)
3156                     (const_int 4) (const_int 5)
3157                     (const_int 6) (const_int 7)])))]
3158  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3159  "stxvd2x %x1,%y0"
3160  [(set_attr "type" "vecstore")])
3161
3162;; Convert a TImode value into V1TImode
3163(define_expand "vsx_set_v1ti"
3164  [(match_operand:V1TI 0 "nonimmediate_operand")
3165   (match_operand:V1TI 1 "nonimmediate_operand")
3166   (match_operand:TI 2 "input_operand")
3167   (match_operand:QI 3 "u5bit_cint_operand")]
3168  "VECTOR_MEM_VSX_P (V1TImode)"
3169{
3170  if (operands[3] != const0_rtx)
3171    gcc_unreachable ();
3172
3173  emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]));
3174  DONE;
3175})
3176
3177;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
3178(define_expand "vsx_set_<mode>"
3179  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
3180   (use (match_operand:VSX_D 1 "vsx_register_operand"))
3181   (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
3182   (use (match_operand:QI 3 "const_0_to_1_operand"))]
3183  "VECTOR_MEM_VSX_P (<MODE>mode)"
3184{
3185  rtx dest = operands[0];
3186  rtx vec_reg = operands[1];
3187  rtx value = operands[2];
3188  rtx ele = operands[3];
3189  rtx tmp = gen_reg_rtx (<VS_scalar>mode);
3190
3191  if (ele == const0_rtx)
3192    {
3193      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
3194      emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
3195      DONE;
3196    }
3197  else if (ele == const1_rtx)
3198    {
3199      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
3200      emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
3201      DONE;
3202    }
3203  else
3204    gcc_unreachable ();
3205})
3206
3207;; Extract a DF/DI element from V2DF/V2DI
3208;; Optimize cases were we can do a simple or direct move.
3209;; Or see if we can avoid doing the move at all
3210
3211;; There are some unresolved problems with reload that show up if an Altivec
3212;; register was picked.  Limit the scalar value to FPRs for now.
3213
3214(define_insn "vsx_extract_<mode>"
3215  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d,    d,     wr, wr")
3216
3217	(vec_select:<VS_scalar>
3218	 (match_operand:VSX_D 1 "gpc_reg_operand"      "<VSa>, <VSa>, wm, wo")
3219
3220	 (parallel
3221	  [(match_operand:QI 2 "const_0_to_1_operand"  "wD,    n,     wD, n")])))]
3222  "VECTOR_MEM_VSX_P (<MODE>mode)"
3223{
3224  int element = INTVAL (operands[2]);
3225  int op0_regno = REGNO (operands[0]);
3226  int op1_regno = REGNO (operands[1]);
3227  int fldDM;
3228
3229  gcc_assert (IN_RANGE (element, 0, 1));
3230  gcc_assert (VSX_REGNO_P (op1_regno));
3231
3232  if (element == VECTOR_ELEMENT_SCALAR_64BIT)
3233    {
3234      if (op0_regno == op1_regno)
3235	return ASM_COMMENT_START " vec_extract to same register";
3236
3237      else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
3238	       && TARGET_POWERPC64)
3239	return "mfvsrd %0,%x1";
3240
3241      else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
3242	return "fmr %0,%1";
3243
3244      else if (VSX_REGNO_P (op0_regno))
3245	return "xxlor %x0,%x1,%x1";
3246
3247      else
3248	gcc_unreachable ();
3249    }
3250
3251  else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
3252	   && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
3253    return "mfvsrld %0,%x1";
3254
3255  else if (VSX_REGNO_P (op0_regno))
3256    {
3257      fldDM = element << 1;
3258      if (!BYTES_BIG_ENDIAN)
3259	fldDM = 3 - fldDM;
3260      operands[3] = GEN_INT (fldDM);
3261      return "xxpermdi %x0,%x1,%x1,%3";
3262    }
3263
3264  else
3265    gcc_unreachable ();
3266}
3267  [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")])
3268
3269;; Optimize extracting a single scalar element from memory.
3270(define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
3271  [(set (match_operand:<VS_scalar> 0 "register_operand" "=<VSX_D:VS_64reg>,wr")
3272	(vec_select:<VSX_D:VS_scalar>
3273	 (match_operand:VSX_D 1 "memory_operand" "m,m")
3274	 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
3275   (clobber (match_scratch:P 3 "=&b,&b"))]
3276  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
3277  "#"
3278  "&& reload_completed"
3279  [(set (match_dup 0) (match_dup 4))]
3280{
3281  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3282					   operands[3], <VSX_D:VS_scalar>mode);
3283}
3284  [(set_attr "type" "fpload,load")
3285   (set_attr "length" "8")])
3286
3287;; Optimize storing a single scalar element that is the right location to
3288;; memory
3289(define_insn "*vsx_extract_<mode>_store"
3290  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
3291	(vec_select:<VS_scalar>
3292	 (match_operand:VSX_D 1 "register_operand" "d,wv,wb")
3293	 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
3294  "VECTOR_MEM_VSX_P (<MODE>mode)"
3295  "@
3296   stfd%U0%X0 %1,%0
3297   stxsd%U0x %x1,%y0
3298   stxsd %1,%0"
3299  [(set_attr "type" "fpstore")
3300   (set_attr "length" "4")])
3301
3302;; Variable V2DI/V2DF extract shift
3303(define_insn "vsx_vslo_<mode>"
3304  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
3305	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3306			     (match_operand:V2DI 2 "gpc_reg_operand" "v")]
3307			    UNSPEC_VSX_VSLO))]
3308  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3309  "vslo %0,%1,%2"
3310  [(set_attr "type" "vecperm")])
3311
3312;; Variable V2DI/V2DF extract
3313(define_insn_and_split "vsx_extract_<mode>_var"
3314  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,<VSa>,r")
3315	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
3316			     (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3317			    UNSPEC_VSX_EXTRACT))
3318   (clobber (match_scratch:DI 3 "=r,&b,&b"))
3319   (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
3320  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3321  "#"
3322  "&& reload_completed"
3323  [(const_int 0)]
3324{
3325  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3326				operands[3], operands[4]);
3327  DONE;
3328})
3329
3330;; Extract a SF element from V4SF
3331(define_insn_and_split "vsx_extract_v4sf"
3332  [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
3333	(vec_select:SF
3334	 (match_operand:V4SF 1 "vsx_register_operand" "wa")
3335	 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
3336   (clobber (match_scratch:V4SF 3 "=0"))]
3337  "VECTOR_UNIT_VSX_P (V4SFmode)"
3338  "#"
3339  "&& 1"
3340  [(const_int 0)]
3341{
3342  rtx op0 = operands[0];
3343  rtx op1 = operands[1];
3344  rtx op2 = operands[2];
3345  rtx op3 = operands[3];
3346  rtx tmp;
3347  HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
3348
3349  if (ele == 0)
3350    tmp = op1;
3351  else
3352    {
3353      if (GET_CODE (op3) == SCRATCH)
3354	op3 = gen_reg_rtx (V4SFmode);
3355      emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
3356      tmp = op3;
3357    }
3358  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
3359  DONE;
3360}
3361  [(set_attr "length" "8")
3362   (set_attr "type" "fp")])
3363
3364(define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
3365  [(set (match_operand:SF 0 "register_operand" "=f,wv,wb,?r")
3366	(vec_select:SF
3367	 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
3368	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3369   (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
3370  "VECTOR_MEM_VSX_P (V4SFmode)"
3371  "#"
3372  "&& reload_completed"
3373  [(set (match_dup 0) (match_dup 4))]
3374{
3375  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3376					   operands[3], SFmode);
3377}
3378  [(set_attr "type" "fpload,fpload,fpload,load")
3379   (set_attr "length" "8")])
3380
3381;; Variable V4SF extract
3382(define_insn_and_split "vsx_extract_v4sf_var"
3383  [(set (match_operand:SF 0 "gpc_reg_operand" "=ww,ww,?r")
3384	(unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m")
3385		    (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3386		   UNSPEC_VSX_EXTRACT))
3387   (clobber (match_scratch:DI 3 "=r,&b,&b"))
3388   (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
3389  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3390  "#"
3391  "&& reload_completed"
3392  [(const_int 0)]
3393{
3394  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3395				operands[3], operands[4]);
3396  DONE;
3397})
3398
3399;; Expand the builtin form of xxpermdi to canonical rtl.
3400(define_expand "vsx_xxpermdi_<mode>"
3401  [(match_operand:VSX_L 0 "vsx_register_operand")
3402   (match_operand:VSX_L 1 "vsx_register_operand")
3403   (match_operand:VSX_L 2 "vsx_register_operand")
3404   (match_operand:QI 3 "u5bit_cint_operand")]
3405  "VECTOR_MEM_VSX_P (<MODE>mode)"
3406{
3407  rtx target = operands[0];
3408  rtx op0 = operands[1];
3409  rtx op1 = operands[2];
3410  int mask = INTVAL (operands[3]);
3411  rtx perm0 = GEN_INT ((mask >> 1) & 1);
3412  rtx perm1 = GEN_INT ((mask & 1) + 2);
3413  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3414
3415  if (<MODE>mode == V2DFmode)
3416    gen = gen_vsx_xxpermdi2_v2df_1;
3417  else
3418    {
3419      gen = gen_vsx_xxpermdi2_v2di_1;
3420      if (<MODE>mode != V2DImode)
3421	{
3422	  target = gen_lowpart (V2DImode, target);
3423	  op0 = gen_lowpart (V2DImode, op0);
3424	  op1 = gen_lowpart (V2DImode, op1);
3425	}
3426    }
3427  emit_insn (gen (target, op0, op1, perm0, perm1));
3428  DONE;
3429})
3430
3431;; Special version of xxpermdi that retains big-endian semantics.
3432(define_expand "vsx_xxpermdi_<mode>_be"
3433  [(match_operand:VSX_L 0 "vsx_register_operand")
3434   (match_operand:VSX_L 1 "vsx_register_operand")
3435   (match_operand:VSX_L 2 "vsx_register_operand")
3436   (match_operand:QI 3 "u5bit_cint_operand")]
3437  "VECTOR_MEM_VSX_P (<MODE>mode)"
3438{
3439  rtx target = operands[0];
3440  rtx op0 = operands[1];
3441  rtx op1 = operands[2];
3442  int mask = INTVAL (operands[3]);
3443  rtx perm0 = GEN_INT ((mask >> 1) & 1);
3444  rtx perm1 = GEN_INT ((mask & 1) + 2);
3445  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3446
3447  if (<MODE>mode == V2DFmode)
3448    gen = gen_vsx_xxpermdi2_v2df_1;
3449  else
3450    {
3451      gen = gen_vsx_xxpermdi2_v2di_1;
3452      if (<MODE>mode != V2DImode)
3453	{
3454	  target = gen_lowpart (V2DImode, target);
3455	  op0 = gen_lowpart (V2DImode, op0);
3456	  op1 = gen_lowpart (V2DImode, op1);
3457	}
3458    }
3459  /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
3460     transformation we don't want; it is necessary for
3461     rs6000_expand_vec_perm_const_1 but not for this use.  So we
3462     prepare for that by reversing the transformation here.  */
3463  if (BYTES_BIG_ENDIAN)
3464    emit_insn (gen (target, op0, op1, perm0, perm1));
3465  else
3466    {
3467      rtx p0 = GEN_INT (3 - INTVAL (perm1));
3468      rtx p1 = GEN_INT (3 - INTVAL (perm0));
3469      emit_insn (gen (target, op1, op0, p0, p1));
3470    }
3471  DONE;
3472})
3473
3474(define_insn "vsx_xxpermdi2_<mode>_1"
3475  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
3476	(vec_select:VSX_D
3477	  (vec_concat:<VS_double>
3478	    (match_operand:VSX_D 1 "vsx_register_operand" "wd")
3479	    (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
3480	  (parallel [(match_operand 3 "const_0_to_1_operand" "")
3481		     (match_operand 4 "const_2_to_3_operand" "")])))]
3482  "VECTOR_MEM_VSX_P (<MODE>mode)"
3483{
3484  int op3, op4, mask;
3485
3486  /* For little endian, swap operands and invert/swap selectors
3487     to get the correct xxpermdi.  The operand swap sets up the
3488     inputs as a little endian array.  The selectors are swapped
3489     because they are defined to use big endian ordering.  The
3490     selectors are inverted to get the correct doublewords for
3491     little endian ordering.  */
3492  if (BYTES_BIG_ENDIAN)
3493    {
3494      op3 = INTVAL (operands[3]);
3495      op4 = INTVAL (operands[4]);
3496    }
3497  else
3498    {
3499      op3 = 3 - INTVAL (operands[4]);
3500      op4 = 3 - INTVAL (operands[3]);
3501    }
3502
3503  mask = (op3 << 1) | (op4 - 2);
3504  operands[3] = GEN_INT (mask);
3505
3506  if (BYTES_BIG_ENDIAN)
3507    return "xxpermdi %x0,%x1,%x2,%3";
3508  else
3509    return "xxpermdi %x0,%x2,%x1,%3";
3510}
3511  [(set_attr "type" "vecperm")])
3512
3513;; Extraction of a single element in a small integer vector.  Until ISA 3.0,
3514;; none of the small types were allowed in a vector register, so we had to
3515;; extract to a DImode and either do a direct move or store.
3516(define_expand  "vsx_extract_<mode>"
3517  [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
3518		   (vec_select:<VS_scalar>
3519		    (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3520		    (parallel [(match_operand:QI 2 "const_int_operand")])))
3521	      (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3522  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3523{
3524  /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
3525  if (TARGET_P9_VECTOR)
3526    {
3527      emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3528					    operands[2]));
3529      DONE;
3530    }
3531})
3532
3533(define_insn "vsx_extract_<mode>_p9"
3534  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3535	(vec_select:<VS_scalar>
3536	 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
3537	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3538   (clobber (match_scratch:SI 3 "=r,X"))]
3539  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3540{
3541  if (which_alternative == 0)
3542    return "#";
3543
3544  else
3545    {
3546      HOST_WIDE_INT elt = INTVAL (operands[2]);
3547      HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG
3548			       ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3549			       : elt);
3550
3551      HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3552      HOST_WIDE_INT offset = unit_size * elt_adj;
3553
3554      operands[2] = GEN_INT (offset);
3555      if (unit_size == 4)
3556	return "xxextractuw %x0,%x1,%2";
3557      else
3558	return "vextractu<wd> %0,%1,%2";
3559    }
3560}
3561  [(set_attr "type" "vecsimple")])
3562
3563(define_split
3564  [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
3565	(vec_select:<VS_scalar>
3566	 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3567	 (parallel [(match_operand:QI 2 "const_int_operand")])))
3568   (clobber (match_operand:SI 3 "int_reg_operand"))]
3569  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3570  [(const_int 0)]
3571{
3572  rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3573  rtx op1 = operands[1];
3574  rtx op2 = operands[2];
3575  rtx op3 = operands[3];
3576  HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3577
3578  emit_move_insn (op3, GEN_INT (offset));
3579  if (VECTOR_ELT_ORDER_BIG)
3580    emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3581  else
3582    emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3583  DONE;
3584})
3585
3586;; Optimize zero extracts to eliminate the AND after the extract.
3587(define_insn_and_split "*vsx_extract_<mode>_di_p9"
3588  [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3589	(zero_extend:DI
3590	 (vec_select:<VS_scalar>
3591	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
3592	  (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3593   (clobber (match_scratch:SI 3 "=r,X"))]
3594  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3595  "#"
3596  "&& reload_completed"
3597  [(parallel [(set (match_dup 4)
3598		   (vec_select:<VS_scalar>
3599		    (match_dup 1)
3600		    (parallel [(match_dup 2)])))
3601	      (clobber (match_dup 3))])]
3602{
3603  operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
3604})
3605
3606;; Optimize stores to use the ISA 3.0 scalar store instructions
3607(define_insn_and_split "*vsx_extract_<mode>_store_p9"
3608  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
3609	(vec_select:<VS_scalar>
3610	 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3611	 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3612   (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
3613   (clobber (match_scratch:SI 4 "=X,&r"))]
3614  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3615  "#"
3616  "&& reload_completed"
3617  [(parallel [(set (match_dup 3)
3618		   (vec_select:<VS_scalar>
3619		    (match_dup 1)
3620		    (parallel [(match_dup 2)])))
3621	      (clobber (match_dup 4))])
3622   (set (match_dup 0)
3623	(match_dup 3))])
3624
3625(define_insn_and_split  "*vsx_extract_si"
3626  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
3627	(vec_select:SI
3628	 (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
3629	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3630   (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
3631  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3632  "#"
3633  "&& reload_completed"
3634  [(const_int 0)]
3635{
3636  rtx dest = operands[0];
3637  rtx src = operands[1];
3638  rtx element = operands[2];
3639  rtx vec_tmp = operands[3];
3640  int value;
3641
3642  if (!VECTOR_ELT_ORDER_BIG)
3643    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3644
3645  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3646     instruction.  */
3647  value = INTVAL (element);
3648  if (value != 1)
3649    emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3650  else
3651    vec_tmp = src;
3652
3653  if (MEM_P (operands[0]))
3654    {
3655      if (can_create_pseudo_p ())
3656	dest = rs6000_address_for_fpconvert (dest);
3657
3658      if (TARGET_P8_VECTOR)
3659	emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3660      else
3661	emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3662    }
3663
3664  else if (TARGET_P8_VECTOR)
3665    emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3666  else
3667    emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3668		    gen_rtx_REG (DImode, REGNO (vec_tmp)));
3669
3670  DONE;
3671}
3672  [(set_attr "type" "mftgpr,vecperm,fpstore")
3673   (set_attr "length" "8")])
3674
3675(define_insn_and_split  "*vsx_extract_<mode>_p8"
3676  [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
3677	(vec_select:<VS_scalar>
3678	 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3679	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3680   (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3681  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3682   && !TARGET_P9_VECTOR"
3683  "#"
3684  "&& reload_completed"
3685  [(const_int 0)]
3686{
3687  rtx dest = operands[0];
3688  rtx src = operands[1];
3689  rtx element = operands[2];
3690  rtx vec_tmp = operands[3];
3691  int value;
3692
3693  if (!VECTOR_ELT_ORDER_BIG)
3694    element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
3695
3696  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3697     instruction.  */
3698  value = INTVAL (element);
3699  if (<MODE>mode == V16QImode)
3700    {
3701      if (value != 7)
3702	emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3703      else
3704	vec_tmp = src;
3705    }
3706  else if (<MODE>mode == V8HImode)
3707    {
3708      if (value != 3)
3709	emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
3710      else
3711	vec_tmp = src;
3712    }
3713  else
3714    gcc_unreachable ();
3715
3716  emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3717		  gen_rtx_REG (DImode, REGNO (vec_tmp)));
3718  DONE;
3719}
3720  [(set_attr "type" "mftgpr")])
3721
3722;; Optimize extracting a single scalar element from memory.
3723(define_insn_and_split "*vsx_extract_<mode>_load"
3724  [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
3725	(vec_select:<VS_scalar>
3726	 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3727	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3728   (clobber (match_scratch:DI 3 "=&b"))]
3729  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3730  "#"
3731  "&& reload_completed"
3732  [(set (match_dup 0) (match_dup 4))]
3733{
3734  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3735					   operands[3], <VS_scalar>mode);
3736}
3737  [(set_attr "type" "load")
3738   (set_attr "length" "8")])
3739
3740;; Variable V16QI/V8HI/V4SI extract
3741(define_insn_and_split "vsx_extract_<mode>_var"
3742  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
3743	(unspec:<VS_scalar>
3744	 [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
3745	  (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3746	 UNSPEC_VSX_EXTRACT))
3747   (clobber (match_scratch:DI 3 "=r,r,&b"))
3748   (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3749  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3750  "#"
3751  "&& reload_completed"
3752  [(const_int 0)]
3753{
3754  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3755				operands[3], operands[4]);
3756  DONE;
3757})
3758
3759(define_insn_and_split "*vsx_extract_<mode>_<VS_scalar>mode_var"
3760  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
3761	(zero_extend:<VS_scalar>
3762	 (unspec:<VSX_EXTRACT_I:VS_scalar>
3763	  [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
3764	   (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
3765	  UNSPEC_VSX_EXTRACT)))
3766   (clobber (match_scratch:DI 3 "=r,r,&b"))
3767   (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
3768  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3769  "#"
3770  "&& reload_completed"
3771  [(const_int 0)]
3772{
3773  machine_mode smode = <VS_scalar>mode;
3774  rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
3775				operands[1], operands[2],
3776				operands[3], operands[4]);
3777  DONE;
3778})
3779
3780;; VSX_EXTRACT optimizations
3781;; Optimize double d = (double) vec_extract (vi, <n>)
3782;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
3783(define_insn_and_split "*vsx_extract_si_<uns>float_df"
3784  [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
3785	(any_float:DF
3786	 (vec_select:SI
3787	  (match_operand:V4SI 1 "gpc_reg_operand" "v")
3788	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3789   (clobber (match_scratch:V4SI 3 "=v"))]
3790  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3791  "#"
3792  "&& 1"
3793  [(const_int 0)]
3794{
3795  rtx dest = operands[0];
3796  rtx src = operands[1];
3797  rtx element = operands[2];
3798  rtx v4si_tmp = operands[3];
3799  int value;
3800
3801  if (!VECTOR_ELT_ORDER_BIG)
3802    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3803
3804  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3805     instruction.  */
3806  value = INTVAL (element);
3807  if (value != 0)
3808    {
3809      if (GET_CODE (v4si_tmp) == SCRATCH)
3810	v4si_tmp = gen_reg_rtx (V4SImode);
3811      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3812    }
3813  else
3814    v4si_tmp = src;
3815
3816  emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
3817  DONE;
3818})
3819
3820;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
3821;; where <type> is a floating point type that supported by the hardware that is
3822;; not double.  First convert the value to double, and then to the desired
3823;; type.
3824(define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
3825  [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
3826	(any_float:VSX_EXTRACT_FL
3827	 (vec_select:SI
3828	  (match_operand:V4SI 1 "gpc_reg_operand" "v")
3829	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
3830   (clobber (match_scratch:V4SI 3 "=v"))
3831   (clobber (match_scratch:DF 4 "=ws"))]
3832  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3833  "#"
3834  "&& 1"
3835  [(const_int 0)]
3836{
3837  rtx dest = operands[0];
3838  rtx src = operands[1];
3839  rtx element = operands[2];
3840  rtx v4si_tmp = operands[3];
3841  rtx df_tmp = operands[4];
3842  int value;
3843
3844  if (!VECTOR_ELT_ORDER_BIG)
3845    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
3846
3847  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3848     instruction.  */
3849  value = INTVAL (element);
3850  if (value != 0)
3851    {
3852      if (GET_CODE (v4si_tmp) == SCRATCH)
3853	v4si_tmp = gen_reg_rtx (V4SImode);
3854      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
3855    }
3856  else
3857    v4si_tmp = src;
3858
3859  if (GET_CODE (df_tmp) == SCRATCH)
3860    df_tmp = gen_reg_rtx (DFmode);
3861
3862  emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
3863
3864  if (<MODE>mode == SFmode)
3865    emit_insn (gen_truncdfsf2 (dest, df_tmp));
3866  else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
3867    emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
3868  else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
3869	   && TARGET_FLOAT128_HW)
3870    emit_insn (gen_extenddftf2_hw (dest, df_tmp));
3871  else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
3872    emit_insn (gen_extenddfif2 (dest, df_tmp));
3873  else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
3874    emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
3875  else
3876    gcc_unreachable ();
3877
3878  DONE;
3879})
3880
3881;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>)
3882;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
3883;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
3884;; vector short or vector unsigned short.
3885(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
3886  [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3887	(float:FL_CONV
3888	 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3889	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3890	  (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3891   (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3892  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3893   && TARGET_P9_VECTOR"
3894  "#"
3895  "&& reload_completed"
3896  [(parallel [(set (match_dup 3)
3897		   (vec_select:<VSX_EXTRACT_I:VS_scalar>
3898		    (match_dup 1)
3899		    (parallel [(match_dup 2)])))
3900	      (clobber (scratch:SI))])
3901   (set (match_dup 4)
3902	(sign_extend:DI (match_dup 3)))
3903   (set (match_dup 0)
3904	(float:<FL_CONV:MODE> (match_dup 4)))]
3905{
3906  operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3907})
3908
3909(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
3910  [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
3911	(unsigned_float:FL_CONV
3912	 (vec_select:<VSX_EXTRACT_I:VS_scalar>
3913	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
3914	  (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
3915   (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
3916  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3917   && TARGET_P9_VECTOR"
3918  "#"
3919  "&& reload_completed"
3920  [(parallel [(set (match_dup 3)
3921		   (vec_select:<VSX_EXTRACT_I:VS_scalar>
3922		    (match_dup 1)
3923		    (parallel [(match_dup 2)])))
3924	      (clobber (scratch:SI))])
3925   (set (match_dup 0)
3926	(float:<FL_CONV:MODE> (match_dup 4)))]
3927{
3928  operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
3929})
3930
3931;; V4SI/V8HI/V16QI set operation on ISA 3.0
3932(define_insn "vsx_set_<mode>_p9"
3933  [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
3934	(unspec:VSX_EXTRACT_I
3935	 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
3936	  (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
3937	  (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
3938	 UNSPEC_VSX_SET))]
3939  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3940{
3941  int ele = INTVAL (operands[3]);
3942  int nunits = GET_MODE_NUNITS (<MODE>mode);
3943
3944  if (!VECTOR_ELT_ORDER_BIG)
3945    ele = nunits - 1 - ele;
3946
3947  operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
3948  if (<MODE>mode == V4SImode)
3949    return "xxinsertw %x0,%x2,%3";
3950  else
3951    return "vinsert<wd> %0,%2,%3";
3952}
3953  [(set_attr "type" "vecperm")])
3954
3955(define_insn_and_split "vsx_set_v4sf_p9"
3956  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3957	(unspec:V4SF
3958	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3959	  (match_operand:SF 2 "gpc_reg_operand" "ww")
3960	  (match_operand:QI 3 "const_0_to_3_operand" "n")]
3961	 UNSPEC_VSX_SET))
3962   (clobber (match_scratch:SI 4 "=&wJwK"))]
3963  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3964  "#"
3965  "&& reload_completed"
3966  [(set (match_dup 5)
3967	(unspec:V4SF [(match_dup 2)]
3968		     UNSPEC_VSX_CVDPSPN))
3969   (parallel [(set (match_dup 4)
3970		   (vec_select:SI (match_dup 6)
3971				  (parallel [(match_dup 7)])))
3972	      (clobber (scratch:SI))])
3973   (set (match_dup 8)
3974	(unspec:V4SI [(match_dup 8)
3975		      (match_dup 4)
3976		      (match_dup 3)]
3977		     UNSPEC_VSX_SET))]
3978{
3979  unsigned int tmp_regno = reg_or_subregno (operands[4]);
3980
3981  operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
3982  operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
3983  operands[7] = GEN_INT (VECTOR_ELT_ORDER_BIG ? 1 : 2);
3984  operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
3985}
3986  [(set_attr "type" "vecperm")
3987   (set_attr "length" "12")])
3988
3989;; Special case setting 0.0f to a V4SF element
3990(define_insn_and_split "*vsx_set_v4sf_p9_zero"
3991  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
3992	(unspec:V4SF
3993	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
3994	  (match_operand:SF 2 "zero_fp_constant" "j")
3995	  (match_operand:QI 3 "const_0_to_3_operand" "n")]
3996	 UNSPEC_VSX_SET))
3997   (clobber (match_scratch:SI 4 "=&wJwK"))]
3998  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
3999  "#"
4000  "&& reload_completed"
4001  [(set (match_dup 4)
4002	(const_int 0))
4003   (set (match_dup 5)
4004	(unspec:V4SI [(match_dup 5)
4005		      (match_dup 4)
4006		      (match_dup 3)]
4007		     UNSPEC_VSX_SET))]
4008{
4009  operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4010}
4011  [(set_attr "type" "vecperm")
4012   (set_attr "length" "8")])
4013
4014;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
4015;; that is in the default scalar position (1 for big endian, 2 for little
4016;; endian).  We just need to do an xxinsertw since the element is in the
4017;; correct location.
4018
4019(define_insn "*vsx_insert_extract_v4sf_p9"
4020  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4021	(unspec:V4SF
4022	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4023	  (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4024			 (parallel
4025			  [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4026	  (match_operand:QI 4 "const_0_to_3_operand" "n")]
4027	 UNSPEC_VSX_SET))]
4028  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
4029   && (INTVAL (operands[3]) == (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
4030{
4031  int ele = INTVAL (operands[4]);
4032
4033  if (!VECTOR_ELT_ORDER_BIG)
4034    ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
4035
4036  operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
4037  return "xxinsertw %x0,%x2,%4";
4038}
4039  [(set_attr "type" "vecperm")])
4040
4041;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
4042;; that is in the default scalar position (1 for big endian, 2 for little
4043;; endian).  Convert the insert/extract to int and avoid doing the conversion.
4044
4045(define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
4046  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4047	(unspec:V4SF
4048	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4049	  (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4050			 (parallel
4051			  [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4052	  (match_operand:QI 4 "const_0_to_3_operand" "n")]
4053	 UNSPEC_VSX_SET))
4054   (clobber (match_scratch:SI 5 "=&wJwK"))]
4055  "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
4056   && TARGET_P9_VECTOR && TARGET_POWERPC64
4057   && (INTVAL (operands[3]) != (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
4058  "#"
4059  "&& 1"
4060  [(parallel [(set (match_dup 5)
4061		   (vec_select:SI (match_dup 6)
4062				  (parallel [(match_dup 3)])))
4063	      (clobber (scratch:SI))])
4064   (set (match_dup 7)
4065	(unspec:V4SI [(match_dup 8)
4066		      (match_dup 5)
4067		      (match_dup 4)]
4068		     UNSPEC_VSX_SET))]
4069{
4070  if (GET_CODE (operands[5]) == SCRATCH)
4071    operands[5] = gen_reg_rtx (SImode);
4072
4073  operands[6] = gen_lowpart (V4SImode, operands[2]);
4074  operands[7] = gen_lowpart (V4SImode, operands[0]);
4075  operands[8] = gen_lowpart (V4SImode, operands[1]);
4076}
4077  [(set_attr "type" "vecperm")])
4078
4079;; Expanders for builtins
4080(define_expand "vsx_mergel_<mode>"
4081  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4082   (use (match_operand:VSX_D 1 "vsx_register_operand"))
4083   (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4084  "VECTOR_MEM_VSX_P (<MODE>mode)"
4085{
4086  rtvec v;
4087  rtx x;
4088
4089  /* Special handling for LE with -maltivec=be.  */
4090  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
4091    {
4092      v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4093      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
4094    }
4095  else
4096    {
4097      v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4098      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4099    }
4100
4101  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4102  emit_insn (gen_rtx_SET (operands[0], x));
4103  DONE;
4104})
4105
4106(define_expand "vsx_mergeh_<mode>"
4107  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4108   (use (match_operand:VSX_D 1 "vsx_register_operand"))
4109   (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4110  "VECTOR_MEM_VSX_P (<MODE>mode)"
4111{
4112  rtvec v;
4113  rtx x;
4114
4115  /* Special handling for LE with -maltivec=be.  */
4116  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
4117    {
4118      v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4119      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
4120    }
4121  else
4122    {
4123      v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4124      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4125    }
4126
4127  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4128  emit_insn (gen_rtx_SET (operands[0], x));
4129  DONE;
4130})
4131
4132;; V2DF/V2DI splat
4133;; We separate the register splat insn from the memory splat insn to force the
4134;; register allocator to generate the indexed form of the SPLAT when it is
4135;; given an offsettable memory reference.  Otherwise, if the register and
4136;; memory insns were combined into a single insn, the register allocator will
4137;; load the value into a register, and then do a double word permute.
4138(define_expand "vsx_splat_<mode>"
4139  [(set (match_operand:VSX_D 0 "vsx_register_operand")
4140	(vec_duplicate:VSX_D
4141	 (match_operand:<VS_scalar> 1 "input_operand")))]
4142  "VECTOR_MEM_VSX_P (<MODE>mode)"
4143{
4144  rtx op1 = operands[1];
4145  if (MEM_P (op1))
4146    operands[1] = rs6000_address_for_fpconvert (op1);
4147  else if (!REG_P (op1))
4148    op1 = force_reg (<VSX_D:VS_scalar>mode, op1);
4149})
4150
4151(define_insn "vsx_splat_<mode>_reg"
4152  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we")
4153	(vec_duplicate:VSX_D
4154	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))]
4155  "VECTOR_MEM_VSX_P (<MODE>mode)"
4156  "@
4157   xxpermdi %x0,%x1,%x1,0
4158   mtvsrdd %x0,%1,%1"
4159  [(set_attr "type" "vecperm")])
4160
4161(define_insn "vsx_splat_<VSX_D:mode>_mem"
4162  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>")
4163	(vec_duplicate:VSX_D
4164	 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
4165  "VECTOR_MEM_VSX_P (<MODE>mode)"
4166  "lxvdsx %x0,%y1"
4167  [(set_attr "type" "vecload")])
4168
4169;; V4SI splat support
4170(define_insn "vsx_splat_v4si"
4171  [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
4172	(vec_duplicate:V4SI
4173	 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4174  "TARGET_P9_VECTOR"
4175  "@
4176   mtvsrws %x0,%1
4177   lxvwsx %x0,%y1"
4178  [(set_attr "type" "vecperm,vecload")])
4179
4180;; SImode is not currently allowed in vector registers.  This pattern
4181;; allows us to use direct move to get the value in a vector register
4182;; so that we can use XXSPLTW
4183(define_insn "vsx_splat_v4si_di"
4184  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4185	(vec_duplicate:V4SI
4186	 (truncate:SI
4187	  (match_operand:DI 1 "gpc_reg_operand" "wj,r"))))]
4188  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4189  "@
4190   xxspltw %x0,%x1,1
4191   mtvsrws %x0,%1"
4192  [(set_attr "type" "vecperm")])
4193
4194;; V4SF splat (ISA 3.0)
4195(define_insn_and_split "vsx_splat_v4sf"
4196  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4197	(vec_duplicate:V4SF
4198	 (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
4199  "TARGET_P9_VECTOR"
4200  "@
4201   lxvwsx %x0,%y1
4202   #
4203   mtvsrws %x0,%1"
4204  "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4205  [(set (match_dup 0)
4206	(unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4207   (set (match_dup 0)
4208	(unspec:V4SF [(match_dup 0)
4209		      (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4210  ""
4211  [(set_attr "type" "vecload,vecperm,mftgpr")
4212   (set_attr "length" "4,8,4")])
4213
4214;; V4SF/V4SI splat from a vector element
4215(define_insn "vsx_xxspltw_<mode>"
4216  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
4217	(vec_duplicate:VSX_W
4218	 (vec_select:<VS_scalar>
4219	  (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
4220	  (parallel
4221	   [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4222  "VECTOR_MEM_VSX_P (<MODE>mode)"
4223{
4224  if (!BYTES_BIG_ENDIAN)
4225    operands[2] = GEN_INT (3 - INTVAL (operands[2]));
4226
4227  return "xxspltw %x0,%x1,%2";
4228}
4229  [(set_attr "type" "vecperm")])
4230
4231(define_insn "vsx_xxspltw_<mode>_direct"
4232  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
4233        (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
4234                       (match_operand:QI 2 "u5bit_cint_operand" "i")]
4235                      UNSPEC_VSX_XXSPLTW))]
4236  "VECTOR_MEM_VSX_P (<MODE>mode)"
4237  "xxspltw %x0,%x1,%2"
4238  [(set_attr "type" "vecperm")])
4239
4240;; V16QI/V8HI splat support on ISA 2.07
4241(define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
4242  [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4243	(vec_duplicate:VSX_SPLAT_I
4244	 (truncate:<VS_scalar>
4245	  (match_operand:DI 1 "altivec_register_operand" "v"))))]
4246  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4247  "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4248  [(set_attr "type" "vecperm")])
4249
4250;; V2DF/V2DI splat for use by vec_splat builtin
4251(define_insn "vsx_xxspltd_<mode>"
4252  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4253        (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4254	               (match_operand:QI 2 "u5bit_cint_operand" "i")]
4255                      UNSPEC_VSX_XXSPLTD))]
4256  "VECTOR_MEM_VSX_P (<MODE>mode)"
4257{
4258  if ((VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 0)
4259      || (!VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 1))
4260    return "xxpermdi %x0,%x1,%x1,0";
4261  else
4262    return "xxpermdi %x0,%x1,%x1,3";
4263}
4264  [(set_attr "type" "vecperm")])
4265
4266;; V4SF/V4SI interleave
4267(define_insn "vsx_xxmrghw_<mode>"
4268  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
4269        (vec_select:VSX_W
4270	  (vec_concat:<VS_double>
4271	    (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
4272	    (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
4273	  (parallel [(const_int 0) (const_int 4)
4274		     (const_int 1) (const_int 5)])))]
4275  "VECTOR_MEM_VSX_P (<MODE>mode)"
4276{
4277  if (BYTES_BIG_ENDIAN)
4278    return "xxmrghw %x0,%x1,%x2";
4279  else
4280    return "xxmrglw %x0,%x2,%x1";
4281}
4282  [(set_attr "type" "vecperm")])
4283
4284(define_insn "vsx_xxmrglw_<mode>"
4285  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
4286	(vec_select:VSX_W
4287	  (vec_concat:<VS_double>
4288	    (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
4289	    (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
4290	  (parallel [(const_int 2) (const_int 6)
4291		     (const_int 3) (const_int 7)])))]
4292  "VECTOR_MEM_VSX_P (<MODE>mode)"
4293{
4294  if (BYTES_BIG_ENDIAN)
4295    return "xxmrglw %x0,%x1,%x2";
4296  else
4297    return "xxmrghw %x0,%x2,%x1";
4298}
4299  [(set_attr "type" "vecperm")])
4300
4301;; Shift left double by word immediate
4302(define_insn "vsx_xxsldwi_<mode>"
4303  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
4304	(unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
4305		       (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
4306		       (match_operand:QI 3 "u5bit_cint_operand" "i")]
4307		      UNSPEC_VSX_SLDWI))]
4308  "VECTOR_MEM_VSX_P (<MODE>mode)"
4309  "xxsldwi %x0,%x1,%x2,%3"
4310  [(set_attr "type" "vecperm")])
4311
4312
4313;; Vector reduction insns and splitters
4314
4315(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
4316  [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
4317	(VEC_reduc:V2DF
4318	 (vec_concat:V2DF
4319	  (vec_select:DF
4320	   (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
4321	   (parallel [(const_int 1)]))
4322	  (vec_select:DF
4323	   (match_dup 1)
4324	   (parallel [(const_int 0)])))
4325	 (match_dup 1)))
4326   (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
4327  "VECTOR_UNIT_VSX_P (V2DFmode)"
4328  "#"
4329  ""
4330  [(const_int 0)]
4331{
4332  rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4333	     ? gen_reg_rtx (V2DFmode)
4334	     : operands[2];
4335  emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4336  emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4337  DONE;
4338}
4339  [(set_attr "length" "8")
4340   (set_attr "type" "veccomplex")])
4341
4342(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
4343  [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
4344	(VEC_reduc:V4SF
4345	 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4346	 (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
4347   (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
4348   (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
4349  "VECTOR_UNIT_VSX_P (V4SFmode)"
4350  "#"
4351  ""
4352  [(const_int 0)]
4353{
4354  rtx op0 = operands[0];
4355  rtx op1 = operands[1];
4356  rtx tmp2, tmp3, tmp4;
4357
4358  if (can_create_pseudo_p ())
4359    {
4360      tmp2 = gen_reg_rtx (V4SFmode);
4361      tmp3 = gen_reg_rtx (V4SFmode);
4362      tmp4 = gen_reg_rtx (V4SFmode);
4363    }
4364  else
4365    {
4366      tmp2 = operands[2];
4367      tmp3 = operands[3];
4368      tmp4 = tmp2;
4369    }
4370
4371  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4372  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4373  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4374  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4375  DONE;
4376}
4377  [(set_attr "length" "16")
4378   (set_attr "type" "veccomplex")])
4379
4380;; Combiner patterns with the vector reduction patterns that knows we can get
4381;; to the top element of the V2DF array without doing an extract.
4382
4383(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4384  [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
4385	(vec_select:DF
4386	 (VEC_reduc:V2DF
4387	  (vec_concat:V2DF
4388	   (vec_select:DF
4389	    (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
4390	    (parallel [(const_int 1)]))
4391	   (vec_select:DF
4392	    (match_dup 1)
4393	    (parallel [(const_int 0)])))
4394	  (match_dup 1))
4395	 (parallel [(const_int 1)])))
4396   (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
4397  "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)"
4398  "#"
4399  ""
4400  [(const_int 0)]
4401{
4402  rtx hi = gen_highpart (DFmode, operands[1]);
4403  rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4404	    ? gen_reg_rtx (DFmode)
4405	    : operands[2];
4406
4407  emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4408  emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4409  DONE;
4410}
4411  [(set_attr "length" "8")
4412   (set_attr "type" "veccomplex")])
4413
4414(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
4415  [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
4416	(vec_select:SF
4417	 (VEC_reduc:V4SF
4418	  (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4419	  (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
4420	 (parallel [(const_int 3)])))
4421   (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
4422   (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
4423   (clobber (match_scratch:V4SF 4 "=0,0"))]
4424  "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)"
4425  "#"
4426  ""
4427  [(const_int 0)]
4428{
4429  rtx op0 = operands[0];
4430  rtx op1 = operands[1];
4431  rtx tmp2, tmp3, tmp4, tmp5;
4432
4433  if (can_create_pseudo_p ())
4434    {
4435      tmp2 = gen_reg_rtx (V4SFmode);
4436      tmp3 = gen_reg_rtx (V4SFmode);
4437      tmp4 = gen_reg_rtx (V4SFmode);
4438      tmp5 = gen_reg_rtx (V4SFmode);
4439    }
4440  else
4441    {
4442      tmp2 = operands[2];
4443      tmp3 = operands[3];
4444      tmp4 = tmp2;
4445      tmp5 = operands[4];
4446    }
4447
4448  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4449  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4450  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4451  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
4452  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
4453  DONE;
4454}
4455  [(set_attr "length" "20")
4456   (set_attr "type" "veccomplex")])
4457
4458
4459;; Power8 Vector fusion.  The fused ops must be physically adjacent.
4460(define_peephole
4461  [(set (match_operand:P 0 "base_reg_operand")
4462	(match_operand:P 1 "short_cint_operand"))
4463   (set (match_operand:VSX_M 2 "vsx_register_operand")
4464	(mem:VSX_M (plus:P (match_dup 0)
4465			   (match_operand:P 3 "int_reg_operand"))))]
4466  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4467  "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4468  [(set_attr "length" "8")
4469   (set_attr "type" "vecload")])
4470
4471(define_peephole
4472  [(set (match_operand:P 0 "base_reg_operand")
4473	(match_operand:P 1 "short_cint_operand"))
4474   (set (match_operand:VSX_M 2 "vsx_register_operand")
4475	(mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
4476			   (match_dup 0))))]
4477  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4478  "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4479  [(set_attr "length" "8")
4480   (set_attr "type" "vecload")])
4481
4482
4483;; ISA 3.0 vector extend sign support
4484
4485(define_insn "vsx_sign_extend_qi_<mode>"
4486  [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4487	(unspec:VSINT_84
4488	 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4489	 UNSPEC_VSX_SIGN_EXTEND))]
4490  "TARGET_P9_VECTOR"
4491  "vextsb2<wd> %0,%1"
4492  [(set_attr "type" "vecexts")])
4493
4494(define_insn "vsx_sign_extend_hi_<mode>"
4495  [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4496	(unspec:VSINT_84
4497	 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4498	 UNSPEC_VSX_SIGN_EXTEND))]
4499  "TARGET_P9_VECTOR"
4500  "vextsh2<wd> %0,%1"
4501  [(set_attr "type" "vecexts")])
4502
4503(define_insn "*vsx_sign_extend_si_v2di"
4504  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4505	(unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4506		     UNSPEC_VSX_SIGN_EXTEND))]
4507  "TARGET_P9_VECTOR"
4508  "vextsw2d %0,%1"
4509  [(set_attr "type" "vecexts")])
4510
4511
4512;; ISA 3.0 Binary Floating-Point Support
4513
4514;; VSX Scalar Extract Exponent Quad-Precision
4515(define_insn "xsxexpqp_<mode>"
4516  [(set (match_operand:DI 0 "altivec_register_operand" "=v")
4517	(unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4518	 UNSPEC_VSX_SXEXPDP))]
4519  "TARGET_P9_VECTOR"
4520  "xsxexpqp %0,%1"
4521  [(set_attr "type" "vecmove")])
4522
4523;; VSX Scalar Extract Exponent Double-Precision
4524(define_insn "xsxexpdp"
4525  [(set (match_operand:DI 0 "register_operand" "=r")
4526	(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4527	 UNSPEC_VSX_SXEXPDP))]
4528  "TARGET_P9_VECTOR && TARGET_64BIT"
4529  "xsxexpdp %0,%x1"
4530  [(set_attr "type" "integer")])
4531
4532;; VSX Scalar Extract Significand Quad-Precision
4533(define_insn "xsxsigqp_<mode>"
4534  [(set (match_operand:TI 0 "altivec_register_operand" "=v")
4535	(unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4536	 UNSPEC_VSX_SXSIG))]
4537  "TARGET_P9_VECTOR"
4538  "xsxsigqp %0,%1"
4539  [(set_attr "type" "vecmove")])
4540
4541;; VSX Scalar Extract Significand Double-Precision
4542(define_insn "xsxsigdp"
4543  [(set (match_operand:DI 0 "register_operand" "=r")
4544	(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4545	 UNSPEC_VSX_SXSIG))]
4546  "TARGET_P9_VECTOR && TARGET_64BIT"
4547  "xsxsigdp %0,%x1"
4548  [(set_attr "type" "integer")])
4549
4550;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
4551(define_insn "xsiexpqpf_<mode>"
4552  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4553	(unspec:IEEE128
4554	 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4555	  (match_operand:DI 2 "altivec_register_operand" "v")]
4556	 UNSPEC_VSX_SIEXPQP))]
4557  "TARGET_P9_VECTOR"
4558  "xsiexpqp %0,%1,%2"
4559  [(set_attr "type" "vecmove")])
4560
4561;; VSX Scalar Insert Exponent Quad-Precision
4562(define_insn "xsiexpqp_<mode>"
4563  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4564	(unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
4565			 (match_operand:DI 2 "altivec_register_operand" "v")]
4566	 UNSPEC_VSX_SIEXPQP))]
4567  "TARGET_P9_VECTOR"
4568  "xsiexpqp %0,%1,%2"
4569  [(set_attr "type" "vecmove")])
4570
4571;; VSX Scalar Insert Exponent Double-Precision
4572(define_insn "xsiexpdp"
4573  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4574	(unspec:DF [(match_operand:DI 1 "register_operand" "r")
4575		    (match_operand:DI 2 "register_operand" "r")]
4576	 UNSPEC_VSX_SIEXPDP))]
4577  "TARGET_P9_VECTOR && TARGET_64BIT"
4578  "xsiexpdp %x0,%1,%2"
4579  [(set_attr "type" "fpsimple")])
4580
4581;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
4582(define_insn "xsiexpdpf"
4583  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4584	(unspec:DF [(match_operand:DF 1 "register_operand" "r")
4585		    (match_operand:DI 2 "register_operand" "r")]
4586	 UNSPEC_VSX_SIEXPDP))]
4587  "TARGET_P9_VECTOR && TARGET_64BIT"
4588  "xsiexpdp %x0,%1,%2"
4589  [(set_attr "type" "fpsimple")])
4590
4591;; VSX Scalar Compare Exponents Double-Precision
4592(define_expand "xscmpexpdp_<code>"
4593  [(set (match_dup 3)
4594	(compare:CCFP
4595	 (unspec:DF
4596	  [(match_operand:DF 1 "vsx_register_operand" "wa")
4597	   (match_operand:DF 2 "vsx_register_operand" "wa")]
4598	  UNSPEC_VSX_SCMPEXPDP)
4599	 (const_int 0)))
4600   (set (match_operand:SI 0 "register_operand" "=r")
4601	(CMP_TEST:SI (match_dup 3)
4602		     (const_int 0)))]
4603  "TARGET_P9_VECTOR"
4604{
4605  operands[3] = gen_reg_rtx (CCFPmode);
4606})
4607
4608(define_insn "*xscmpexpdp"
4609  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4610	(compare:CCFP
4611	 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
4612		     (match_operand:DF 2 "vsx_register_operand" "wa")]
4613	  UNSPEC_VSX_SCMPEXPDP)
4614	 (match_operand:SI 3 "zero_constant" "j")))]
4615  "TARGET_P9_VECTOR"
4616  "xscmpexpdp %0,%x1,%x2"
4617  [(set_attr "type" "fpcompare")])
4618
4619;; VSX Scalar Test Data Class Quad-Precision
4620;;  (Expansion for scalar_test_data_class (__ieee128, int))
4621;;   (Has side effect of setting the lt bit if operand 1 is negative,
4622;;    setting the eq bit if any of the conditions tested by operand 2
4623;;    are satisfied, and clearing the gt and undordered bits to zero.)
4624(define_expand "xststdcqp_<mode>"
4625  [(set (match_dup 3)
4626	(compare:CCFP
4627	 (unspec:IEEE128
4628	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4629	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
4630	  UNSPEC_VSX_STSTDC)
4631	 (const_int 0)))
4632   (set (match_operand:SI 0 "register_operand" "=r")
4633	(eq:SI (match_dup 3)
4634	       (const_int 0)))]
4635  "TARGET_P9_VECTOR"
4636{
4637  operands[3] = gen_reg_rtx (CCFPmode);
4638})
4639
4640;; VSX Scalar Test Data Class Double- and Single-Precision
4641;;  (The lt bit is set if operand 1 is negative.  The eq bit is set
4642;;   if any of the conditions tested by operand 2 are satisfied.
4643;;   The gt and unordered bits are cleared to zero.)
4644(define_expand "xststdc<Fvsx>"
4645  [(set (match_dup 3)
4646	(compare:CCFP
4647	 (unspec:SFDF
4648	  [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4649	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
4650	  UNSPEC_VSX_STSTDC)
4651	 (match_dup 4)))
4652   (set (match_operand:SI 0 "register_operand" "=r")
4653	(eq:SI (match_dup 3)
4654	       (const_int 0)))]
4655  "TARGET_P9_VECTOR"
4656{
4657  operands[3] = gen_reg_rtx (CCFPmode);
4658  operands[4] = CONST0_RTX (SImode);
4659})
4660
4661;; The VSX Scalar Test Negative Quad-Precision
4662(define_expand "xststdcnegqp_<mode>"
4663  [(set (match_dup 2)
4664	(compare:CCFP
4665	 (unspec:IEEE128
4666	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4667	   (const_int 0)]
4668	  UNSPEC_VSX_STSTDC)
4669	 (const_int 0)))
4670   (set (match_operand:SI 0 "register_operand" "=r")
4671	(lt:SI (match_dup 2)
4672	       (const_int 0)))]
4673  "TARGET_P9_VECTOR"
4674{
4675  operands[2] = gen_reg_rtx (CCFPmode);
4676})
4677
4678;; The VSX Scalar Test Negative Double- and Single-Precision
4679(define_expand "xststdcneg<Fvsx>"
4680  [(set (match_dup 2)
4681	(compare:CCFP
4682	 (unspec:SFDF
4683	  [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4684	   (const_int 0)]
4685	  UNSPEC_VSX_STSTDC)
4686	 (match_dup 3)))
4687   (set (match_operand:SI 0 "register_operand" "=r")
4688	(lt:SI (match_dup 2)
4689	       (const_int 0)))]
4690  "TARGET_P9_VECTOR"
4691{
4692  operands[2] = gen_reg_rtx (CCFPmode);
4693  operands[3] = CONST0_RTX (SImode);
4694})
4695
4696(define_insn "*xststdcqp_<mode>"
4697  [(set (match_operand:CCFP 0 "" "=y")
4698	(compare:CCFP
4699	 (unspec:IEEE128
4700	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4701	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
4702	  UNSPEC_VSX_STSTDC)
4703	 (const_int 0)))]
4704  "TARGET_P9_VECTOR"
4705  "xststdcqp %0,%1,%2"
4706  [(set_attr "type" "fpcompare")])
4707
4708(define_insn "*xststdc<Fvsx>"
4709  [(set (match_operand:CCFP 0 "" "=y")
4710	(compare:CCFP
4711	 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4712		       (match_operand:SI 2 "u7bit_cint_operand" "n")]
4713	  UNSPEC_VSX_STSTDC)
4714	 (match_operand:SI 3 "zero_constant" "j")))]
4715  "TARGET_P9_VECTOR"
4716  "xststdc<Fvsx> %0,%x1,%2"
4717  [(set_attr "type" "fpcompare")])
4718
4719;; VSX Vector Extract Exponent Double and Single Precision
4720(define_insn "xvxexp<VSs>"
4721  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4722	(unspec:VSX_F
4723	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4724	 UNSPEC_VSX_VXEXP))]
4725  "TARGET_P9_VECTOR"
4726  "xvxexp<VSs> %x0,%x1"
4727  [(set_attr "type" "vecsimple")])
4728
4729;; VSX Vector Extract Significand Double and Single Precision
4730(define_insn "xvxsig<VSs>"
4731  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4732	(unspec:VSX_F
4733	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4734	 UNSPEC_VSX_VXSIG))]
4735  "TARGET_P9_VECTOR"
4736  "xvxsig<VSs> %x0,%x1"
4737  [(set_attr "type" "vecsimple")])
4738
4739;; VSX Vector Insert Exponent Double and Single Precision
4740(define_insn "xviexp<VSs>"
4741  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4742	(unspec:VSX_F
4743	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4744	  (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
4745	 UNSPEC_VSX_VIEXP))]
4746  "TARGET_P9_VECTOR"
4747  "xviexp<VSs> %x0,%x1,%x2"
4748  [(set_attr "type" "vecsimple")])
4749
4750;; VSX Vector Test Data Class Double and Single Precision
4751;; The corresponding elements of the result vector are all ones
4752;; if any of the conditions tested by operand 3 are satisfied.
4753(define_insn "xvtstdc<VSs>"
4754  [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
4755	(unspec:<VSI>
4756	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4757	  (match_operand:SI 2 "u7bit_cint_operand" "n")]
4758	 UNSPEC_VSX_VTSTDC))]
4759  "TARGET_P9_VECTOR"
4760  "xvtstdc<VSs> %x0,%x1,%2"
4761  [(set_attr "type" "vecsimple")])
4762
4763;; ISA 3.0 String Operations Support
4764
4765;; Compare vectors producing a vector result and a predicate, setting CR6
4766;; to indicate a combined status.  This pattern matches v16qi, v8hi, and
4767;; v4si modes.  It does not match v2df, v4sf, or v2di modes.  There's no
4768;; need to match v4sf, v2df, or v2di modes because those are expanded
4769;; to use Power8 instructions.
4770(define_insn "*vsx_ne_<mode>_p"
4771  [(set (reg:CC CR6_REGNO)
4772	(unspec:CC
4773	 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4774		 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
4775	 UNSPEC_PREDICATE))
4776   (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
4777	(ne:VSX_EXTRACT_I (match_dup 1)
4778			  (match_dup 2)))]
4779  "TARGET_P9_VECTOR"
4780  "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4781  [(set_attr "type" "vecsimple")])
4782
4783(define_insn "*vector_nez_<mode>_p"
4784  [(set (reg:CC CR6_REGNO)
4785	(unspec:CC [(unspec:VI
4786		     [(match_operand:VI 1 "gpc_reg_operand" "v")
4787		      (match_operand:VI 2 "gpc_reg_operand" "v")]
4788		     UNSPEC_NEZ_P)]
4789	 UNSPEC_PREDICATE))
4790   (set (match_operand:VI 0 "gpc_reg_operand" "=v")
4791	(unspec:VI [(match_dup 1)
4792		    (match_dup 2)]
4793	 UNSPEC_NEZ_P))]
4794  "TARGET_P9_VECTOR"
4795  "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4796  [(set_attr "type" "vecsimple")])
4797
4798;; Return first position of match between vectors using natural order
4799;; for both LE and BE execution modes.
4800(define_expand "first_match_index_<mode>"
4801  [(match_operand:SI 0 "register_operand")
4802   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4803	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4804  UNSPEC_VSX_FIRST_MATCH_INDEX)]
4805  "TARGET_P9_VECTOR"
4806{
4807  int sh;
4808
4809  rtx cmp_result = gen_reg_rtx (<MODE>mode);
4810  rtx not_result = gen_reg_rtx (<MODE>mode);
4811
4812  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4813					     operands[2]));
4814  emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
4815
4816  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4817
4818  if (<MODE>mode == V16QImode)
4819    {
4820      if (!BYTES_BIG_ENDIAN)
4821        emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
4822      else
4823        emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
4824    }
4825  else
4826    {
4827      rtx tmp = gen_reg_rtx (SImode);
4828      if (!BYTES_BIG_ENDIAN)
4829        emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
4830      else
4831        emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
4832      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4833    }
4834  DONE;
4835})
4836
4837;; Return first position of match between vectors or end of string (EOS) using
4838;; natural element order for both LE and BE execution modes.
4839(define_expand "first_match_or_eos_index_<mode>"
4840  [(match_operand:SI 0 "register_operand")
4841   (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4842   (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4843  UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
4844  "TARGET_P9_VECTOR"
4845{
4846  int sh;
4847  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4848  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4849  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4850  rtx and_result = gen_reg_rtx (<MODE>mode);
4851  rtx result = gen_reg_rtx (<MODE>mode);
4852  rtx vzero = gen_reg_rtx (<MODE>mode);
4853
4854  /* Vector with zeros in elements that correspond to zeros in operands.  */
4855  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4856  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4857  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4858  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4859
4860  /* Vector with ones in elments that do not match.  */
4861  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4862                                             operands[2]));
4863
4864  /* Create vector with ones in elements where there was a zero in one of
4865     the source elements or the elements that match.  */
4866  emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
4867  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4868
4869  if (<MODE>mode == V16QImode)
4870    {
4871      if (!BYTES_BIG_ENDIAN)
4872        emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4873      else
4874        emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
4875    }
4876  else
4877    {
4878      rtx tmp = gen_reg_rtx (SImode);
4879      if (!BYTES_BIG_ENDIAN)
4880        emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4881      else
4882        emit_insn (gen_vclzlsbb_<mode> (tmp, result));
4883      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4884    }
4885  DONE;
4886})
4887
4888;; Return first position of mismatch between vectors using natural
4889;; element order for both LE and BE execution modes.
4890(define_expand "first_mismatch_index_<mode>"
4891  [(match_operand:SI 0 "register_operand")
4892   (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4893   (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4894  UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
4895  "TARGET_P9_VECTOR"
4896{
4897  int sh;
4898  rtx cmp_result = gen_reg_rtx (<MODE>mode);
4899
4900  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4901					    operands[2]));
4902  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4903
4904  if (<MODE>mode == V16QImode)
4905    {
4906      if (!BYTES_BIG_ENDIAN)
4907        emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
4908      else
4909        emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
4910    }
4911  else
4912    {
4913      rtx tmp = gen_reg_rtx (SImode);
4914      if (!BYTES_BIG_ENDIAN)
4915        emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
4916      else
4917        emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
4918      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4919    }
4920  DONE;
4921})
4922
4923;; Return first position of mismatch between vectors or end of string (EOS)
4924;; using natural element order for both LE and BE execution modes.
4925(define_expand "first_mismatch_or_eos_index_<mode>"
4926  [(match_operand:SI 0 "register_operand")
4927   (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4928   (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4929  UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
4930  "TARGET_P9_VECTOR"
4931{
4932  int sh;
4933  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4934  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4935  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4936  rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
4937  rtx and_result = gen_reg_rtx (<MODE>mode);
4938  rtx result = gen_reg_rtx (<MODE>mode);
4939  rtx vzero = gen_reg_rtx (<MODE>mode);
4940
4941  /* Vector with zeros in elements that correspond to zeros in operands.  */
4942  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4943
4944  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4945  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4946  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4947
4948  /* Vector with ones in elments that match.  */
4949  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4950                                             operands[2]));
4951  emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
4952
4953  /* Create vector with ones in elements where there was a zero in one of
4954     the source elements or the elements did not match.  */
4955  emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
4956  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4957
4958  if (<MODE>mode == V16QImode)
4959    {
4960      if (!BYTES_BIG_ENDIAN)
4961        emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4962      else
4963        emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
4964    }
4965  else
4966    {
4967      rtx tmp = gen_reg_rtx (SImode);
4968      if (!BYTES_BIG_ENDIAN)
4969        emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4970      else
4971        emit_insn (gen_vclzlsbb_<mode> (tmp, result));
4972      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4973    }
4974  DONE;
4975})
4976
4977;; Load VSX Vector with Length
4978(define_expand "lxvl"
4979  [(set (match_dup 3)
4980        (ashift:DI (match_operand:DI 2 "register_operand")
4981                   (const_int 56)))
4982   (set (match_operand:V16QI 0 "vsx_register_operand")
4983	(unspec:V16QI
4984	 [(match_operand:DI 1 "gpc_reg_operand")
4985          (mem:V16QI (match_dup 1))
4986	  (match_dup 3)]
4987	 UNSPEC_LXVL))]
4988  "TARGET_P9_VECTOR && TARGET_64BIT"
4989{
4990  operands[3] = gen_reg_rtx (DImode);
4991})
4992
4993(define_insn "*lxvl"
4994  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
4995	(unspec:V16QI
4996	 [(match_operand:DI 1 "gpc_reg_operand" "b")
4997	  (mem:V16QI (match_dup 1))
4998	  (match_operand:DI 2 "register_operand" "r")]
4999	 UNSPEC_LXVL))]
5000  "TARGET_P9_VECTOR && TARGET_64BIT"
5001  "lxvl %x0,%1,%2"
5002  [(set_attr "type" "vecload")])
5003
5004(define_insn "lxvll"
5005  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5006	(unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
5007                       (mem:V16QI (match_dup 1))
5008		       (match_operand:DI 2 "register_operand" "r")]
5009		      UNSPEC_LXVLL))]
5010  "TARGET_P9_VECTOR"
5011  "lxvll %x0,%1,%2"
5012  [(set_attr "type" "vecload")])
5013
5014;; Expand for builtin xl_len_r
5015(define_expand "xl_len_r"
5016  [(match_operand:V16QI 0 "vsx_register_operand")
5017   (match_operand:DI 1 "register_operand")
5018   (match_operand:DI 2 "register_operand")]
5019  ""
5020{
5021  rtx shift_mask = gen_reg_rtx (V16QImode);
5022  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5023  rtx tmp = gen_reg_rtx (DImode);
5024
5025  emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
5026  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5027  emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
5028  emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
5029	     shift_mask));
5030  DONE;
5031})
5032
5033(define_insn "stxvll"
5034  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5035	(unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5036		       (mem:V16QI (match_dup 1))
5037		       (match_operand:DI 2 "register_operand" "r")]
5038	              UNSPEC_STXVLL))]
5039  "TARGET_P9_VECTOR"
5040  "stxvll %x0,%1,%2"
5041  [(set_attr "type" "vecstore")])
5042
5043;; Store VSX Vector with Length
5044(define_expand "stxvl"
5045  [(set (match_dup 3)
5046	(ashift:DI (match_operand:DI 2 "register_operand")
5047		   (const_int 56)))
5048   (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
5049	(unspec:V16QI
5050	 [(match_operand:V16QI 0 "vsx_register_operand")
5051	  (mem:V16QI (match_dup 1))
5052	  (match_dup 3)]
5053	 UNSPEC_STXVL))]
5054  "TARGET_P9_VECTOR && TARGET_64BIT"
5055{
5056  operands[3] = gen_reg_rtx (DImode);
5057})
5058
5059(define_insn "*stxvl"
5060  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5061	(unspec:V16QI
5062	 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5063	  (mem:V16QI (match_dup 1))
5064	  (match_operand:DI 2 "register_operand" "r")]
5065	 UNSPEC_STXVL))]
5066  "TARGET_P9_VECTOR && TARGET_64BIT"
5067  "stxvl %x0,%1,%2"
5068  [(set_attr "type" "vecstore")])
5069
5070;; Expand for builtin xst_len_r
5071(define_expand "xst_len_r"
5072  [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
5073   (match_operand:DI 1 "register_operand" "b")
5074   (match_operand:DI 2 "register_operand" "r")]
5075  "UNSPEC_XST_LEN_R"
5076{
5077  rtx shift_mask = gen_reg_rtx (V16QImode);
5078  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5079  rtx tmp = gen_reg_rtx (DImode);
5080
5081  emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
5082  emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
5083	     shift_mask));
5084  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5085  emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
5086  DONE;
5087})
5088
5089;; Vector Compare Not Equal Byte (specified/not+eq:)
5090(define_insn "vcmpneb"
5091  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5092	 (not:V16QI
5093	   (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
5094		     (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
5095  "TARGET_P9_VECTOR"
5096  "vcmpneb %0,%1,%2"
5097  [(set_attr "type" "vecsimple")])
5098
5099;; Vector Compare Not Equal or Zero Byte
5100(define_insn "vcmpnezb"
5101  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5102	(unspec:V16QI
5103	 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5104	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
5105	 UNSPEC_VCMPNEZB))]
5106  "TARGET_P9_VECTOR"
5107  "vcmpnezb %0,%1,%2"
5108  [(set_attr "type" "vecsimple")])
5109
5110;; Vector Compare Not Equal Half Word (specified/not+eq:)
5111(define_insn "vcmpneh"
5112  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5113	(not:V8HI
5114	  (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
5115		   (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
5116  "TARGET_P9_VECTOR"
5117  "vcmpneh %0,%1,%2"
5118  [(set_attr "type" "vecsimple")])
5119
5120;; Vector Compare Not Equal or Zero Half Word
5121(define_insn "vcmpnezh"
5122  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5123	(unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
5124		      (match_operand:V8HI 2 "altivec_register_operand" "v")]
5125	 UNSPEC_VCMPNEZH))]
5126  "TARGET_P9_VECTOR"
5127  "vcmpnezh %0,%1,%2"
5128  [(set_attr "type" "vecsimple")])
5129
5130;; Vector Compare Not Equal Word (specified/not+eq:)
5131(define_insn "vcmpnew"
5132  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5133	(not:V4SI
5134	  (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
5135		   (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
5136  "TARGET_P9_VECTOR"
5137  "vcmpnew %0,%1,%2"
5138  [(set_attr "type" "vecsimple")])
5139
5140;; Vector Compare Not Equal or Zero Word
5141(define_insn "vcmpnezw"
5142  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5143	(unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
5144		      (match_operand:V4SI 2 "altivec_register_operand" "v")]
5145	 UNSPEC_VCMPNEZW))]
5146  "TARGET_P9_VECTOR"
5147  "vcmpnezw %0,%1,%2"
5148  [(set_attr "type" "vecsimple")])
5149
5150;; Vector Count Leading Zero Least-Significant Bits Byte
5151(define_insn "vclzlsbb_<mode>"
5152  [(set (match_operand:SI 0 "register_operand" "=r")
5153	(unspec:SI
5154	 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5155	 UNSPEC_VCLZLSBB))]
5156  "TARGET_P9_VECTOR"
5157  "vclzlsbb %0,%1"
5158  [(set_attr "type" "vecsimple")])
5159
5160;; Vector Count Trailing Zero Least-Significant Bits Byte
5161(define_insn "vctzlsbb_<mode>"
5162  [(set (match_operand:SI 0 "register_operand" "=r")
5163	(unspec:SI
5164	 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5165	 UNSPEC_VCTZLSBB))]
5166  "TARGET_P9_VECTOR"
5167  "vctzlsbb %0,%1"
5168  [(set_attr "type" "vecsimple")])
5169
5170;; Vector Extract Unsigned Byte Left-Indexed
5171(define_insn "vextublx"
5172  [(set (match_operand:SI 0 "register_operand" "=r")
5173	(unspec:SI
5174	 [(match_operand:SI 1 "register_operand" "r")
5175	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
5176	 UNSPEC_VEXTUBLX))]
5177  "TARGET_P9_VECTOR"
5178  "vextublx %0,%1,%2"
5179  [(set_attr "type" "vecsimple")])
5180
5181;; Vector Extract Unsigned Byte Right-Indexed
5182(define_insn "vextubrx"
5183  [(set (match_operand:SI 0 "register_operand" "=r")
5184	(unspec:SI
5185	 [(match_operand:SI 1 "register_operand" "r")
5186	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
5187	 UNSPEC_VEXTUBRX))]
5188  "TARGET_P9_VECTOR"
5189  "vextubrx %0,%1,%2"
5190  [(set_attr "type" "vecsimple")])
5191
5192;; Vector Extract Unsigned Half Word Left-Indexed
5193(define_insn "vextuhlx"
5194  [(set (match_operand:SI 0 "register_operand" "=r")
5195	(unspec:SI
5196	 [(match_operand:SI 1 "register_operand" "r")
5197	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
5198	 UNSPEC_VEXTUHLX))]
5199  "TARGET_P9_VECTOR"
5200  "vextuhlx %0,%1,%2"
5201  [(set_attr "type" "vecsimple")])
5202
5203;; Vector Extract Unsigned Half Word Right-Indexed
5204(define_insn "vextuhrx"
5205  [(set (match_operand:SI 0 "register_operand" "=r")
5206	(unspec:SI
5207	 [(match_operand:SI 1 "register_operand" "r")
5208	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
5209	 UNSPEC_VEXTUHRX))]
5210  "TARGET_P9_VECTOR"
5211  "vextuhrx %0,%1,%2"
5212  [(set_attr "type" "vecsimple")])
5213
5214;; Vector Extract Unsigned Word Left-Indexed
5215(define_insn "vextuwlx"
5216  [(set (match_operand:SI 0 "register_operand" "=r")
5217	(unspec:SI
5218	 [(match_operand:SI 1 "register_operand" "r")
5219	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
5220	 UNSPEC_VEXTUWLX))]
5221  "TARGET_P9_VECTOR"
5222  "vextuwlx %0,%1,%2"
5223  [(set_attr "type" "vecsimple")])
5224
5225;; Vector Extract Unsigned Word Right-Indexed
5226(define_insn "vextuwrx"
5227  [(set (match_operand:SI 0 "register_operand" "=r")
5228	(unspec:SI
5229	 [(match_operand:SI 1 "register_operand" "r")
5230	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
5231	 UNSPEC_VEXTUWRX))]
5232  "TARGET_P9_VECTOR"
5233  "vextuwrx %0,%1,%2"
5234  [(set_attr "type" "vecsimple")])
5235
5236;; Vector insert/extract word at arbitrary byte values.  Note, the little
5237;; endian version needs to adjust the byte number, and the V4SI element in
5238;; vinsert4b.
5239(define_insn "extract4b"
5240  [(set (match_operand:V2DI 0 "vsx_register_operand")
5241       (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
5242                     (match_operand:QI 2 "const_0_to_12_operand" "n")]
5243                    UNSPEC_XXEXTRACTUW))]
5244  "TARGET_P9_VECTOR"
5245{
5246  if (!VECTOR_ELT_ORDER_BIG)
5247    operands[2] = GEN_INT (12 - INTVAL (operands[2]));
5248
5249  return "xxextractuw %x0,%x1,%2";
5250})
5251
5252(define_expand "insert4b"
5253  [(set (match_operand:V16QI 0 "vsx_register_operand")
5254	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
5255		       (match_operand:V16QI 2 "vsx_register_operand")
5256		       (match_operand:QI 3 "const_0_to_12_operand")]
5257		   UNSPEC_XXINSERTW))]
5258  "TARGET_P9_VECTOR"
5259{
5260  if (!VECTOR_ELT_ORDER_BIG)
5261    {
5262      rtx op1 = operands[1];
5263      rtx v4si_tmp = gen_reg_rtx (V4SImode);
5264      emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
5265      operands[1] = v4si_tmp;
5266      operands[3] = GEN_INT (12 - INTVAL (operands[3]));
5267    }
5268})
5269
5270(define_insn "*insert4b_internal"
5271  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5272	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
5273		       (match_operand:V16QI 2 "vsx_register_operand" "0")
5274		       (match_operand:QI 3 "const_0_to_12_operand" "n")]
5275		   UNSPEC_XXINSERTW))]
5276  "TARGET_P9_VECTOR"
5277  "xxinsertw %x0,%x1,%3"
5278  [(set_attr "type" "vecperm")])
5279
5280
5281;; Generate vector extract four float 32 values from left four elements
5282;; of eight element vector of float 16 values.
5283(define_expand "vextract_fp_from_shorth"
5284  [(set (match_operand:V4SF 0 "register_operand" "=wa")
5285	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5286   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
5287  "TARGET_P9_VECTOR"
5288{
5289  int i;
5290  int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5291  int vals_be[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5292
5293  rtx rvals[16];
5294  rtx mask = gen_reg_rtx (V16QImode);
5295  rtx tmp = gen_reg_rtx (V16QImode);
5296  rtvec v;
5297
5298  for (i = 0; i < 16; i++)
5299    if (!BYTES_BIG_ENDIAN)
5300      rvals[i] = GEN_INT (vals_le[i]);
5301    else
5302      rvals[i] = GEN_INT (vals_be[i]);
5303
5304  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5305     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
5306     src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
5307     conversion instruction.  */
5308  v = gen_rtvec_v (16, rvals);
5309  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5310  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5311					  operands[1], mask));
5312  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5313  DONE;
5314})
5315
5316;; Generate vector extract four float 32 values from right four elements
5317;; of eight element vector of float 16 values.
5318(define_expand "vextract_fp_from_shortl"
5319  [(set (match_operand:V4SF 0 "register_operand" "=wa")
5320	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5321	UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
5322  "TARGET_P9_VECTOR"
5323{
5324  int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5325  int vals_be[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5326
5327  int i;
5328  rtx rvals[16];
5329  rtx mask = gen_reg_rtx (V16QImode);
5330  rtx tmp = gen_reg_rtx (V16QImode);
5331  rtvec v;
5332
5333  for (i = 0; i < 16; i++)
5334    if (!BYTES_BIG_ENDIAN)
5335      rvals[i] = GEN_INT (vals_le[i]);
5336    else
5337      rvals[i] = GEN_INT (vals_be[i]);
5338
5339  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5340     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
5341     src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
5342     conversion instruction.  */
5343  v = gen_rtvec_v (16, rvals);
5344  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5345  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5346					  operands[1], mask));
5347  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5348  DONE;
5349})
5350
5351;; Support for ISA 3.0 vector byte reverse
5352
5353;; Swap all bytes with in a vector
5354(define_insn "p9_xxbrq_v1ti"
5355  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
5356	(bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
5357  "TARGET_P9_VECTOR"
5358  "xxbrq %x0,%x1"
5359  [(set_attr "type" "vecperm")])
5360
5361(define_expand "p9_xxbrq_v16qi"
5362  [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
5363   (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
5364  "TARGET_P9_VECTOR"
5365{
5366  rtx op0 = gen_reg_rtx (V1TImode);
5367  rtx op1 = gen_lowpart (V1TImode, operands[1]);
5368  emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
5369  emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
5370  DONE;
5371})
5372
5373;; Swap all bytes in each 64-bit element
5374(define_insn "p9_xxbrd_v2di"
5375  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5376	(bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
5377  "TARGET_P9_VECTOR"
5378  "xxbrd %x0,%x1"
5379  [(set_attr "type" "vecperm")])
5380
5381(define_expand "p9_xxbrd_v2df"
5382  [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
5383   (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
5384  "TARGET_P9_VECTOR"
5385{
5386  rtx op0 = gen_reg_rtx (V2DImode);
5387  rtx op1 = gen_lowpart (V2DImode, operands[1]);
5388  emit_insn (gen_p9_xxbrd_v2di (op0, op1));
5389  emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
5390  DONE;
5391})
5392
5393;; Swap all bytes in each 32-bit element
5394(define_insn "p9_xxbrw_v4si"
5395  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
5396	(bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
5397  "TARGET_P9_VECTOR"
5398  "xxbrw %x0,%x1"
5399  [(set_attr "type" "vecperm")])
5400
5401(define_expand "p9_xxbrw_v4sf"
5402  [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
5403   (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
5404  "TARGET_P9_VECTOR"
5405{
5406  rtx op0 = gen_reg_rtx (V4SImode);
5407  rtx op1 = gen_lowpart (V4SImode, operands[1]);
5408  emit_insn (gen_p9_xxbrw_v4si (op0, op1));
5409  emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
5410  DONE;
5411})
5412
5413;; Swap all bytes in each element of vector
5414(define_expand "revb_<mode>"
5415  [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
5416   (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
5417  ""
5418{
5419  if (TARGET_P9_VECTOR)
5420    emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
5421  else
5422    {
5423      /* Want to have the elements in reverse order relative
5424	 to the endian mode in use, i.e. in LE mode, put elements
5425	 in BE order.  */
5426      rtx sel = swap_endian_selector_for_mode(<MODE>mode);
5427      emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
5428					   operands[1], sel));
5429    }
5430
5431  DONE;
5432})
5433
5434;; Reversing bytes in vector char is just a NOP.
5435(define_expand "revb_v16qi"
5436  [(set (match_operand:V16QI 0 "vsx_register_operand")
5437	(bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
5438  ""
5439{
5440  emit_move_insn (operands[0], operands[1]);
5441  DONE;
5442})
5443
5444;; Swap all bytes in each 16-bit element
5445(define_insn "p9_xxbrh_v8hi"
5446  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
5447	(bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
5448  "TARGET_P9_VECTOR"
5449  "xxbrh %x0,%x1"
5450  [(set_attr "type" "vecperm")])
5451
5452
5453;; Operand numbers for the following peephole2
5454(define_constants
5455  [(SFBOOL_TMP_GPR		 0)		;; GPR temporary
5456   (SFBOOL_TMP_VSX		 1)		;; vector temporary
5457   (SFBOOL_MFVSR_D		 2)		;; move to gpr dest
5458   (SFBOOL_MFVSR_A		 3)		;; move to gpr src
5459   (SFBOOL_BOOL_D		 4)		;; and/ior/xor dest
5460   (SFBOOL_BOOL_A1		 5)		;; and/ior/xor arg1
5461   (SFBOOL_BOOL_A2		 6)		;; and/ior/xor arg1
5462   (SFBOOL_SHL_D		 7)		;; shift left dest
5463   (SFBOOL_SHL_A		 8)		;; shift left arg
5464   (SFBOOL_MTVSR_D		 9)		;; move to vecter dest
5465   (SFBOOL_MFVSR_A_V4SF		10)		;; SFBOOL_MFVSR_A as V4SFmode
5466   (SFBOOL_BOOL_A_DI		11)		;; SFBOOL_BOOL_A1/A2 as DImode
5467   (SFBOOL_TMP_VSX_DI		12)		;; SFBOOL_TMP_VSX as DImode
5468   (SFBOOL_MTVSR_D_V4SF		13)])		;; SFBOOL_MTVSRD_D as V4SFmode
5469
5470;; Attempt to optimize some common GLIBC operations using logical operations to
5471;; pick apart SFmode operations.  For example, there is code from e_powf.c
5472;; after macro expansion that looks like:
5473;;
5474;;	typedef union {
5475;;	  float value;
5476;;	  uint32_t word;
5477;;	} ieee_float_shape_type;
5478;;
5479;;	float t1;
5480;;	int32_t is;
5481;;
5482;;	do {
5483;;	  ieee_float_shape_type gf_u;
5484;;	  gf_u.value = (t1);
5485;;	  (is) = gf_u.word;
5486;;	} while (0);
5487;;
5488;;	do {
5489;;	  ieee_float_shape_type sf_u;
5490;;	  sf_u.word = (is & 0xfffff000);
5491;;	  (t1) = sf_u.value;
5492;;	} while (0);
5493;;
5494;;
5495;; This would result in two direct move operations (convert to memory format,
5496;; direct move to GPR, do the AND operation, direct move to VSX, convert to
5497;; scalar format).  With this peephole, we eliminate the direct move to the
5498;; GPR, and instead move the integer mask value to the vector register after a
5499;; shift and do the VSX logical operation.
5500
5501;; The insns for dealing with SFmode in GPR registers looks like:
5502;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
5503;;
5504;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
5505;;
5506;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
5507;;
5508;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
5509;;
5510;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
5511;;
5512;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
5513
5514(define_peephole2
5515  [(match_scratch:DI SFBOOL_TMP_GPR "r")
5516   (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
5517
5518   ;; MFVSRWZ (aka zero_extend)
5519   (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
5520	(zero_extend:DI
5521	 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
5522
5523   ;; AND/IOR/XOR operation on int
5524   (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
5525	(and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
5526			(match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
5527
5528   ;; SLDI
5529   (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
5530	(ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
5531		   (const_int 32)))
5532
5533   ;; MTVSRD
5534   (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
5535	(unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
5536
5537  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
5538   /* The REG_P (xxx) tests prevents SUBREG's, which allows us to use REGNO
5539      to compare registers, when the mode is different.  */
5540   && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
5541   && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
5542   && REG_P (operands[SFBOOL_SHL_A])   && REG_P (operands[SFBOOL_MTVSR_D])
5543   && (REG_P (operands[SFBOOL_BOOL_A2])
5544       || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
5545   && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
5546       || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
5547   && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
5548       || (REG_P (operands[SFBOOL_BOOL_A2])
5549	   && REGNO (operands[SFBOOL_MFVSR_D])
5550		== REGNO (operands[SFBOOL_BOOL_A2])))
5551   && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
5552   && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
5553       || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
5554   && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
5555  [(set (match_dup SFBOOL_TMP_GPR)
5556	(ashift:DI (match_dup SFBOOL_BOOL_A_DI)
5557		   (const_int 32)))
5558
5559   (set (match_dup SFBOOL_TMP_VSX_DI)
5560	(match_dup SFBOOL_TMP_GPR))
5561
5562   (set (match_dup SFBOOL_MTVSR_D_V4SF)
5563	(and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
5564			  (match_dup SFBOOL_TMP_VSX)))]
5565{
5566  rtx bool_a1 = operands[SFBOOL_BOOL_A1];
5567  rtx bool_a2 = operands[SFBOOL_BOOL_A2];
5568  int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
5569  int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
5570  int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
5571  int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
5572
5573  if (CONST_INT_P (bool_a2))
5574    {
5575      rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
5576      emit_move_insn (tmp_gpr, bool_a2);
5577      operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
5578    }
5579  else
5580    {
5581      int regno_bool_a1 = REGNO (bool_a1);
5582      int regno_bool_a2 = REGNO (bool_a2);
5583      int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
5584			  ? regno_bool_a2 : regno_bool_a1);
5585      operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
5586    }
5587
5588  operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
5589  operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
5590  operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
5591})
5592