;; VSX patterns.
;; Copyright (C) 2009-2020 Free Software Foundation, Inc.
;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Iterator for comparison types
(define_code_iterator CMP_TEST [eq lt gt unordered])

;; Mode attribute for vector floate and floato conversions
(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])

;; Iterator for both scalar and vector floating point types supported by VSX
(define_mode_iterator VSX_B [DF V4SF V2DF])

;; Iterator for the 2 64-bit vector types
(define_mode_iterator VSX_D [V2DF V2DI])

;; Mode iterator to handle swapping words on little endian for the 128-bit
;; types that go in a single vector register.
(define_mode_iterator VSX_LE_128 [(KF   "FLOAT128_VECTOR_P (KFmode)")
                                  (TF   "FLOAT128_VECTOR_P (TFmode)")
                                  TI
                                  V1TI])

;; Iterator for 128-bit integer types that go in a single vector register.
(define_mode_iterator VSX_TI [TI V1TI])

;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])

;; Iterator for the DF types
(define_mode_iterator VSX_DF [V2DF DF])

;; Iterator for vector floating point types supported by VSX
(define_mode_iterator VSX_F [V4SF V2DF])

;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             TI
                             (KF        "FLOAT128_VECTOR_P (KFmode)")
                             (TF        "FLOAT128_VECTOR_P (TFmode)")])

;; Iterator for memory moves.
(define_mode_iterator VSX_M [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             (KF        "FLOAT128_VECTOR_P (KFmode)")
                             (TF        "FLOAT128_VECTOR_P (TFmode)")
                             TI])

(define_mode_attr VSX_XXBR  [(V8HI  "h")
                             (V4SI  "w")
                             (V4SF  "w")
                             (V2DF  "d")
                             (V2DI  "d")
                             (V1TI  "q")])

;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm  [(V16QI "vw4")
                        (V8HI  "vw4")
                        (V4SI  "vw4")
                        (V4SF  "vw4")
                        (V2DF  "vd2")
                        (V2DI  "vd2")
                        (DF    "d")
                        (TF    "vd2")
                        (KF    "vd2")
                        (V1TI  "vd2")
                        (TI    "vd2")])

;; Map the register class used
(define_mode_attr VSr   [(V16QI "v")
                         (V8HI  "v")
                         (V4SI  "v")
                         (V4SF  "wa")
                         (V2DI  "wa")
                         (V2DF  "wa")
                         (DI    "wa")
                         (DF    "wa")
                         (SF    "wa")
                         (TF    "wa")
                         (KF    "wa")
                         (V1TI  "v")
                         (TI    "wa")])

;; What value we need in the "isa" field, to make the IEEE QP float work.
(define_mode_attr VSisa [(V16QI "*")
                         (V8HI  "*")
                         (V4SI  "*")
                         (V4SF  "*")
                         (V2DI  "*")
                         (V2DF  "*")
                         (DI    "*")
                         (DF    "*")
                         (SF    "*")
                         (V1TI  "*")
                         (TI    "*")
                         (TF    "p9tf")
                         (KF    "p9kf")])

;; A mode attribute to disparage use of GPR registers, except for scalar
;; integer modes.
(define_mode_attr ??r   [(V16QI "??r")
                         (V8HI  "??r")
                         (V4SI  "??r")
                         (V4SF  "??r")
                         (V2DI  "??r")
                         (V2DF  "??r")
                         (V1TI  "??r")
                         (KF    "??r")
                         (TF    "??r")
                         (TI    "r")])

;; A mode attribute used for 128-bit constant values.
(define_mode_attr nW    [(V16QI "W")
                         (V8HI  "W")
                         (V4SI  "W")
                         (V4SF  "W")
                         (V2DI  "W")
                         (V2DF  "W")
                         (V1TI  "W")
                         (KF    "W")
                         (TF    "W")
                         (TI    "n")])

;; Same size integer type for floating point data
(define_mode_attr VSi [(V4SF  "v4si")
                       (V2DF  "v2di")
                       (DF    "di")])

(define_mode_attr VSI [(V4SF  "V4SI")
                       (V2DF  "V2DI")
                       (DF    "DI")])

;; Word size for same size conversion
(define_mode_attr VSc [(V4SF "w")
                       (V2DF "d")
                       (DF   "d")])

;; Map into either s or v, depending on whether this is a scalar or vector
;; operation
(define_mode_attr VSv   [(V16QI "v")
                         (V8HI  "v")
                         (V4SI  "v")
                         (V4SF  "v")
                         (V2DI  "v")
                         (V2DF  "v")
                         (V1TI  "v")
                         (DF    "s")
                         (KF    "v")])

;; Appropriate type for add ops (and other simple FP ops)
(define_mode_attr VStype_simple [(V2DF "vecdouble")
                                 (V4SF "vecfloat")
                                 (DF   "fp")])

;; Appropriate type for multiply ops
(define_mode_attr VStype_mul    [(V2DF "vecdouble")
                                 (V4SF "vecfloat")
                                 (DF   "dmul")])

;; Appropriate type for divide ops.
(define_mode_attr VStype_div    [(V2DF "vecdiv")
                                 (V4SF "vecfdiv")
                                 (DF   "ddiv")])

;; Map the scalar mode for a vector type
(define_mode_attr VS_scalar [(V1TI      "TI")
                             (V2DF      "DF")
                             (V2DI      "DI")
                             (V4SF      "SF")
                             (V4SI      "SI")
                             (V8HI      "HI")
                             (V16QI     "QI")])

;; Map to a double-sized vector mode
(define_mode_attr VS_double [(V4SI      "V8SI")
                             (V4SF      "V8SF")
                             (V2DI      "V4DI")
                             (V2DF      "V4DF")
                             (V1TI      "V2TI")])

;; Iterators for loading constants with xxspltib
(define_mode_iterator VSINT_84  [V4SI V2DI DI SI])
(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])

;; Vector reverse byte modes
(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])

;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
;; done on ISA 2.07 and not just ISA 3.0; see the sketch below.
(define_mode_iterator VSX_EXTRACT_I  [V16QI V8HI V4SI])
(define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
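
;; A sketch of why V4SI is excluded from VSX_EXTRACT_I2 (an editorial
;; example; the register numbers are hypothetical): ISA 2.07 can already
;; extract a word element without the ISA 3.0 vextractu* instructions,
;; e.g. by splatting the desired word and moving it to a GPR:
;;
;;      xxspltw 32,34,2
;;      mfvsrwz 3,32
;;
;; while byte and halfword elements have no comparably direct ISA 2.07
;; sequence.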

(define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
                                     (V8HI "h")
                                     (V4SI "w")])

;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
;; insert to validate the operand number.
(define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
                                         (V8HI  "const_0_to_7_operand")
                                         (V4SI  "const_0_to_3_operand")])

;; Mode attribute to give the constraint for vector extract and insert
;; operations.
(define_mode_attr VSX_EX [(V16QI "v")
                          (V8HI  "v")
                          (V4SI  "wa")])

;; Mode iterator for binary floating types other than double to
;; optimize convert to that floating point type from an extract
;; of an integer type
(define_mode_iterator VSX_EXTRACT_FL [SF
                                      (IF "FLOAT128_2REG_P (IFmode)")
                                      (KF "TARGET_FLOAT128_HW")
                                      (TF "FLOAT128_2REG_P (TFmode)
                                           || (FLOAT128_IEEE_P (TFmode)
                                               && TARGET_FLOAT128_HW)")])

;; Mode iterator for binary floating types that have a direct conversion
;; from 64-bit integer to floating point
(define_mode_iterator FL_CONV [SF
                               DF
                               (KF "TARGET_FLOAT128_HW")
                               (TF "TARGET_FLOAT128_HW
                                    && FLOAT128_IEEE_P (TFmode)")])

;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])

;; Mode attribute to give the count for the splat instruction to splat
;; the value in the 64-bit integer slot
(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])

;; Mode attribute to give the suffix for the splat instruction
(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
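
;; A worked example of the counts above (an editorial note): after a
;; mtvsrd, the 64-bit value occupies doubleword 0 of the vector register,
;; so in big-endian element numbering its low byte is byte 7 and its low
;; halfword is halfword 3 -- hence splat counts of 7 for V16QI and 3 for
;; V8HI.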

;; Constants for creating unspecs
(define_c_enum "unspec"
  [UNSPEC_VSX_CONCAT
   UNSPEC_VSX_CVDPSXWS
   UNSPEC_VSX_CVDPUXWS
   UNSPEC_VSX_CVSPDP
   UNSPEC_VSX_CVHPSP
   UNSPEC_VSX_CVSPDPN
   UNSPEC_VSX_CVDPSPN
   UNSPEC_VSX_CVSXWDP
   UNSPEC_VSX_CVUXWDP
   UNSPEC_VSX_CVSXDSP
   UNSPEC_VSX_CVUXDSP
   UNSPEC_VSX_FLOAT2
   UNSPEC_VSX_UNS_FLOAT2
   UNSPEC_VSX_FLOATE
   UNSPEC_VSX_UNS_FLOATE
   UNSPEC_VSX_FLOATO
   UNSPEC_VSX_UNS_FLOATO
   UNSPEC_VSX_TDIV
   UNSPEC_VSX_TSQRT
   UNSPEC_VSX_SET
   UNSPEC_VSX_ROUND_I
   UNSPEC_VSX_ROUND_IC
   UNSPEC_VSX_SLDWI
   UNSPEC_VSX_XXPERM

   UNSPEC_VSX_XXSPLTW
   UNSPEC_VSX_XXSPLTD
   UNSPEC_VSX_DIVSD
   UNSPEC_VSX_DIVUD
   UNSPEC_VSX_MULSD
   UNSPEC_VSX_SIGN_EXTEND
   UNSPEC_VSX_XVCVBF16SPN
   UNSPEC_VSX_XVCVSPBF16
   UNSPEC_VSX_XVCVSPSXDS
   UNSPEC_VSX_XVCVSPHP
   UNSPEC_VSX_VSLO
   UNSPEC_VSX_EXTRACT
   UNSPEC_VSX_SXEXPDP
   UNSPEC_VSX_SXSIG
   UNSPEC_VSX_SIEXPDP
   UNSPEC_VSX_SIEXPQP
   UNSPEC_VSX_SCMPEXPDP
   UNSPEC_VSX_SCMPEXPQP
   UNSPEC_VSX_STSTDC
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
   UNSPEC_VSX_VXEXP
   UNSPEC_VSX_VXSIG
   UNSPEC_VSX_VIEXP
   UNSPEC_VSX_VTSTDC
   UNSPEC_VSX_VSIGNED2

   UNSPEC_LXVL
   UNSPEC_LXVLL
   UNSPEC_LVSL_REG
   UNSPEC_LVSR_REG
   UNSPEC_STXVL
   UNSPEC_STXVLL
   UNSPEC_XL_LEN_R
   UNSPEC_XST_LEN_R

   UNSPEC_VCLZLSBB
   UNSPEC_VCTZLSBB
   UNSPEC_VEXTUBLX
   UNSPEC_VEXTUHLX
   UNSPEC_VEXTUWLX
   UNSPEC_VEXTUBRX
   UNSPEC_VEXTUHRX
   UNSPEC_VEXTUWRX
   UNSPEC_VCMPNEB
   UNSPEC_VCMPNEZB
   UNSPEC_VCMPNEH
   UNSPEC_VCMPNEZH
   UNSPEC_VCMPNEW
   UNSPEC_VCMPNEZW
   UNSPEC_XXEXTRACTUW
   UNSPEC_XXINSERTW
   UNSPEC_VSX_FIRST_MATCH_INDEX
   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
  ])

(define_int_iterator XVCVBF16   [UNSPEC_VSX_XVCVSPBF16
                                 UNSPEC_VSX_XVCVBF16SPN])

(define_int_attr xvcvbf16       [(UNSPEC_VSX_XVCVSPBF16 "xvcvspbf16")
                                 (UNSPEC_VSX_XVCVBF16SPN "xvcvbf16spn")])

;; VSX moves

;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
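;; As a sketch of what such a split produces on ISA 2.06/2.07 (register
;; numbers are hypothetical), a V2DF load becomes an element-reversing
;; doubleword load followed by a doubleword swap:
;;
;;      lxvd2x 32,0,3
;;      xxpermdi 32,32,32,2
;;
;; The later swap-optimization pass removes pairs of swaps that cancel.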
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
        (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
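;; That is: swap the register in place, do the (element-reversing) store,
;; then swap the register back so it still holds its original value.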
(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))]
  "")

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  "")

(define_insn "*vsx_le_perm_store_v8hi"
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "")

(define_insn "*vsx_le_perm_store_v16qi"
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "")

;; Little endian word swapping for 128-bit types that are either scalars or the
;; special V1TI container class, for which it is not appropriate to use
;; vec_select.
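;; Rotating a 128-bit value by 64 bits is exactly a doubleword swap, which
;; is why the register alternative below is a single xxpermdi with permute
;; control 2, and the GPR alternative is just a pair of "mr" instructions.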
(define_insn "*vsx_le_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q")
        (rotate:VSX_TI
         (match_operand:VSX_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   xxpermdi %x0,%x1,%x1,2
   lxvd2x %x0,%y1
   stxvd2x %x1,%y0
   mr %0,%L1\;mr %L0,%1
   ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
   std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
  [(set_attr "length" "*,*,*,8,8,8")
   (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])

(define_insn_and_split "*vsx_le_undo_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=wa,wa")
        (rotate:VSX_TI
         (rotate:VSX_TI
          (match_operand:VSX_TI 1 "vsx_register_operand" "0,wa")
          (const_int 64))
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "@
   #
   xxlor %x0,%x1"
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
    {
      emit_note (NOTE_INSN_DELETED);
      DONE;
    }
}
  [(set_attr "length" "0,4")
   (set_attr "type" "veclogical")])

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=wa,r")
        (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
  "@
   #
   #"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
}
  [(set_attr "type" "vecload,load")
   (set_attr "length" "8,8")
   (set_attr "isa" "<VSisa>,*")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
  "@
   #
   #"
  [(set_attr "type" "vecstore,store")
   (set_attr "length" "12,8")
   (set_attr "isa" "<VSisa>,*")])

(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
})

;; Peepholes to catch loads and stores for TImode if TImode landed in
;; GPR registers on a little endian system.
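;; For example, "load into a GPR pair and swap; swap into another GPR
;; pair" collapses to a plain load, because (rotate (rotate x 64) 64) is
;; the identity on a 128-bit value.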
(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "int_reg_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "memory_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && peep2_reg_dead_p (2, operands[0])"
  [(set (match_dup 2) (match_dup 1))])

;; Peephole to catch memory to memory transfers for TImode if TImode landed in
;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
;; floating point are handled by the more generic swap elimination pass.
(define_peephole2
  [(set (match_operand:TI 0 "vsx_register_operand")
        (rotate:TI (match_operand:TI 1 "vsx_register_operand")
                   (const_int 64)))
   (set (match_operand:TI 2 "vsx_register_operand")
        (rotate:TI (match_dup 0)
                   (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
  [(const_int 0)]
{
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  DONE;
})

;; Vector constants that can be generated with XXSPLTIB that was added in ISA
;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
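;; As a usage sketch (with a hypothetical register number), the splat
;; vec_splats ((signed char) -5) becomes the single instruction
;;
;;      xxspltib 34,251
;;
;; where 251 is -5 truncated to an unsigned 8-bit immediate.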
(define_insn "xxspltib_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
  "TARGET_P9_VECTOR"
{
  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])

(define_insn "xxspltib_<mode>_nosplit"
  [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
        (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
  "TARGET_P9_VECTOR"
{
  rtx op1 = operands[1];
  int value = 256;
  int num_insns = -1;

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 1)
    gcc_unreachable ();

  operands[2] = GEN_INT (value & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])

(define_insn_and_split "*xxspltib_<mode>_split"
  [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
        (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
  "TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(const_int 0)]
{
  int value = 256;
  int num_insns = -1;
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp = ((can_create_pseudo_p ())
             ? gen_reg_rtx (V16QImode)
             : gen_lowpart (V16QImode, op0));

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 2)
    gcc_unreachable ();

  emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));

  if (<MODE>mode == V2DImode)
    emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));

  else if (<MODE>mode == V4SImode)
    emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));

  else if (<MODE>mode == V8HImode)
    emit_insn (gen_altivec_vupkhsb (op0, tmp));

  else
    gcc_unreachable ();

  DONE;
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])


;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTIB
;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
;; all 1's, since the machine does not have to wait for the previous
;; instruction using the register being set (such as a store waiting on a slow
;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
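;; For example, "xxspltib 32,0" has no register inputs at all, while
;; "xxlxor 32,32,32" nominally reads vs32 and so may stall behind an
;; in-flight producer of that register (a microarchitectural note; some
;; cores do break this dependency on xor-with-self).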

;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
;;              VSX 0/-1   VMX const  GPR const LVX (VMX)  STVX (VMX)
(define_insn "vsx_mov<mode>_64bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      wa,        wa,        r,         we,        ?wQ,
                ?&r,       ??r,       ??Y,       <??r>,     wa,        v,
                ?wa,       v,         <??r>,     wZ,        v")

        (match_operand:VSX_M 1 "input_operand"
               "wa,        ZwO,       wa,        we,        r,         r,
                wQ,        Y,         r,         r,         wE,        jwM,
                ?jwM,      W,         <nW>,      v,         wZ"))]

  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, mffgpr,    mftgpr,    load,
                store,     load,      store,     *,         vecsimple, vecsimple,
                vecsimple, *,         *,         vecstore,  vecload")
   (set_attr "num_insns"
               "*,         *,         *,         2,         *,         2,
                2,         2,         2,         2,         *,         *,
                *,         5,         2,         *,         *")
   (set_attr "max_prefixed_insns"
               "*,         *,         *,         *,         *,         2,
                2,         2,         2,         2,         *,         *,
                *,         *,         *,         *,         *")
   (set_attr "length"
               "*,         *,         *,         8,         *,         8,
                8,         8,         8,         8,         *,         *,
                *,         20,        8,         *,         *")
   (set_attr "isa"
               "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
                *,         *,         *,         *,         p9v,       *,
                <VSisa>,   *,         *,         *,         *")])

;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
;;              XXSPLTIB   VSPLTISW   VSX 0/-1   VMX const  GPR const
;;              LVX (VMX)  STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      wa,        wa,        ??r,       ??Y,       <??r>,
                wa,        v,         ?wa,       v,         <??r>,
                wZ,        v")

        (match_operand:VSX_M 1 "input_operand"
               "wa,        ZwO,       wa,        Y,         r,         r,
                wE,        jwM,       ?jwM,      W,         <nW>,
                v,         wZ"))]

  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, load,      store,     *,
                vecsimple, vecsimple, vecsimple, *,         *,
                vecstore,  vecload")
   (set_attr "length"
               "*,         *,         *,         16,        16,        16,
                *,         *,         *,         20,        16,
                *,         *")
   (set_attr "isa"
               "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
                p9v,       *,         <VSisa>,   *,         *,
                *,         *")])

;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
  [(set (match_operand:VSX_M 0 "vsx_register_operand")
        (match_operand:VSX_M 1 "memory_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
      && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode))
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

(define_expand "vsx_store_<mode>"
  [(set (match_operand:VSX_M 0 "memory_operand")
        (match_operand:VSX_M 1 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
      && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode))
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
;; when you really want their element-reversing behavior.
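;; For instance (an editorial sketch), vec_xl_be (0, ptr) on a vector
;; double operand maps to vsx_ld_elemrev_v2df below and emits a single
;; lxvd2x, whose doubleword element order is big endian even on a little
;; endian system.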
(define_insn "vsx_ld_elemrev_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (vec_select:V2DI
          (match_operand:V2DI 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
        (vec_select:V1TI
          (match_operand:V1TI 1 "memory_operand" "Z")
          (parallel [(const_int 0)])))]
  "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
{
  return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
}
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v2df"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (vec_select:V2DF
          (match_operand:V2DF 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (vec_select:V4SI
          (match_operand:V4SI 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (vec_select:V4SF
          (match_operand:V4SF 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_expand "vsx_ld_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
                                                subreg2, pcv));
      DONE;
    }
})

(define_insn "*vsx_ld_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvh8x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_expand "vsx_ld_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int  9) (const_int  8)
                     (const_int  7) (const_int  6)
                     (const_int  5) (const_int  4)
                     (const_int  3) (const_int  2)
                     (const_int  1) (const_int  0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
                                                 subreg2, pcv));
      DONE;
    }
})

(define_insn "vsx_ld_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int  9) (const_int  8)
                     (const_int  7) (const_int  6)
                     (const_int  5) (const_int  4)
                     (const_int  3) (const_int  2)
                     (const_int  1) (const_int  0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvb16x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_st_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "memory_operand" "=Z")
        (vec_select:V1TI
          (match_operand:V1TI 1 "vsx_register_operand" "+wa")
          (parallel [(const_int 0)])))
   (clobber (match_dup 1))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
{
  return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
}
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2df"
  [(set (match_operand:V2DF 0 "memory_operand" "=Z")
        (vec_select:V2DF
          (match_operand:V2DF 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2di"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
        (vec_select:V2DI
          (match_operand:V2DI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "memory_operand" "=Z")
        (vec_select:V4SF
          (match_operand:V4SF 1 "vsx_register_operand" "wa")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "stxvw4x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4si"
  [(set (match_operand:V4SI 0 "memory_operand" "=Z")
        (vec_select:V4SI
          (match_operand:V4SI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "stxvw4x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_expand "vsx_st_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (vec_select:V8HI
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V8HImode);
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
                                                operands[1], pcv));
      subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
      DONE;
    }
})
1478(define_insn "*vsx_st_elemrev_v8hi_internal"
1479  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1480        (vec_select:V8HI
1481          (match_operand:V8HI 1 "vsx_register_operand" "wa")
1482          (parallel [(const_int 7) (const_int 6)
1483                     (const_int 5) (const_int 4)
1484                     (const_int 3) (const_int 2)
1485                     (const_int 1) (const_int 0)])))]
1486  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1487  "stxvh8x %x1,%y0"
1488  [(set_attr "type" "vecstore")])
1489
1490(define_expand "vsx_st_elemrev_v16qi"
1491  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1492        (vec_select:V16QI
1493          (match_operand:V16QI 1 "vsx_register_operand" "wa")
1494          (parallel [(const_int 15) (const_int 14)
1495                     (const_int 13) (const_int 12)
1496                     (const_int 11) (const_int 10)
1497                     (const_int  9) (const_int  8)
1498                     (const_int  7) (const_int  6)
1499                     (const_int  5) (const_int  4)
1500                     (const_int  3) (const_int  2)
1501                     (const_int  1) (const_int  0)])))]
1502  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1503{
1504  if (!TARGET_P9_VECTOR)
1505    {
1506      rtx mem_subreg, subreg, perm[16], pcv;
1507      rtx tmp = gen_reg_rtx (V16QImode);
      /* 3 is leftmost element in register.  */
1509      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1510      int i;
1511
1512      for (i = 0; i < 16; ++i)
1513      	perm[i] = GEN_INT (reorder[i]);
1514
1515      pcv = force_reg (V16QImode,
1516                       gen_rtx_CONST_VECTOR (V16QImode,
1517                                             gen_rtvec_v (16, perm)));
1518      emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
1519                                                 operands[1], pcv));
1520      subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
1521      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
1522      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1523      DONE;
1524    }
1525})
1526
1527(define_insn "*vsx_st_elemrev_v16qi_internal"
1528  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1529        (vec_select:V16QI
1530          (match_operand:V16QI 1 "vsx_register_operand" "wa")
1531          (parallel [(const_int 15) (const_int 14)
1532                     (const_int 13) (const_int 12)
1533                     (const_int 11) (const_int 10)
1534                     (const_int  9) (const_int  8)
1535                     (const_int  7) (const_int  6)
1536                     (const_int  5) (const_int  4)
1537                     (const_int  3) (const_int  2)
1538                     (const_int  1) (const_int  0)])))]
1539  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1540  "stxvb16x %x1,%y0"
1541  [(set_attr "type" "vecstore")])
1542
1543
1544;; VSX vector floating point arithmetic instructions.  The VSX scalar
1545;; instructions are now combined with the insn for the traditional floating
1546;; point unit.
1547(define_insn "*vsx_add<mode>3"
1548  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1549        (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1550		    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1551  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1552  "xvadd<sd>p %x0,%x1,%x2"
1553  [(set_attr "type" "<VStype_simple>")])
1554
1555(define_insn "*vsx_sub<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1557        (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1558		     (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1559  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1560  "xvsub<sd>p %x0,%x1,%x2"
1561  [(set_attr "type" "<VStype_simple>")])
1562
1563(define_insn "*vsx_mul<mode>3"
1564  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1565        (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1566		    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1567  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1568  "xvmul<sd>p %x0,%x1,%x2"
1569  [(set_attr "type" "<VStype_simple>")])
1570
;; Emulate vector with scalar for vec_mul in V2DImode
1572(define_insn_and_split "vsx_mul_v2di"
1573  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1574        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1575                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1576                     UNSPEC_VSX_MULSD))]
1577  "VECTOR_MEM_VSX_P (V2DImode)"
1578  "#"
1579  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1580  [(const_int 0)]
1581{
1582  rtx op0 = operands[0];
1583  rtx op1 = operands[1];
1584  rtx op2 = operands[2];
1585  rtx op3 = gen_reg_rtx (DImode);
1586  rtx op4 = gen_reg_rtx (DImode);
1587  rtx op5 = gen_reg_rtx (DImode);
1588  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1589  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1590  if (TARGET_POWERPC64)
1591    emit_insn (gen_muldi3 (op5, op3, op4));
1592  else
1593    {
1594      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1595      emit_move_insn (op5, ret);
1596    }
1597  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1598  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1599  if (TARGET_POWERPC64)
1600    emit_insn (gen_muldi3 (op3, op3, op4));
1601  else
1602    {
1603      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1604      emit_move_insn (op3, ret);
1605    }
1606  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1607  DONE;
1608}
1609  [(set_attr "type" "mul")])
1610
1611(define_insn "*vsx_div<mode>3"
1612  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1613        (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1614		   (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1615  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1616  "xvdiv<sd>p %x0,%x1,%x2"
1617  [(set_attr "type" "<VStype_div>")])
1618
;; Emulate vector with scalar for vec_div in V2DImode
1620(define_insn_and_split "vsx_div_v2di"
1621  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1622        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1623                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1624                     UNSPEC_VSX_DIVSD))]
1625  "VECTOR_MEM_VSX_P (V2DImode)"
1626  "#"
1627  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1628  [(const_int 0)]
1629{
1630  rtx op0 = operands[0];
1631  rtx op1 = operands[1];
1632  rtx op2 = operands[2];
1633  rtx op3 = gen_reg_rtx (DImode);
1634  rtx op4 = gen_reg_rtx (DImode);
1635  rtx op5 = gen_reg_rtx (DImode);
1636  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1637  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1638  if (TARGET_POWERPC64)
1639    emit_insn (gen_divdi3 (op5, op3, op4));
1640  else
1641    {
1642      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1643      rtx target = emit_library_call_value (libfunc,
1644					    op5, LCT_NORMAL, DImode,
1645					    op3, DImode,
1646					    op4, DImode);
1647      emit_move_insn (op5, target);
1648    }
1649  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1650  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1651  if (TARGET_POWERPC64)
1652    emit_insn (gen_divdi3 (op3, op3, op4));
1653  else
1654    {
1655      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1656      rtx target = emit_library_call_value (libfunc,
1657					    op3, LCT_NORMAL, DImode,
1658					    op3, DImode,
1659					    op4, DImode);
1660      emit_move_insn (op3, target);
1661    }
1662  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1663  DONE;
1664}
1665  [(set_attr "type" "div")])
1666
1667(define_insn_and_split "vsx_udiv_v2di"
1668  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1669        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1670                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1671                     UNSPEC_VSX_DIVUD))]
1672  "VECTOR_MEM_VSX_P (V2DImode)"
1673  "#"
1674  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1675  [(const_int 0)]
1676{
1677  rtx op0 = operands[0];
1678  rtx op1 = operands[1];
1679  rtx op2 = operands[2];
1680  rtx op3 = gen_reg_rtx (DImode);
1681  rtx op4 = gen_reg_rtx (DImode);
1682  rtx op5 = gen_reg_rtx (DImode);
1683  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1684  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1685  if (TARGET_POWERPC64)
1686    emit_insn (gen_udivdi3 (op5, op3, op4));
1687  else
1688    {
1689      rtx libfunc = optab_libfunc (udiv_optab, DImode);
1690      rtx target = emit_library_call_value (libfunc,
1691					    op5, LCT_NORMAL, DImode,
1692					    op3, DImode,
1693					    op4, DImode);
1694      emit_move_insn (op5, target);
1695    }
1696  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1697  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1698  if (TARGET_POWERPC64)
1699    emit_insn (gen_udivdi3 (op3, op3, op4));
1700  else
1701    {
1702      rtx libfunc = optab_libfunc (udiv_optab, DImode);
1703      rtx target = emit_library_call_value (libfunc,
1704					    op3, LCT_NORMAL, DImode,
1705					    op3, DImode,
1706					    op4, DImode);
1707      emit_move_insn (op3, target);
1708    }
1709  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1710  DONE;
1711}
1712  [(set_attr "type" "div")])
1713
1714;; *tdiv* instruction returning the FG flag
1715(define_expand "vsx_tdiv<mode>3_fg"
1716  [(set (match_dup 3)
1717	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1718		      (match_operand:VSX_B 2 "vsx_register_operand")]
1719		     UNSPEC_VSX_TDIV))
1720   (set (match_operand:SI 0 "gpc_reg_operand")
1721	(gt:SI (match_dup 3)
1722	       (const_int 0)))]
1723  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1724{
1725  operands[3] = gen_reg_rtx (CCFPmode);
1726})
1727
1728;; *tdiv* instruction returning the FE flag
1729(define_expand "vsx_tdiv<mode>3_fe"
1730  [(set (match_dup 3)
1731	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1732		      (match_operand:VSX_B 2 "vsx_register_operand")]
1733		     UNSPEC_VSX_TDIV))
1734   (set (match_operand:SI 0 "gpc_reg_operand")
1735	(eq:SI (match_dup 3)
1736	       (const_int 0)))]
1737  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1738{
1739  operands[3] = gen_reg_rtx (CCFPmode);
1740})
1741
1742(define_insn "*vsx_tdiv<mode>3_internal"
1743  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
1744	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")
1745		      (match_operand:VSX_B 2 "vsx_register_operand" "wa")]
1746		   UNSPEC_VSX_TDIV))]
1747  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1748  "x<VSv>tdiv<sd>p %0,%x1,%x2"
1749  [(set_attr "type" "<VStype_simple>")])
1750
1751(define_insn "vsx_fre<mode>2"
1752  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1753	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
1754		      UNSPEC_FRES))]
1755  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1756  "xvre<sd>p %x0,%x1"
1757  [(set_attr "type" "<VStype_simple>")])
1758
1759(define_insn "*vsx_neg<mode>2"
1760  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1761        (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1762  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1763  "xvneg<sd>p %x0,%x1"
1764  [(set_attr "type" "<VStype_simple>")])
1765
1766(define_insn "*vsx_abs<mode>2"
1767  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1768        (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1769  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1770  "xvabs<sd>p %x0,%x1"
1771  [(set_attr "type" "<VStype_simple>")])
1772
1773(define_insn "vsx_nabs<mode>2"
1774  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1775        (neg:VSX_F
1776	 (abs:VSX_F
1777	  (match_operand:VSX_F 1 "vsx_register_operand" "wa"))))]
1778  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1779  "xvnabs<sd>p %x0,%x1"
1780  [(set_attr "type" "<VStype_simple>")])
1781
1782(define_insn "vsx_smax<mode>3"
1783  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1784        (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1785		    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1786  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1787  "xvmax<sd>p %x0,%x1,%x2"
1788  [(set_attr "type" "<VStype_simple>")])
1789
1790(define_insn "*vsx_smin<mode>3"
1791  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1792        (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1793		    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1794  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1795  "xvmin<sd>p %x0,%x1,%x2"
1796  [(set_attr "type" "<VStype_simple>")])
1797
1798(define_insn "*vsx_sqrt<mode>2"
1799  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1800        (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1801  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1802  "xvsqrt<sd>p %x0,%x1"
1803  [(set_attr "type" "<sd>sqrt")])
1804
1805(define_insn "*vsx_rsqrte<mode>2"
1806  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1807	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
1808		      UNSPEC_RSQRT))]
1809  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1810  "xvrsqrte<sd>p %x0,%x1"
1811  [(set_attr "type" "<VStype_simple>")])
1812
1813;; *tsqrt* returning the fg flag
1814(define_expand "vsx_tsqrt<mode>2_fg"
1815  [(set (match_dup 2)
1816	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1817		     UNSPEC_VSX_TSQRT))
1818   (set (match_operand:SI 0 "gpc_reg_operand")
1819	(gt:SI (match_dup 2)
1820	       (const_int 0)))]
1821  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1822{
1823  operands[2] = gen_reg_rtx (CCFPmode);
1824})
1825
1826;; *tsqrt* returning the fe flag
1827(define_expand "vsx_tsqrt<mode>2_fe"
1828  [(set (match_dup 2)
1829	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1830		     UNSPEC_VSX_TSQRT))
1831   (set (match_operand:SI 0 "gpc_reg_operand")
1832	(eq:SI (match_dup 2)
1833	       (const_int 0)))]
1834  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1835{
1836  operands[2] = gen_reg_rtx (CCFPmode);
1837})
1838
1839(define_insn "*vsx_tsqrt<mode>2_internal"
1840  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
1841	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
1842		     UNSPEC_VSX_TSQRT))]
1843  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1844  "x<VSv>tsqrt<sd>p %0,%x1"
1845  [(set_attr "type" "<VStype_simple>")])
1846
;; Fused vector multiply/add instructions.  Support the classical Altivec
;; versions of fma, which allow the target to be a separate register from
;; the 3 inputs.  Under VSX, the target must be either the addend or the
;; first multiplicand.
1851
1852(define_insn "*vsx_fmav4sf4"
1853  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
1854	(fma:V4SF
1855	  (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
1856	  (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
1857	  (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))]
1858  "VECTOR_UNIT_VSX_P (V4SFmode)"
1859  "@
1860   xvmaddasp %x0,%x1,%x2
1861   xvmaddmsp %x0,%x1,%x3
1862   vmaddfp %0,%1,%2,%3"
1863  [(set_attr "type" "vecfloat")])
1864
1865(define_insn "*vsx_fmav2df4"
1866  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
1867	(fma:V2DF
1868	  (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
1869	  (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
1870	  (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))]
1871  "VECTOR_UNIT_VSX_P (V2DFmode)"
1872  "@
1873   xvmaddadp %x0,%x1,%x2
1874   xvmaddmdp %x0,%x1,%x3"
1875  [(set_attr "type" "vecdouble")])
1876
1877(define_insn "*vsx_fms<mode>4"
1878  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
1879	(fma:VSX_F
1880	  (match_operand:VSX_F 1 "vsx_register_operand" "%wa,wa")
1881	  (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
1882	  (neg:VSX_F
1883	    (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
1884  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1885  "@
1886   xvmsuba<sd>p %x0,%x1,%x2
1887   xvmsubm<sd>p %x0,%x1,%x3"
1888  [(set_attr "type" "<VStype_mul>")])
1889
1890(define_insn "*vsx_nfma<mode>4"
1891  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
1892	(neg:VSX_F
1893	 (fma:VSX_F
1894	  (match_operand:VSX_F 1 "vsx_register_operand" "wa,wa")
1895	  (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
1896	  (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
1897  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1898  "@
1899   xvnmadda<sd>p %x0,%x1,%x2
1900   xvnmaddm<sd>p %x0,%x1,%x3"
1901  [(set_attr "type" "<VStype_mul>")])
1902
1903(define_insn "*vsx_nfmsv4sf4"
1904  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
1905	(neg:V4SF
1906	 (fma:V4SF
1907	   (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
1908	   (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
1909	   (neg:V4SF
1910	     (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))))]
1911  "VECTOR_UNIT_VSX_P (V4SFmode)"
1912  "@
1913   xvnmsubasp %x0,%x1,%x2
1914   xvnmsubmsp %x0,%x1,%x3
1915   vnmsubfp %0,%1,%2,%3"
1916  [(set_attr "type" "vecfloat")])
1917
1918(define_insn "*vsx_nfmsv2df4"
1919  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
1920	(neg:V2DF
1921	 (fma:V2DF
1922	   (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
1923	   (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
1924	   (neg:V2DF
1925	     (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))))]
1926  "VECTOR_UNIT_VSX_P (V2DFmode)"
1927  "@
1928   xvnmsubadp %x0,%x1,%x2
1929   xvnmsubmdp %x0,%x1,%x3"
1930  [(set_attr "type" "vecdouble")])
1931
1932;; Vector conditional expressions (no scalar version for these instructions)
1933(define_insn "vsx_eq<mode>"
1934  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1935	(eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1936		  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1937  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1938  "xvcmpeq<sd>p %x0,%x1,%x2"
1939  [(set_attr "type" "<VStype_simple>")])
1940
1941(define_insn "vsx_gt<mode>"
1942  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1943	(gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1944		  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1945  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1946  "xvcmpgt<sd>p %x0,%x1,%x2"
1947  [(set_attr "type" "<VStype_simple>")])
1948
1949(define_insn "*vsx_ge<mode>"
1950  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1951	(ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1952		  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1953  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1954  "xvcmpge<sd>p %x0,%x1,%x2"
1955  [(set_attr "type" "<VStype_simple>")])
1956
1957;; Compare vectors producing a vector result and a predicate, setting CR6 to
1958;; indicate a combined status
1959(define_insn "*vsx_eq_<mode>_p"
1960  [(set (reg:CC CR6_REGNO)
1961	(unspec:CC
1962	 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1963		 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
1964	 UNSPEC_PREDICATE))
1965   (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1966	(eq:VSX_F (match_dup 1)
1967		  (match_dup 2)))]
1968  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1969  "xvcmpeq<sd>p. %x0,%x1,%x2"
1970  [(set_attr "type" "<VStype_simple>")])
1971
1972(define_insn "*vsx_gt_<mode>_p"
1973  [(set (reg:CC CR6_REGNO)
1974	(unspec:CC
1975	 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1976		 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
1977	 UNSPEC_PREDICATE))
1978   (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1979	(gt:VSX_F (match_dup 1)
1980		  (match_dup 2)))]
1981  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1982  "xvcmpgt<sd>p. %x0,%x1,%x2"
1983  [(set_attr "type" "<VStype_simple>")])
1984
1985(define_insn "*vsx_ge_<mode>_p"
1986  [(set (reg:CC CR6_REGNO)
1987	(unspec:CC
1988	 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1989		 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
1990	 UNSPEC_PREDICATE))
1991   (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1992	(ge:VSX_F (match_dup 1)
1993		  (match_dup 2)))]
1994  "VECTOR_UNIT_VSX_P (<MODE>mode)"
1995  "xvcmpge<sd>p. %x0,%x1,%x2"
1996  [(set_attr "type" "<VStype_simple>")])
1997
1998;; Vector select
1999(define_insn "*vsx_xxsel<mode>"
2000  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
2001	(if_then_else:VSX_L
2002	 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
2003		(match_operand:VSX_L 4 "zero_constant" ""))
2004	 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
2005	 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
2006  "VECTOR_MEM_VSX_P (<MODE>mode)"
2007  "xxsel %x0,%x3,%x2,%x1"
2008  [(set_attr "type" "vecmove")
2009   (set_attr "isa" "<VSisa>")])
2010
2011(define_insn "*vsx_xxsel<mode>_uns"
2012  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
2013	(if_then_else:VSX_L
2014	 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
2015		   (match_operand:VSX_L 4 "zero_constant" ""))
2016	 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
2017	 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
2018  "VECTOR_MEM_VSX_P (<MODE>mode)"
2019  "xxsel %x0,%x3,%x2,%x1"
2020  [(set_attr "type" "vecmove")
2021   (set_attr "isa" "<VSisa>")])
2022
2023;; Copy sign
2024(define_insn "vsx_copysign<mode>3"
2025  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2026	(unspec:VSX_F
2027	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
2028	  (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
2029	 UNSPEC_COPYSIGN))]
2030  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2031  "xvcpsgn<sd>p %x0,%x2,%x1"
2032  [(set_attr "type" "<VStype_simple>")])
2033
2034;; For the conversions, limit the register class for the integer value to be
2035;; the fprs because we don't want to add the altivec registers to movdi/movsi.
2036;; For the unsigned tests, there isn't a generic double -> unsigned conversion
2037;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
2038;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
2039;; in allowing virtual registers.
2040(define_insn "vsx_float<VSi><mode>2"
2041  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2042	(float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2043  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2044  "xvcvsx<VSc><sd>p %x0,%x1"
2045  [(set_attr "type" "<VStype_simple>")])
2046
2047(define_insn "vsx_floatuns<VSi><mode>2"
2048  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2049	(unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2050  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2051  "xvcvux<VSc><sd>p %x0,%x1"
2052  [(set_attr "type" "<VStype_simple>")])
2053
2054(define_insn "vsx_fix_trunc<mode><VSi>2"
2055  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2056	(fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2057  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2058  "x<VSv>cv<sd>psx<VSc>s %x0,%x1"
2059  [(set_attr "type" "<VStype_simple>")])
2060
2061(define_insn "vsx_fixuns_trunc<mode><VSi>2"
2062  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2063	(unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2064  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2065  "x<VSv>cv<sd>pux<VSc>s %x0,%x1"
2066  [(set_attr "type" "<VStype_simple>")])
2067
2068;; Math rounding functions
2069(define_insn "vsx_x<VSv>r<sd>pi"
2070  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2071	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2072		      UNSPEC_VSX_ROUND_I))]
2073  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2074  "x<VSv>r<sd>pi %x0,%x1"
2075  [(set_attr "type" "<VStype_simple>")])
2076
2077(define_insn "vsx_x<VSv>r<sd>pic"
2078  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2079	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2080		      UNSPEC_VSX_ROUND_IC))]
2081  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2082  "x<VSv>r<sd>pic %x0,%x1"
2083  [(set_attr "type" "<VStype_simple>")])
2084
2085(define_insn "vsx_btrunc<mode>2"
2086  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2087	(fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
2088  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2089  "xvr<sd>piz %x0,%x1"
2090  [(set_attr "type" "<VStype_simple>")])
2091
2092(define_insn "*vsx_b2trunc<mode>2"
2093  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2094	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2095		      UNSPEC_FRIZ))]
2096  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2097  "x<VSv>r<sd>piz %x0,%x1"
2098  [(set_attr "type" "<VStype_simple>")])
2099
2100(define_insn "vsx_floor<mode>2"
2101  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2102	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2103		      UNSPEC_FRIM))]
2104  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2105  "xvr<sd>pim %x0,%x1"
2106  [(set_attr "type" "<VStype_simple>")])
2107
2108(define_insn "vsx_ceil<mode>2"
2109  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2110	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2111		      UNSPEC_FRIP))]
2112  "VECTOR_UNIT_VSX_P (<MODE>mode)"
2113  "xvr<sd>pip %x0,%x1"
2114  [(set_attr "type" "<VStype_simple>")])
2115
2116
2117;; VSX convert to/from double vector
2118
2119;; Convert between single and double precision
2120;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
2121;; scalar single precision instructions internally use the double format.
2122;; Prefer the altivec registers, since we likely will need to do a vperm
2123(define_insn "vsx_xscvdpsp"
2124  [(set (match_operand:V4SF 0 "vsx_register_operand" "=f,?wa")
2125	(unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "f,wa")]
2126			      UNSPEC_VSX_CVSPDP))]
2127  "VECTOR_UNIT_VSX_P (DFmode)"
2128  "xscvdpsp %x0,%x1"
2129  [(set_attr "type" "fp")])
2130
2131(define_insn "vsx_xvcvspdp_be"
2132  [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2133     (float_extend:V2DF
2134       (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2135	 (parallel [(const_int 0) (const_int 2)]))))]
2136  "VECTOR_UNIT_VSX_P (V4SFmode) && BYTES_BIG_ENDIAN"
2137  "xvcvspdp %x0,%x1"
2138  [(set_attr "type" "vecdouble")])
2139
2140(define_insn "vsx_xvcvspdp_le"
2141  [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2142     (float_extend:V2DF
2143       (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2144	 (parallel [(const_int 1) (const_int 3)]))))]
2145  "VECTOR_UNIT_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
2146  "xvcvspdp %x0,%x1"
2147  [(set_attr "type" "vecdouble")])
2148
2149(define_expand "vsx_xvcvspdp"
2150  [(match_operand:V2DF 0 "vsx_register_operand")
2151   (match_operand:V4SF 1 "vsx_register_operand")]
2152  "VECTOR_UNIT_VSX_P (V4SFmode)"
2153{
2154  if (BYTES_BIG_ENDIAN)
2155    emit_insn (gen_vsx_xvcvspdp_be (operands[0], operands[1]));
2156  else
2157    emit_insn (gen_vsx_xvcvspdp_le (operands[0], operands[1]));
2158  DONE;
2159})
2160
2161(define_insn "vsx_xvcvdpsp"
2162  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,?wa")
2163	(unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "v,wa")]
2164			      UNSPEC_VSX_CVSPDP))]
2165  "VECTOR_UNIT_VSX_P (V2DFmode)"
2166  "xvcvdpsp %x0,%x1"
2167  [(set_attr "type" "vecdouble")])
2168
;; xscvspdp, with the scalar SF value represented as V4SF
2170(define_insn "vsx_xscvspdp"
2171  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2172	(unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2173		   UNSPEC_VSX_CVSPDP))]
2174  "VECTOR_UNIT_VSX_P (V4SFmode)"
2175  "xscvspdp %x0,%x1"
2176  [(set_attr "type" "fp")])
2177
2178;; Same as vsx_xscvspdp, but use SF as the type
2179(define_insn "vsx_xscvspdp_scalar2"
2180  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2181	(unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2182		   UNSPEC_VSX_CVSPDP))]
2183  "VECTOR_UNIT_VSX_P (V4SFmode)"
2184  "xscvspdp %x0,%x1"
2185  [(set_attr "type" "fp")])
2186
2187;; Generate xvcvhpsp instruction
2188(define_insn "vsx_xvcvhpsp"
2189  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
	(unspec:V4SF [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
2191		     UNSPEC_VSX_CVHPSP))]
2192  "TARGET_P9_VECTOR"
2193  "xvcvhpsp %x0,%x1"
2194  [(set_attr "type" "vecfloat")])
2195
2196;; Generate xvcvsphp
2197(define_insn "vsx_xvcvsphp"
2198  [(set (match_operand:V4SI 0 "register_operand" "=wa")
2199	(unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2200		     UNSPEC_VSX_XVCVSPHP))]
2201  "TARGET_P9_VECTOR"
2202  "xvcvsphp %x0,%x1"
2203[(set_attr "type" "vecfloat")])
2204
;; xscvdpsp used for splatting a scalar to V4SF, knowing that the internal SF
2206;; format of scalars is actually DF.
2207(define_insn "vsx_xscvdpsp_scalar"
2208  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2209	(unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2210		     UNSPEC_VSX_CVSPDP))]
2211  "VECTOR_UNIT_VSX_P (V4SFmode)"
2212  "xscvdpsp %x0,%x1"
2213  [(set_attr "type" "fp")])
2214
;; ISA 2.07 xscvdpspn/xscvspdpn that do not raise an error on signalling NaNs
2216(define_insn "vsx_xscvdpspn"
2217  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2218	(unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wa")]
2219		     UNSPEC_VSX_CVDPSPN))]
2220  "TARGET_XSCVDPSPN"
2221  "xscvdpspn %x0,%x1"
2222  [(set_attr "type" "fp")])
2223
2224(define_insn "vsx_xscvspdpn"
2225  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2226	(unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2227		   UNSPEC_VSX_CVSPDPN))]
2228  "TARGET_XSCVSPDPN"
2229  "xscvspdpn %x0,%x1"
2230  [(set_attr "type" "fp")])
2231
2232(define_insn "vsx_xscvdpspn_scalar"
2233  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2234	(unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2235		     UNSPEC_VSX_CVDPSPN))]
2236  "TARGET_XSCVDPSPN"
2237  "xscvdpspn %x0,%x1"
2238  [(set_attr "type" "fp")])
2239
2240;; Used by direct move to move a SFmode value from GPR to VSX register
2241(define_insn "vsx_xscvspdpn_directmove"
2242  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2243	(unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2244		   UNSPEC_VSX_CVSPDPN))]
2245  "TARGET_XSCVSPDPN"
2246  "xscvspdpn %x0,%x1"
2247  [(set_attr "type" "fp")])
2248
2249;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
2250
2251(define_insn "vsx_xvcv<su>xwsp"
2252  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2253     (any_float:V4SF (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
2254  "VECTOR_UNIT_VSX_P (V4SFmode)"
2255  "xvcv<su>xwsp %x0,%x1"
2256  [(set_attr "type" "vecfloat")])
2257
2258(define_insn "vsx_xvcv<su>xddp"
2259  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2260        (any_float:V2DF (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
2261  "VECTOR_UNIT_VSX_P (V2DFmode)"
2262  "xvcv<su>xddp %x0,%x1"
2263  [(set_attr "type" "vecdouble")])
2264
2265(define_insn "vsx_xvcvsp<su>xws"
2266  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2267        (any_fix:V4SI (match_operand:V4SF 1 "vsx_register_operand" "wa")))]
2268  "VECTOR_UNIT_VSX_P (V4SFmode)"
2269  "xvcvsp<su>xws %x0,%x1"
2270  [(set_attr "type" "vecfloat")])
2271
2272(define_insn "vsx_xvcvdp<su>xds"
2273  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2274        (any_fix:V2DI (match_operand:V2DF 1 "vsx_register_operand" "wa")))]
2275  "VECTOR_UNIT_VSX_P (V2DFmode)"
2276  "xvcvdp<su>xds %x0,%x1"
2277  [(set_attr "type" "vecdouble")])
2278
2279(define_expand "vsx_xvcvsxddp_scale"
2280  [(match_operand:V2DF 0 "vsx_register_operand")
2281   (match_operand:V2DI 1 "vsx_register_operand")
2282   (match_operand:QI 2 "immediate_operand")]
2283  "VECTOR_UNIT_VSX_P (V2DFmode)"
2284{
2285  rtx op0 = operands[0];
2286  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
2288  emit_insn (gen_vsx_xvcvsxddp (op0, op1));
2289  if (scale != 0)
2290    rs6000_scale_v2df (op0, op0, -scale);
2291  DONE;
2292})
2293
2294(define_expand "vsx_xvcvuxddp_scale"
2295  [(match_operand:V2DF 0 "vsx_register_operand")
2296   (match_operand:V2DI 1 "vsx_register_operand")
2297   (match_operand:QI 2 "immediate_operand")]
2298  "VECTOR_UNIT_VSX_P (V2DFmode)"
2299{
2300  rtx op0 = operands[0];
2301  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
2303  emit_insn (gen_vsx_xvcvuxddp (op0, op1));
2304  if (scale != 0)
2305    rs6000_scale_v2df (op0, op0, -scale);
2306  DONE;
2307})
2308
2309(define_expand "vsx_xvcvdpsxds_scale"
2310  [(match_operand:V2DI 0 "vsx_register_operand")
2311   (match_operand:V2DF 1 "vsx_register_operand")
2312   (match_operand:QI 2 "immediate_operand")]
2313  "VECTOR_UNIT_VSX_P (V2DFmode)"
2314{
2315  rtx op0 = operands[0];
2316  rtx op1 = operands[1];
2317  rtx tmp;
2318  int scale = INTVAL (operands[2]);
2319  if (scale == 0)
2320    tmp = op1;
2321  else
2322    {
2323      tmp  = gen_reg_rtx (V2DFmode);
2324      rs6000_scale_v2df (tmp, op1, scale);
2325    }
2326  emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
2327  DONE;
2328})
2329
;; Convert vector of 64-bit floating point numbers to vector of
;; 64-bit unsigned integers.
2332(define_expand "vsx_xvcvdpuxds_scale"
2333  [(match_operand:V2DI 0 "vsx_register_operand")
2334   (match_operand:V2DF 1 "vsx_register_operand")
2335   (match_operand:QI 2 "immediate_operand")]
2336  "VECTOR_UNIT_VSX_P (V2DFmode)"
2337{
2338  rtx op0 = operands[0];
2339  rtx op1 = operands[1];
2340  rtx tmp;
2341  int scale = INTVAL (operands[2]);
2342  if (scale == 0)
2343    tmp = op1;
2344  else
2345    {
2346      tmp = gen_reg_rtx (V2DFmode);
2347      rs6000_scale_v2df (tmp, op1, scale);
2348    }
2349  emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
2350  DONE;
2351})
2352
2353;; Convert from 64-bit to 32-bit types
2354;; Note, favor the Altivec registers since the usual use of these instructions
2355;; is in vector converts and we need to use the Altivec vperm instruction.
2356
2357(define_insn "vsx_xvcvdpsxws"
2358  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2359	(unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2360		     UNSPEC_VSX_CVDPSXWS))]
2361  "VECTOR_UNIT_VSX_P (V2DFmode)"
2362  "xvcvdpsxws %x0,%x1"
2363  [(set_attr "type" "vecdouble")])
2364
2365(define_insn "vsx_xvcvdpuxws"
2366  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2367	(unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2368		     UNSPEC_VSX_CVDPUXWS))]
2369  "VECTOR_UNIT_VSX_P (V2DFmode)"
2370  "xvcvdpuxws %x0,%x1"
2371  [(set_attr "type" "vecdouble")])
2372
2373(define_insn "vsx_xvcvsxdsp"
2374  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2375	(unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2376		     UNSPEC_VSX_CVSXDSP))]
2377  "VECTOR_UNIT_VSX_P (V2DFmode)"
2378  "xvcvsxdsp %x0,%x1"
2379  [(set_attr "type" "vecfloat")])
2380
2381(define_insn "vsx_xvcvuxdsp"
2382  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2383	(unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2384		     UNSPEC_VSX_CVUXDSP))]
2385  "VECTOR_UNIT_VSX_P (V2DFmode)"
2386  "xvcvuxdsp %x0,%x1"
2387  [(set_attr "type" "vecdouble")])
2388
2389;; Convert vector of 32-bit signed/unsigned integers to vector of
2390;; 64-bit floating point numbers.
2391(define_insn "vsx_xvcv<su>xwdp_be"
2392  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2393     (any_float:V2DF
2394       (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2395	 (parallel [(const_int 0) (const_int 2)]))))]
2396  "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2397  "xvcv<su>xwdp %x0,%x1"
2398  [(set_attr "type" "vecdouble")])
2399
2400(define_insn "vsx_xvcv<su>xwdp_le"
2401  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2402     (any_float:V2DF
2403       (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2404	 (parallel [(const_int 1) (const_int 3)]))))]
2405  "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2406  "xvcv<su>xwdp %x0,%x1"
2407  [(set_attr "type" "vecdouble")])
2408
2409(define_expand "vsx_xvcv<su>xwdp"
2410  [(match_operand:V2DF 0 "vsx_register_operand")
2411   (match_operand:V4SI 1 "vsx_register_operand")
2412   (any_float (pc))]
2413  "VECTOR_UNIT_VSX_P (V2DFmode)"
2414{
2415  if (BYTES_BIG_ENDIAN)
2416    emit_insn (gen_vsx_xvcv<su>xwdp_be (operands[0], operands[1]));
2417  else
2418    emit_insn (gen_vsx_xvcv<su>xwdp_le (operands[0], operands[1]));
2419  DONE;
2420})
2421
2422(define_insn "vsx_xvcvsxwdp_df"
2423  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2424	(unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2425		   UNSPEC_VSX_CVSXWDP))]
2426  "TARGET_VSX"
2427  "xvcvsxwdp %x0,%x1"
2428  [(set_attr "type" "vecdouble")])
2429
2430(define_insn "vsx_xvcvuxwdp_df"
2431  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2432	(unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2433		   UNSPEC_VSX_CVUXWDP))]
2434  "TARGET_VSX"
2435  "xvcvuxwdp %x0,%x1"
2436  [(set_attr "type" "vecdouble")])
2437
2438;; Convert vector of 32-bit floating point numbers to vector of
2439;; 64-bit signed/unsigned integers.
2440(define_insn "vsx_xvcvsp<su>xds_be"
2441  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2442     (any_fix:V2DI
2443       (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2444	 (parallel [(const_int 0) (const_int 2)]))))]
2445  "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2446  "xvcvsp<su>xds %x0,%x1"
2447  [(set_attr "type" "vecdouble")])
2448
2449(define_insn "vsx_xvcvsp<su>xds_le"
2450  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2451     (any_fix:V2DI
2452       (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2453	 (parallel [(const_int 1) (const_int 3)]))))]
2454  "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2455  "xvcvsp<su>xds %x0,%x1"
2456  [(set_attr "type" "vecdouble")])
2457
2458(define_expand "vsx_xvcvsp<su>xds"
2459  [(match_operand:V2DI 0 "vsx_register_operand")
2460   (match_operand:V4SF 1 "vsx_register_operand")
2461   (any_fix (pc))]
2462  "VECTOR_UNIT_VSX_P (V2DFmode)"
2463{
2464  if (BYTES_BIG_ENDIAN)
2465    emit_insn (gen_vsx_xvcvsp<su>xds_be (operands[0], operands[1]));
2466  else
2467    emit_insn (gen_vsx_xvcvsp<su>xds_le (operands[0], operands[1]));
2468  DONE;
2469})
2470
;; Generate float2 double
;; Convert two vectors of double to a vector of floats.
2473(define_expand "float2_v2df"
2474  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2475   (use (match_operand:V2DF 1 "register_operand" "wa"))
2476   (use (match_operand:V2DF 2 "register_operand" "wa"))]
2477 "VECTOR_UNIT_VSX_P (V4SFmode)"
2478{
2479  rtx rtx_src1, rtx_src2, rtx_dst;
2480
2481  rtx_dst = operands[0];
2482  rtx_src1 = operands[1];
2483  rtx_src2 = operands[2];
2484
2485  rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
2486  DONE;
2487})
2488
;; Generate float2
;; Convert two vectors of long long signed ints to a vector of floats.
2491(define_expand "float2_v2di"
2492  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2493   (use (match_operand:V2DI 1 "register_operand" "wa"))
2494   (use (match_operand:V2DI 2 "register_operand" "wa"))]
2495 "VECTOR_UNIT_VSX_P (V4SFmode)"
2496{
2497  rtx rtx_src1, rtx_src2, rtx_dst;
2498
2499  rtx_dst = operands[0];
2500  rtx_src1 = operands[1];
2501  rtx_src2 = operands[2];
2502
2503  rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2504  DONE;
2505})
2506
;; Generate uns_float2
;; Convert two vectors of long long unsigned ints to a vector of floats.
2509(define_expand "uns_float2_v2di"
2510  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2511   (use (match_operand:V2DI 1 "register_operand" "wa"))
2512   (use (match_operand:V2DI 2 "register_operand" "wa"))]
2513 "VECTOR_UNIT_VSX_P (V4SFmode)"
2514{
2515  rtx rtx_src1, rtx_src2, rtx_dst;
2516
2517  rtx_dst = operands[0];
2518  rtx_src1 = operands[1];
2519  rtx_src2 = operands[2];
2520
  rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
2522  DONE;
2523})
2524
;; Generate floate
;; Convert double or long long signed to float
;; (Only even words are valid, BE numbering)
2528(define_expand "floate<mode>"
2529  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2530   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2531  "VECTOR_UNIT_VSX_P (V4SFmode)"
2532{
2533  if (BYTES_BIG_ENDIAN)
2534    {
      /* Shift left one word to put the even words in the correct
	 location.  */
2536      rtx rtx_tmp;
2537      rtx rtx_val = GEN_INT (4);
2538
2539      rtx_tmp = gen_reg_rtx (V4SFmode);
2540      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2541      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2542		 rtx_tmp, rtx_tmp, rtx_val));
2543    }
2544  else
2545    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2546
2547  DONE;
2548})
2549
2550;; Generate uns_floate
2551;; convert long long unsigned to float
2552;; (Only even words are valid, BE numbering)
2553(define_expand "unsfloatev2di"
2554  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2555   (use (match_operand:V2DI 1 "register_operand" "wa"))]
2556  "VECTOR_UNIT_VSX_P (V4SFmode)"
2557{
2558  if (BYTES_BIG_ENDIAN)
2559    {
      /* Shift left one word to put the even words in the correct
	 location.  */
2561      rtx rtx_tmp;
2562      rtx rtx_val = GEN_INT (4);
2563
2564      rtx_tmp = gen_reg_rtx (V4SFmode);
2565      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2566      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2567		 rtx_tmp, rtx_tmp, rtx_val));
2568    }
2569  else
2570    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2571
2572  DONE;
2573})
2574
;; Generate floato
;; Convert double or long long signed to float
;; (Only odd words are valid, BE numbering)
2578(define_expand "floato<mode>"
2579  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2580   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2581  "VECTOR_UNIT_VSX_P (V4SFmode)"
2582{
2583  if (BYTES_BIG_ENDIAN)
2584    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2585  else
2586    {
      /* Shift left one word to put the odd words in the correct
	 location.  */
2588      rtx rtx_tmp;
2589      rtx rtx_val = GEN_INT (4);
2590
2591      rtx_tmp = gen_reg_rtx (V4SFmode);
2592      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2593      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2594		 rtx_tmp, rtx_tmp, rtx_val));
2595    }
2596  DONE;
2597})
2598
2599;; Generate uns_floato
2600;; convert long long unsigned to float
2601;; (Only odd words are valid, BE numbering)
2602(define_expand "unsfloatov2di"
2603 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2604  (use (match_operand:V2DI 1 "register_operand" "wa"))]
2605 "VECTOR_UNIT_VSX_P (V4SFmode)"
2606{
2607  if (BYTES_BIG_ENDIAN)
2608    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2609  else
2610    {
      /* Shift left one word to put the odd words in the correct
	 location.  */
2612      rtx rtx_tmp;
2613      rtx rtx_val = GEN_INT (4);
2614
2615      rtx_tmp = gen_reg_rtx (V4SFmode);
2616      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2617      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2618		 rtx_tmp, rtx_tmp, rtx_val));
2619    }
2620  DONE;
2621})
2622
2623;; Generate vsigned2
2624;; convert two double float vectors to a vector of single precision ints
2625(define_expand "vsigned2_v2df"
2626  [(match_operand:V4SI 0 "register_operand" "=wa")
2627   (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2628		 (match_operand:V2DF 2 "register_operand" "wa")]
2629  UNSPEC_VSX_VSIGNED2)]
2630  "TARGET_VSX"
2631{
2632  rtx rtx_src1, rtx_src2, rtx_dst;
  bool signed_convert = true;
2634
2635  rtx_dst = operands[0];
2636  rtx_src1 = operands[1];
2637  rtx_src2 = operands[2];
2638
2639  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2640  DONE;
2641})
2642
2643;; Generate vsignedo_v2df
2644;; signed double float to int convert odd word
2645(define_expand "vsignedo_v2df"
2646  [(set (match_operand:V4SI 0 "register_operand" "=wa")
2647	(match_operand:V2DF 1 "register_operand" "wa"))]
2648  "TARGET_VSX"
2649{
2650  if (BYTES_BIG_ENDIAN)
2651    {
2652      rtx rtx_tmp;
2653      rtx rtx_val = GEN_INT (12);
2654      rtx_tmp = gen_reg_rtx (V4SImode);
2655
2656      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2657
      /* Big endian word numbering for words in operand is 0 1 2 3.
	 Take (operand[1] operand[1]) and shift left one word
	 0 1 2 3    0 1 2 3  =>  1 2 3 0
	 Words 1 and 3 are now where they need to be for the result.  */
2662
2663      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2664		 rtx_tmp, rtx_val));
2665    }
2666  else
2667    /* Little endian word numbering for operand is 3 2 1 0.
2668       Result words 3 and 1 are where they need to be.  */
2669    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2670
2671  DONE;
2672}
2673  [(set_attr "type" "veccomplex")])
2674
2675;; Generate vsignede_v2df
2676;; signed double float to int even word
2677(define_expand "vsignede_v2df"
2678  [(set (match_operand:V4SI 0 "register_operand" "=v")
2679	(match_operand:V2DF 1 "register_operand" "v"))]
2680  "TARGET_VSX"
2681{
2682  if (BYTES_BIG_ENDIAN)
    /* Big endian word numbering for words in operand is 0 1 2 3.
       Result words 0 and 2 are already where they need to be.  */
2685    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2686
2687  else
2688    {
2689      rtx rtx_tmp;
2690      rtx rtx_val = GEN_INT (12);
2691      rtx_tmp = gen_reg_rtx (V4SImode);
2692
2693      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2694
2695      /* Little endian word numbering for operand is 3 2 1 0.
2696	 take (operand[1] operand[1]) and shift left three words
2697	 0 1 2 3   0 1 2 3  =>  3 0 1 2
2698	 Words 0 and 2 are now where they need to be for the result.  */
2699      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2700		 rtx_tmp, rtx_val));
2701    }
2702  DONE;
2703}
2704  [(set_attr "type" "veccomplex")])
2705
;; Generate vunsigned2
;; Convert two double float vectors to a vector of single precision
;; unsigned ints.
2709(define_expand "vunsigned2_v2df"
2710[(match_operand:V4SI 0 "register_operand" "=v")
2711 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2712	       (match_operand:V2DF 2 "register_operand" "v")]
2713	      UNSPEC_VSX_VSIGNED2)]
2714 "TARGET_VSX"
2715{
2716  rtx rtx_src1, rtx_src2, rtx_dst;
  bool signed_convert = false;
2718
2719  rtx_dst = operands[0];
2720  rtx_src1 = operands[1];
2721  rtx_src2 = operands[2];
2722
2723  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2724  DONE;
2725})
2726
2727;; Generate vunsignedo_v2df
2728;; unsigned double float to int convert odd word
2729(define_expand "vunsignedo_v2df"
2730  [(set (match_operand:V4SI 0 "register_operand" "=v")
2731	(match_operand:V2DF 1 "register_operand" "v"))]
2732  "TARGET_VSX"
2733{
2734  if (BYTES_BIG_ENDIAN)
2735    {
2736      rtx rtx_tmp;
2737      rtx rtx_val = GEN_INT (12);
2738      rtx_tmp = gen_reg_rtx (V4SImode);
2739
2740      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2741
      /* Big endian word numbering for words in operand is 0 1 2 3.
	 Take (operand[1] operand[1]) and shift left one word
	 0 1 2 3    0 1 2 3  =>  1 2 3 0
	 Words 1 and 3 are now where they need to be for the result.  */
2746
2747      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2748		 rtx_tmp, rtx_val));
2749    }
2750  else
2751    /* Little endian word numbering for operand is 3 2 1 0.
2752       Result words 3 and 1 are where they need to be.  */
2753    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2754
2755  DONE;
2756}
2757  [(set_attr "type" "veccomplex")])
2758
2759;; Generate vunsignede_v2df
2760;; unsigned double float to int even word
2761(define_expand "vunsignede_v2df"
2762  [(set (match_operand:V4SI 0 "register_operand" "=v")
2763	(match_operand:V2DF 1 "register_operand" "v"))]
2764  "TARGET_VSX"
2765{
2766  if (BYTES_BIG_ENDIAN)
    /* Big endian word numbering for words in operand is 0 1 2 3.
       Result words 0 and 2 are already where they need to be.  */
2769    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2770
2771  else
2772    {
2773      rtx rtx_tmp;
2774      rtx rtx_val = GEN_INT (12);
2775      rtx_tmp = gen_reg_rtx (V4SImode);
2776
2777      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2778
2779      /* Little endian word numbering for operand is 3 2 1 0.
2780	 take (operand[1] operand[1]) and shift left three words
2781	 0 1 2 3   0 1 2 3  =>  3 0 1 2
2782	 Words 0 and 2 are now where they need to be for the result.  */
2783      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2784		 rtx_tmp, rtx_val));
2785    }
2786  DONE;
2787}
2788  [(set_attr "type" "veccomplex")])
2789
;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
;; the xvrdpiz instruction does not truncate the value if the floating
;; point value is < LONG_MIN or > LONG_MAX.
2793(define_insn "*vsx_float_fix_v2df2"
2794  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,?wa")
2795	(float:V2DF
2796	 (fix:V2DI
2797	  (match_operand:V2DF 1 "vsx_register_operand" "wa,?wa"))))]
2798  "TARGET_HARD_FLOAT
2799   && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2800   && !flag_trapping_math && TARGET_FRIZ"
2801  "xvrdpiz %x0,%x1"
2802  [(set_attr "type" "vecdouble")])
2803
2804
2805;; Permute operations
2806
2807;; Build a V2DF/V2DI vector from two scalars
2808(define_insn "vsx_concat_<mode>"
2809  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2810	(vec_concat:VSX_D
2811	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
2812	 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
2813  "VECTOR_MEM_VSX_P (<MODE>mode)"
2814{
2815  if (which_alternative == 0)
2816    return (BYTES_BIG_ENDIAN
2817	    ? "xxpermdi %x0,%x1,%x2,0"
2818	    : "xxpermdi %x0,%x2,%x1,0");
2819
2820  else if (which_alternative == 1)
2821    return (BYTES_BIG_ENDIAN
2822	    ? "mtvsrdd %x0,%1,%2"
2823	    : "mtvsrdd %x0,%2,%1");
2824
2825  else
2826    gcc_unreachable ();
2827}
2828  [(set_attr "type" "vecperm")])
2829
2830;; Combiner patterns to allow creating XXPERMDI's to access either double
2831;; word element in a vector register.
2832(define_insn "*vsx_concat_<mode>_1"
2833  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2834	(vec_concat:VSX_D
2835	 (vec_select:<VS_scalar>
2836	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2837	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2838	 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
2839  "VECTOR_MEM_VSX_P (<MODE>mode)"
2840{
2841  HOST_WIDE_INT dword = INTVAL (operands[2]);
2842  if (BYTES_BIG_ENDIAN)
2843    {
      operands[4] = GEN_INT (2 * dword);
2845      return "xxpermdi %x0,%x1,%x3,%4";
2846    }
2847  else
2848    {
2849      operands[4] = GEN_INT (!dword);
2850      return "xxpermdi %x0,%x3,%x1,%4";
2851    }
2852}
2853  [(set_attr "type" "vecperm")])
2854
2855(define_insn "*vsx_concat_<mode>_2"
2856  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2857	(vec_concat:VSX_D
2858	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
2859	 (vec_select:<VS_scalar>
2860	  (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
2861	  (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
2862  "VECTOR_MEM_VSX_P (<MODE>mode)"
2863{
2864  HOST_WIDE_INT dword = INTVAL (operands[3]);
2865  if (BYTES_BIG_ENDIAN)
2866    {
2867      operands[4] = GEN_INT (dword);
2868      return "xxpermdi %x0,%x1,%x2,%4";
2869    }
2870  else
2871    {
2872      operands[4] = GEN_INT (2 * !dword);
2873      return "xxpermdi %x0,%x2,%x1,%4";
2874    }
2875}
2876  [(set_attr "type" "vecperm")])
2877
2878(define_insn "*vsx_concat_<mode>_3"
2879  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2880	(vec_concat:VSX_D
2881	 (vec_select:<VS_scalar>
2882	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
2883	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
2884	 (vec_select:<VS_scalar>
2885	  (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
2886	  (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
2887  "VECTOR_MEM_VSX_P (<MODE>mode)"
2888{
2889  HOST_WIDE_INT dword1 = INTVAL (operands[2]);
2890  HOST_WIDE_INT dword2 = INTVAL (operands[4]);
2891  if (BYTES_BIG_ENDIAN)
2892    {
2893      operands[5] = GEN_INT ((2 * dword1) + dword2);
2894      return "xxpermdi %x0,%x1,%x3,%5";
2895    }
2896  else
2897    {
2898      operands[5] = GEN_INT ((2 * !dword2) + !dword1);
2899      return "xxpermdi %x0,%x3,%x1,%5";
2900    }
2901}
2902  [(set_attr "type" "vecperm")])
2903
2904;; Special purpose concat using xxpermdi to glue two single precision values
2905;; together, relying on the fact that internally scalar floats are represented
;; as doubles.  This is used to initialize a V4SF vector with 4 floats.
2907(define_insn "vsx_concat_v2sf"
2908  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2909	(unspec:V2DF
2910	 [(match_operand:SF 1 "vsx_register_operand" "wa")
2911	  (match_operand:SF 2 "vsx_register_operand" "wa")]
2912	 UNSPEC_VSX_CONCAT))]
2913  "VECTOR_MEM_VSX_P (V2DFmode)"
2914{
2915  if (BYTES_BIG_ENDIAN)
2916    return "xxpermdi %x0,%x1,%x2,0";
2917  else
2918    return "xxpermdi %x0,%x2,%x1,0";
2919}
2920  [(set_attr "type" "vecperm")])
2921
2922;; Concatenate 4 SImode elements into a V4SImode reg.
2923(define_expand "vsx_init_v4si"
2924  [(use (match_operand:V4SI 0 "gpc_reg_operand"))
2925   (use (match_operand:SI 1 "gpc_reg_operand"))
2926   (use (match_operand:SI 2 "gpc_reg_operand"))
2927   (use (match_operand:SI 3 "gpc_reg_operand"))
2928   (use (match_operand:SI 4 "gpc_reg_operand"))]
2929   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
2930{
2931  rtx a = gen_reg_rtx (DImode);
2932  rtx b = gen_reg_rtx (DImode);
2933  rtx c = gen_reg_rtx (DImode);
2934  rtx d = gen_reg_rtx (DImode);
2935  emit_insn (gen_zero_extendsidi2 (a, operands[1]));
2936  emit_insn (gen_zero_extendsidi2 (b, operands[2]));
2937  emit_insn (gen_zero_extendsidi2 (c, operands[3]));
2938  emit_insn (gen_zero_extendsidi2 (d, operands[4]));
2939  if (!BYTES_BIG_ENDIAN)
2940    {
2941      std::swap (a, b);
2942      std::swap (c, d);
2943    }
2944
2945  rtx aa = gen_reg_rtx (DImode);
2946  rtx ab = gen_reg_rtx (DImode);
2947  rtx cc = gen_reg_rtx (DImode);
2948  rtx cd = gen_reg_rtx (DImode);
2949  emit_insn (gen_ashldi3 (aa, a, GEN_INT (32)));
2950  emit_insn (gen_ashldi3 (cc, c, GEN_INT (32)));
2951  emit_insn (gen_iordi3 (ab, aa, b));
2952  emit_insn (gen_iordi3 (cd, cc, d));
2953
2954  rtx abcd = gen_reg_rtx (V2DImode);
2955  emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
2956  emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
2957  DONE;
2958})
2959
2960;; xxpermdi for little endian loads and stores.  We need several of
2961;; these since the form of the PARALLEL differs by mode.
2962(define_insn "*vsx_xxpermdi2_le_<mode>"
2963  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2964        (vec_select:VSX_D
2965          (match_operand:VSX_D 1 "vsx_register_operand" "wa")
2966          (parallel [(const_int 1) (const_int 0)])))]
2967  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
2968  "xxpermdi %x0,%x1,%x1,2"
2969  [(set_attr "type" "vecperm")])
2970
2971(define_insn "xxswapd_v16qi"
2972  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
2973	(vec_select:V16QI
2974	  (match_operand:V16QI 1 "vsx_register_operand" "wa")
2975	  (parallel [(const_int 8) (const_int 9)
2976		     (const_int 10) (const_int 11)
2977		     (const_int 12) (const_int 13)
2978		     (const_int 14) (const_int 15)
2979		     (const_int 0) (const_int 1)
2980		     (const_int 2) (const_int 3)
2981		     (const_int 4) (const_int 5)
2982		     (const_int 6) (const_int 7)])))]
2983  "TARGET_VSX"
2984;; AIX does not support the extended mnemonic xxswapd.  Use the basic
2985;; mnemonic xxpermdi instead.
2986  "xxpermdi %x0,%x1,%x1,2"
2987  [(set_attr "type" "vecperm")])
2988
2989(define_insn "xxswapd_v8hi"
2990  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
2991	(vec_select:V8HI
2992	  (match_operand:V8HI 1 "vsx_register_operand" "wa")
2993	  (parallel [(const_int 4) (const_int 5)
2994		     (const_int 6) (const_int 7)
2995		     (const_int 0) (const_int 1)
2996		     (const_int 2) (const_int 3)])))]
2997  "TARGET_VSX"
2998;; AIX does not support the extended mnemonic xxswapd.  Use the basic
2999;; mnemonic xxpermdi instead.
3000  "xxpermdi %x0,%x1,%x1,2"
3001  [(set_attr "type" "vecperm")])
3002
3003(define_insn "xxswapd_<mode>"
3004  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3005	(vec_select:VSX_W
3006	  (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3007	  (parallel [(const_int 2) (const_int 3)
3008		     (const_int 0) (const_int 1)])))]
3009  "TARGET_VSX"
;; AIX does not support the extended mnemonic xxswapd.  Use the basic
;; mnemonic xxpermdi instead.
3012  "xxpermdi %x0,%x1,%x1,2"
3013  [(set_attr "type" "vecperm")])
3014
3015(define_insn "xxswapd_<mode>"
3016  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3017	(vec_select:VSX_D
3018	  (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3019	  (parallel [(const_int 1) (const_int 0)])))]
3020  "TARGET_VSX"
;; AIX does not support the extended mnemonic xxswapd.  Use the basic
;; mnemonic xxpermdi instead.
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])
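
;; In the xxswapd patterns above, the immediate 2 (0b10) tells xxpermdi to
;; take doubleword 1 of the first source and doubleword 0 of the second;
;; with both sources the same register this swaps the two doublewords, e.g.
;; xxswapd of the V2DI {x, y} produces {y, x}.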

;; lxvd2x for little endian loads.  We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_lxvd2x2_le_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (vec_select:VSX_D
          (match_operand:VSX_D 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x4_le_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
        (vec_select:VSX_W
          (match_operand:VSX_W 1 "memory_operand" "Z")
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x8_le_V8HI"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x16_le_V16QI"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

;; stxvd2x for little endian stores.  We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_stxvd2x2_le_<mode>"
  [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
        (vec_select:VSX_D
          (match_operand:VSX_D 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x4_le_<mode>"
  [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
        (vec_select:VSX_W
          (match_operand:VSX_W 1 "vsx_register_operand" "wa")
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x8_le_V8HI"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (vec_select:V8HI
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x16_le_V16QI"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (vec_select:V16QI
          (match_operand:V16QI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

;; Convert a TImode value into V1TImode
(define_expand "vsx_set_v1ti"
  [(match_operand:V1TI 0 "nonimmediate_operand")
   (match_operand:V1TI 1 "nonimmediate_operand")
   (match_operand:TI 2 "input_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (V1TImode)"
{
  if (operands[3] != const0_rtx)
    gcc_unreachable ();

  emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
  DONE;
})

;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
(define_expand "vsx_set_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
   (use (match_operand:VSX_D 1 "vsx_register_operand"))
   (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
   (use (match_operand:QI 3 "const_0_to_1_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx dest = operands[0];
  rtx vec_reg = operands[1];
  rtx value = operands[2];
  rtx ele = operands[3];
  rtx tmp = gen_reg_rtx (<VS_scalar>mode);

  if (ele == const0_rtx)
    {
      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
      emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
      DONE;
    }
  else if (ele == const1_rtx)
    {
      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
      emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
      DONE;
    }
  else
    gcc_unreachable ();
})
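
;; For example, setting element 0 of the V2DF {x, y} to v extracts y and
;; emits vec_concat {v, y}, while setting element 1 extracts x and emits
;; vec_concat {x, v}.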

;; Extract a DF/DI element from V2DF/V2DI
;; Optimize cases where we can do a simple or direct move,
;; or see if we can avoid doing the move at all.

;; There are some unresolved problems with reload that show up if an Altivec
;; register was picked.  Limit the scalar value to FPRs for now.

(define_insn "vsx_extract_<mode>"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d,  wr, wr")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_D 1 "gpc_reg_operand"      "wa, wa, wa, wa")
	 (parallel
	  [(match_operand:QI 2 "const_0_to_1_operand"  "wD, n,  wD, n")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  int element = INTVAL (operands[2]);
  int op0_regno = REGNO (operands[0]);
  int op1_regno = REGNO (operands[1]);
  int fldDM;

  gcc_assert (IN_RANGE (element, 0, 1));
  gcc_assert (VSX_REGNO_P (op1_regno));

  if (element == VECTOR_ELEMENT_SCALAR_64BIT)
    {
      if (op0_regno == op1_regno)
	return ASM_COMMENT_START " vec_extract to same register";

      else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
	       && TARGET_POWERPC64)
	return "mfvsrd %0,%x1";

      else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
	return "fmr %0,%1";

      else if (VSX_REGNO_P (op0_regno))
	return "xxlor %x0,%x1,%x1";

      else
	gcc_unreachable ();
    }

  else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
	   && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
    return "mfvsrld %0,%x1";

  else if (VSX_REGNO_P (op0_regno))
    {
      fldDM = element << 1;
      if (!BYTES_BIG_ENDIAN)
	fldDM = 3 - fldDM;
      operands[3] = GEN_INT (fldDM);
      return "xxpermdi %x0,%x1,%x1,%3";
    }

  else
    gcc_unreachable ();
}
  [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")
   (set_attr "isa" "*,*,p8v,p9v")])
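
;; A worked example of the DM computation above: extracting element 1 on big
;; endian gives fldDM = 1 << 1 = 2, i.e. "xxpermdi %x0,%x1,%x1,2", which
;; moves doubleword 1 into the scalar (doubleword 0) position.  On little
;; endian element 0 lives in doubleword 1, so fldDM = 3 - 0 = 3 moves it
;; down, while element 1 gives fldDM = 1, which keeps doubleword 0 in place.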

;; Optimize extracting a single scalar element from memory.
(define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "=wa,wr")
	(vec_select:<VSX_D:VS_scalar>
	 (match_operand:VSX_D 1 "memory_operand" "m,m")
	 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
   (clobber (match_scratch:P 3 "=&b,&b"))]
  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
					   operands[3], <VSX_D:VS_scalar>mode);
}
  [(set_attr "type" "fpload,load")
   (set_attr "length" "8")])

;; Optimize storing to memory a single scalar element that is already in
;; the right location.
(define_insn "*vsx_extract_<mode>_store"
  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_D 1 "register_operand" "d,v,v")
	 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   stfd%U0%X0 %1,%0
   stxsdx %x1,%y0
   stxsd %1,%0"
  [(set_attr "type" "fpstore")
   (set_attr "isa" "*,p7v,p9v")])

;; Variable V2DI/V2DF extract shift
(define_insn "vsx_vslo_<mode>"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
			     (match_operand:V2DI 2 "gpc_reg_operand" "v")]
			    UNSPEC_VSX_VSLO))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "vslo %0,%1,%2"
  [(set_attr "type" "vecperm")])

;; Variable V2DI/V2DF extract from a register
(define_insn_and_split "vsx_extract_<mode>_var"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
			     (match_operand:DI 2 "gpc_reg_operand" "r")]
			    UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r"))
   (clobber (match_scratch:V2DI 4 "=&v"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
				operands[3], operands[4]);
  DONE;
})

;; Variable V2DI/V2DF extract from memory
(define_insn_and_split "*vsx_extract_<mode>_var_load"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=wa,r")
	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "memory_operand" "Q,Q")
			     (match_operand:DI 2 "gpc_reg_operand" "r,r")]
			    UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=&b,&b"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
					   operands[3], <VS_scalar>mode);
}
  [(set_attr "type" "fpload,load")])

;; Extract a SF element from V4SF
(define_insn_and_split "vsx_extract_v4sf"
  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
	(vec_select:SF
	 (match_operand:V4SF 1 "vsx_register_operand" "wa")
	 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
   (clobber (match_scratch:V4SF 3 "=0"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = operands[3];
  rtx tmp;
  HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);

  if (ele == 0)
    tmp = op1;
  else
    {
      if (GET_CODE (op3) == SCRATCH)
	op3 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
      tmp = op3;
    }
  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "fp")])
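
;; A worked example: extracting element 2 on big endian rotates the vector
;; left by two words (xxsldwi) so the element lands in word 0, then
;; xscvspdp converts that word to the scalar value.  On little endian the
;; same request maps to word 3 - 2 = 1 and a one-word rotate.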

(define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
  [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
	(vec_select:SF
	 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
   (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
  "VECTOR_MEM_VSX_P (V4SFmode)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
					   operands[3], SFmode);
}
  [(set_attr "type" "fpload,fpload,fpload,load")
   (set_attr "length" "8")
   (set_attr "isa" "*,p7v,p9v,*")])

;; Variable V4SF extract from a register
(define_insn_and_split "vsx_extract_v4sf_var"
  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
	(unspec:SF [(match_operand:V4SF 1 "gpc_reg_operand" "v")
		    (match_operand:DI 2 "gpc_reg_operand" "r")]
		   UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r"))
   (clobber (match_scratch:V2DI 4 "=&v"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
				operands[3], operands[4]);
  DONE;
})

;; Variable V4SF extract from memory
(define_insn_and_split "*vsx_extract_v4sf_var_load"
  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r")
	(unspec:SF [(match_operand:V4SF 1 "memory_operand" "Q,Q")
		    (match_operand:DI 2 "gpc_reg_operand" "r,r")]
		   UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=&b,&b"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
					   operands[3], SFmode);
}
  [(set_attr "type" "fpload,load")])

;; Expand the builtin form of xxpermdi to canonical rtl.
(define_expand "vsx_xxpermdi_<mode>"
  [(match_operand:VSX_L 0 "vsx_register_operand")
   (match_operand:VSX_L 1 "vsx_register_operand")
   (match_operand:VSX_L 2 "vsx_register_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  int mask = INTVAL (operands[3]);
  rtx perm0 = GEN_INT ((mask >> 1) & 1);
  rtx perm1 = GEN_INT ((mask & 1) + 2);
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);

  if (<MODE>mode == V2DFmode)
    gen = gen_vsx_xxpermdi2_v2df_1;
  else
    {
      gen = gen_vsx_xxpermdi2_v2di_1;
      if (<MODE>mode != V2DImode)
	{
	  target = gen_lowpart (V2DImode, target);
	  op0 = gen_lowpart (V2DImode, op0);
	  op1 = gen_lowpart (V2DImode, op1);
	}
    }
  emit_insn (gen (target, op0, op1, perm0, perm1));
  DONE;
})

;; Special version of xxpermdi that retains big-endian semantics.
(define_expand "vsx_xxpermdi_<mode>_be"
  [(match_operand:VSX_L 0 "vsx_register_operand")
   (match_operand:VSX_L 1 "vsx_register_operand")
   (match_operand:VSX_L 2 "vsx_register_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  int mask = INTVAL (operands[3]);
  rtx perm0 = GEN_INT ((mask >> 1) & 1);
  rtx perm1 = GEN_INT ((mask & 1) + 2);
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);

  if (<MODE>mode == V2DFmode)
    gen = gen_vsx_xxpermdi2_v2df_1;
  else
    {
      gen = gen_vsx_xxpermdi2_v2di_1;
      if (<MODE>mode != V2DImode)
	{
	  target = gen_lowpart (V2DImode, target);
	  op0 = gen_lowpart (V2DImode, op0);
	  op1 = gen_lowpart (V2DImode, op1);
	}
    }
  /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
     transformation we don't want; it is necessary for
     rs6000_expand_vec_perm_const_1 but not for this use.  So we
     prepare for that by reversing the transformation here.  */
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen (target, op0, op1, perm0, perm1));
  else
    {
      rtx p0 = GEN_INT (3 - INTVAL (perm1));
      rtx p1 = GEN_INT (3 - INTVAL (perm0));
      emit_insn (gen (target, op1, op0, p0, p1));
    }
  DONE;
})
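
;; For example, with mask 1 (perm0 = 0, perm1 = 3) on little endian the code
;; above emits gen (target, op1, op0, 0, 3); vsx_xxpermdi2_<mode>_1 then
;; swaps the operands and selectors back, so the final instruction is the
;; same "xxpermdi %x0,<op0>,<op1>,1" a big endian target would produce.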

(define_insn "vsx_xxpermdi2_<mode>_1"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
	(vec_select:VSX_D
	  (vec_concat:<VS_double>
	    (match_operand:VSX_D 1 "vsx_register_operand" "wa")
	    (match_operand:VSX_D 2 "vsx_register_operand" "wa"))
	  (parallel [(match_operand 3 "const_0_to_1_operand" "")
		     (match_operand 4 "const_2_to_3_operand" "")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  int op3, op4, mask;

  /* For little endian, swap operands and invert/swap selectors
     to get the correct xxpermdi.  The operand swap sets up the
     inputs as a little endian array.  The selectors are swapped
     because they are defined to use big endian ordering.  The
     selectors are inverted to get the correct doublewords for
     little endian ordering.  */
  if (BYTES_BIG_ENDIAN)
    {
      op3 = INTVAL (operands[3]);
      op4 = INTVAL (operands[4]);
    }
  else
    {
      op3 = 3 - INTVAL (operands[4]);
      op4 = 3 - INTVAL (operands[3]);
    }

  mask = (op3 << 1) | (op4 - 2);
  operands[3] = GEN_INT (mask);

  if (BYTES_BIG_ENDIAN)
    return "xxpermdi %x0,%x1,%x2,%3";
  else
    return "xxpermdi %x0,%x2,%x1,%3";
}
  [(set_attr "type" "vecperm")])
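
;; A worked example of the mask encoding above: the selectors (1, 2) pick
;; doubleword 1 of operand 1 and doubleword 0 of operand 2, giving
;; mask = (1 << 1) | (2 - 2) = 2 on big endian.  On little endian the same
;; RTL yields op3 = 3 - 2 = 1 and op4 = 3 - 1 = 2, so the mask is again 2
;; but the operands are swapped in the output template.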

;; Extraction of a single element from a small integer vector.  Until
;; ISA 3.0, none of the small integer types could be held in a vector
;; register as a scalar, so we had to extract the value to a DImode register
;; and either do a direct move or a store.
(define_expand  "vsx_extract_<mode>"
  [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
		   (vec_select:<VS_scalar>
		    (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
		    (parallel [(match_operand:QI 2 "const_int_operand")])))
	      (clobber (match_scratch:VSX_EXTRACT_I 3))])]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
{
  /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
  if (TARGET_P9_VECTOR)
    {
      emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
					    operands[2]));
      DONE;
    }
})

(define_insn "vsx_extract_<mode>_p9"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
   (clobber (match_scratch:SI 3 "=r,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
{
  if (which_alternative == 0)
    return "#";

  else
    {
      HOST_WIDE_INT elt = INTVAL (operands[2]);
      HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN
			       ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
			       : elt);

      HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
      HOST_WIDE_INT offset = unit_size * elt_adj;

      operands[2] = GEN_INT (offset);
      if (unit_size == 4)
	return "xxextractuw %x0,%x1,%2";
      else
	return "vextractu<wd> %0,%1,%2";
    }
}
  [(set_attr "type" "vecsimple")
   (set_attr "isa" "p9v,*")])
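
;; For example, extracting byte element 3 of a V16QI on little endian gives
;; elt_adj = 15 - 3 = 12 and a byte offset of 12, i.e. "vextractub %0,%1,12",
;; while word element 1 on big endian gives offset 4 and
;; "xxextractuw %x0,%x1,4".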

(define_split
  [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
	 (parallel [(match_operand:QI 2 "const_int_operand")])))
   (clobber (match_operand:SI 3 "int_reg_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
  [(const_int 0)]
{
  rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = operands[3];
  HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);

  emit_move_insn (op3, GEN_INT (offset));
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
  else
    emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
  DONE;
})

;; Optimize zero extracts to eliminate the AND after the extract.
(define_insn_and_split "*vsx_extract_<mode>_di_p9"
  [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
	(zero_extend:DI
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
	  (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
   (clobber (match_scratch:SI 3 "=r,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 4)
		   (vec_select:<VS_scalar>
		    (match_dup 1)
		    (parallel [(match_dup 2)])))
	      (clobber (match_dup 3))])]
{
  operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
}
  [(set_attr "isa" "p9v,*")])

;; Optimize stores to use the ISA 3.0 scalar store instructions
(define_insn_and_split "*vsx_extract_<mode>_store_p9"
  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
	 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
   (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
   (clobber (match_scratch:SI 4 "=X,&r"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 3)
		   (vec_select:<VS_scalar>
		    (match_dup 1)
		    (parallel [(match_dup 2)])))
	      (clobber (match_dup 4))])
   (set (match_dup 0)
	(match_dup 3))])

(define_insn_and_split  "*vsx_extract_si"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
	(vec_select:SI
	 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
   (clobber (match_scratch:V4SI 3 "=v,v,v"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx vec_tmp = operands[3];
  int value;

  if (!BYTES_BIG_ENDIAN)
    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (value != 1)
    emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
  else
    vec_tmp = src;

  if (MEM_P (operands[0]))
    {
      if (can_create_pseudo_p ())
	dest = rs6000_force_indexed_or_indirect_mem (dest);

      if (TARGET_P8_VECTOR)
	emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
      else
	emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
    }

  else if (TARGET_P8_VECTOR)
    emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
  else
    emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
		    gen_rtx_REG (DImode, REGNO (vec_tmp)));

  DONE;
}
  [(set_attr "type" "mftgpr,vecperm,fpstore")
   (set_attr "length" "8")
   (set_attr "isa" "*,p8v,*")])
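
;; A note on the "value != 1" test above (a sketch of the reasoning): the
;; DImode move from the vector register transfers its doubleword 0, whose
;; low 32 bits are word 1 in big endian element numbering, so an element
;; already in word 1 needs no vspltw before the move.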

(define_insn_and_split  "*vsx_extract_<mode>_p8"
  [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
   (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
   && !TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx vec_tmp = operands[3];
  int value;

  if (!BYTES_BIG_ENDIAN)
    element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (<MODE>mode == V16QImode)
    {
      if (value != 7)
	emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
      else
	vec_tmp = src;
    }
  else if (<MODE>mode == V8HImode)
    {
      if (value != 3)
	emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
      else
	vec_tmp = src;
    }
  else
    gcc_unreachable ();

  emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
		  gen_rtx_REG (DImode, REGNO (vec_tmp)));
  DONE;
}
  [(set_attr "type" "mftgpr")])

;; Optimize extracting a single scalar element from memory.
(define_insn_and_split "*vsx_extract_<mode>_load"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
   (clobber (match_scratch:DI 3 "=&b"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
					   operands[3], <VS_scalar>mode);
}
  [(set_attr "type" "load")
   (set_attr "length" "8")])

;; Variable V16QI/V8HI/V4SI extract from a register
(define_insn_and_split "vsx_extract_<mode>_var"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r")
	(unspec:<VS_scalar>
	 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,v")
	  (match_operand:DI 2 "gpc_reg_operand" "r,r")]
	 UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r,r"))
   (clobber (match_scratch:V2DI 4 "=X,&v"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
				operands[3], operands[4]);
  DONE;
}
  [(set_attr "isa" "p9v,*")])

;; Variable V16QI/V8HI/V4SI extract from memory
(define_insn_and_split "*vsx_extract_<mode>_var_load"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r")
	(unspec:<VS_scalar>
	 [(match_operand:VSX_EXTRACT_I 1 "memory_operand" "Q")
	  (match_operand:DI 2 "gpc_reg_operand" "r")]
	 UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=&b"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
					   operands[3], <VS_scalar>mode);
}
  [(set_attr "type" "load")])

;; VSX_EXTRACT optimizations
;; Optimize double d = (double) vec_extract (vi, <n>)
;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
(define_insn_and_split "*vsx_extract_si_<uns>float_df"
  [(set (match_operand:DF 0 "gpc_reg_operand" "=wa")
	(any_float:DF
	 (vec_select:SI
	  (match_operand:V4SI 1 "gpc_reg_operand" "v")
	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
   (clobber (match_scratch:V4SI 3 "=v"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx v4si_tmp = operands[3];
  int value;

  if (!BYTES_BIG_ENDIAN)
    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (value != 0)
    {
      if (GET_CODE (v4si_tmp) == SCRATCH)
	v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
    }
  else
    v4si_tmp = src;

  emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
  DONE;
})

;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
;; where <type> is a floating point type other than double that is supported
;; by the hardware.  First convert the value to double, and then to the
;; desired type.
(define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
  [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=wa")
	(any_float:VSX_EXTRACT_FL
	 (vec_select:SI
	  (match_operand:V4SI 1 "gpc_reg_operand" "v")
	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
   (clobber (match_scratch:V4SI 3 "=v"))
   (clobber (match_scratch:DF 4 "=wa"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx v4si_tmp = operands[3];
  rtx df_tmp = operands[4];
  int value;

  if (!BYTES_BIG_ENDIAN)
    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (value != 0)
    {
      if (GET_CODE (v4si_tmp) == SCRATCH)
	v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
    }
  else
    v4si_tmp = src;

  if (GET_CODE (df_tmp) == SCRATCH)
    df_tmp = gen_reg_rtx (DFmode);

  emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));

  if (<MODE>mode == SFmode)
    emit_insn (gen_truncdfsf2 (dest, df_tmp));
  else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
    emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
  else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
	   && TARGET_FLOAT128_HW)
    emit_insn (gen_extenddftf2_hw (dest, df_tmp));
  else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
    emit_insn (gen_extenddfif2 (dest, df_tmp));
  else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
    emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
  else
    gcc_unreachable ();

  DONE;
})

;; Optimize <ftype> f = (<ftype>) vec_extract (<vtype>, <n>)
;; where <ftype> is SFmode or DFmode (and KFmode/TFmode if those types are
;; IEEE 128-bit hardware types) and <vtype> is vector char, vector unsigned
;; char, vector short or vector unsigned short.
(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
  [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
	(float:FL_CONV
	 (vec_select:<VSX_EXTRACT_I:VS_scalar>
	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
	  (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
   (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
   && TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 3)
		   (vec_select:<VSX_EXTRACT_I:VS_scalar>
		    (match_dup 1)
		    (parallel [(match_dup 2)])))
	      (clobber (scratch:SI))])
   (set (match_dup 4)
	(sign_extend:DI (match_dup 3)))
   (set (match_dup 0)
	(float:<FL_CONV:MODE> (match_dup 4)))]
{
  operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
}
  [(set_attr "isa" "<FL_CONV:VSisa>")])

(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
  [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
	(unsigned_float:FL_CONV
	 (vec_select:<VSX_EXTRACT_I:VS_scalar>
	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
	  (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
   (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
   && TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 3)
		   (vec_select:<VSX_EXTRACT_I:VS_scalar>
		    (match_dup 1)
		    (parallel [(match_dup 2)])))
	      (clobber (scratch:SI))])
   (set (match_dup 0)
	(float:<FL_CONV:MODE> (match_dup 4)))]
{
  operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
}
  [(set_attr "isa" "<FL_CONV:VSisa>")])

;; V4SI/V8HI/V16QI set operation on ISA 3.0
(define_insn "vsx_set_<mode>_p9"
  [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
	(unspec:VSX_EXTRACT_I
	 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
	  (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
	  (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
	 UNSPEC_VSX_SET))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
{
  int ele = INTVAL (operands[3]);
  int nunits = GET_MODE_NUNITS (<MODE>mode);

  if (!BYTES_BIG_ENDIAN)
    ele = nunits - 1 - ele;

  operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
  if (<MODE>mode == V4SImode)
    return "xxinsertw %x0,%x2,%3";
  else
    return "vinsert<wd> %0,%2,%3";
}
  [(set_attr "type" "vecperm")])
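
;; For example, inserting halfword element 6 of a V8HI on little endian
;; remaps to ele = 7 - 6 = 1 and a byte offset of 2, giving
;; "vinserth %0,%2,2", while word element 1 on big endian gives offset 4
;; and "xxinsertw %x0,%x2,4".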

(define_insn_and_split "vsx_set_v4sf_p9"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (match_operand:SF 2 "gpc_reg_operand" "wa")
	  (match_operand:QI 3 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))
   (clobber (match_scratch:SI 4 "=&wa"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
  "#"
  "&& reload_completed"
  [(set (match_dup 5)
	(unspec:V4SF [(match_dup 2)]
		     UNSPEC_VSX_CVDPSPN))
   (parallel [(set (match_dup 4)
		   (vec_select:SI (match_dup 6)
				  (parallel [(match_dup 7)])))
	      (clobber (scratch:SI))])
   (set (match_dup 8)
	(unspec:V4SI [(match_dup 8)
		      (match_dup 4)
		      (match_dup 3)]
		     UNSPEC_VSX_SET))]
{
  unsigned int tmp_regno = reg_or_subregno (operands[4]);

  operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
  operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
  operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3);
  operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "12")
   (set_attr "isa" "p9v")])

;; Special case setting 0.0f to a V4SF element
(define_insn_and_split "*vsx_set_v4sf_p9_zero"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (match_operand:SF 2 "zero_fp_constant" "j")
	  (match_operand:QI 3 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))
   (clobber (match_scratch:SI 4 "=&wa"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
  "#"
  "&& reload_completed"
  [(set (match_dup 4)
	(const_int 0))
   (set (match_dup 5)
	(unspec:V4SI [(match_dup 5)
		      (match_dup 4)
		      (match_dup 3)]
		     UNSPEC_VSX_SET))]
{
  operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "8")
   (set_attr "isa" "p9v")])

;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
;; that is in the default scalar position (1 for big endian, 2 for little
;; endian).  We just need to do an xxinsertw since the element is in the
;; correct location.

(define_insn "*vsx_insert_extract_v4sf_p9"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
			 (parallel
			  [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
	  (match_operand:QI 4 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
   && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
{
  int ele = INTVAL (operands[4]);

  if (!BYTES_BIG_ENDIAN)
    ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;

  operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
  return "xxinsertw %x0,%x2,%4";
}
  [(set_attr "type" "vecperm")])

;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
;; that is in the default scalar position (1 for big endian, 2 for little
;; endian).  Convert the insert/extract to int and avoid doing the conversion.

(define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
			 (parallel
			  [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
	  (match_operand:QI 4 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))
   (clobber (match_scratch:SI 5 "=&wa"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
   && TARGET_P9_VECTOR && TARGET_POWERPC64
   && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
  "#"
  "&& 1"
  [(parallel [(set (match_dup 5)
		   (vec_select:SI (match_dup 6)
				  (parallel [(match_dup 3)])))
	      (clobber (scratch:SI))])
   (set (match_dup 7)
	(unspec:V4SI [(match_dup 8)
		      (match_dup 5)
		      (match_dup 4)]
		     UNSPEC_VSX_SET))]
{
  if (GET_CODE (operands[5]) == SCRATCH)
    operands[5] = gen_reg_rtx (SImode);

  operands[6] = gen_lowpart (V4SImode, operands[2]);
  operands[7] = gen_lowpart (V4SImode, operands[0]);
  operands[8] = gen_lowpart (V4SImode, operands[1]);
}
  [(set_attr "type" "vecperm")
   (set_attr "isa" "p9v")])

;; Expanders for builtins
(define_expand "vsx_mergel_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
   (use (match_operand:VSX_D 1 "vsx_register_operand"))
   (use (match_operand:VSX_D 2 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
  rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
  emit_insn (gen_rtx_SET (operands[0], x));
  DONE;
})

(define_expand "vsx_mergeh_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
   (use (match_operand:VSX_D 1 "vsx_register_operand"))
   (use (match_operand:VSX_D 2 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
  rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
  emit_insn (gen_rtx_SET (operands[0], x));
  DONE;
})
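
;; For example, vsx_mergeh of {a0, a1} and {b0, b1} selects elements {0, 2}
;; of the concatenation {a0, a1, b0, b1}, producing {a0, b0}, while
;; vsx_mergel selects {1, 3}, producing {a1, b1}.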

;; V2DF/V2DI splat
;; We separate the register splat insn from the memory splat insn to force the
;; register allocator to generate the indexed form of the SPLAT when it is
;; given an offsettable memory reference.  Otherwise, if the register and
;; memory insns were combined into a single insn, the register allocator
;; would load the value into a register and then do a doubleword permute.
(define_expand "vsx_splat_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand")
	(vec_duplicate:VSX_D
	 (match_operand:<VS_scalar> 1 "input_operand")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx op1 = operands[1];
  if (MEM_P (op1))
    operands[1] = rs6000_force_indexed_or_indirect_mem (op1);
  else if (!REG_P (op1))
    operands[1] = force_reg (<VSX_D:VS_scalar>mode, op1);
})

(define_insn "vsx_splat_<mode>_reg"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
	(vec_duplicate:VSX_D
	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   xxpermdi %x0,%x1,%x1,0
   mtvsrdd %x0,%1,%1"
  [(set_attr "type" "vecperm")])

(define_insn "vsx_splat_<mode>_mem"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
	(vec_duplicate:VSX_D
	 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "lxvdsx %x0,%y1"
  [(set_attr "type" "vecload")])

;; V4SI splat support
(define_insn "vsx_splat_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
	(vec_duplicate:V4SI
	 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
  "TARGET_P9_VECTOR"
  "@
   mtvsrws %x0,%1
   lxvwsx %x0,%y1"
  [(set_attr "type" "vecperm,vecload")])

;; SImode is not currently allowed in vector registers.  This pattern
;; allows us to use direct move to get the value in a vector register
;; so that we can use XXSPLTW.
(define_insn "vsx_splat_v4si_di"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
	(vec_duplicate:V4SI
	 (truncate:SI
	  (match_operand:DI 1 "gpc_reg_operand" "wa,r"))))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "@
   xxspltw %x0,%x1,1
   mtvsrws %x0,%1"
  [(set_attr "type" "vecperm")
   (set_attr "isa" "p8v,*")])

;; V4SF splat (ISA 3.0)
(define_insn_and_split "vsx_splat_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
	(vec_duplicate:V4SF
	 (match_operand:SF 1 "splat_input_operand" "Z,wa,r")))]
  "TARGET_P9_VECTOR"
  "@
   lxvwsx %x0,%y1
   #
   mtvsrws %x0,%1"
  "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
  [(set (match_dup 0)
	(unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
   (set (match_dup 0)
	(unspec:V4SF [(match_dup 0)
		      (const_int 0)] UNSPEC_VSX_XXSPLTW))]
  ""
  [(set_attr "type" "vecload,vecperm,mftgpr")
   (set_attr "length" "*,8,*")
   (set_attr "isa" "*,p8v,*")])

;; V4SF/V4SI splat from a vector element
(define_insn "vsx_xxspltw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
	(vec_duplicate:VSX_W
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_W 1 "vsx_register_operand" "wa")
	  (parallel
	   [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (!BYTES_BIG_ENDIAN)
    operands[2] = GEN_INT (3 - INTVAL (operands[2]));

  return "xxspltw %x0,%x1,%2";
}
  [(set_attr "type" "vecperm")])
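
;; For example, splatting element 1 on little endian becomes
;; "xxspltw %x0,%x1,2", since the hardware numbers words from the big endian
;; end of the register.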

(define_insn "vsx_xxspltw_<mode>_direct"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
        (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wa")
                       (match_operand:QI 2 "u5bit_cint_operand" "i")]
                      UNSPEC_VSX_XXSPLTW))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxspltw %x0,%x1,%2"
  [(set_attr "type" "vecperm")])

;; V16QI/V8HI splat support on ISA 2.07
(define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
  [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
	(vec_duplicate:VSX_SPLAT_I
	 (truncate:<VS_scalar>
	  (match_operand:DI 1 "altivec_register_operand" "v"))))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
  [(set_attr "type" "vecperm")])

;; V2DF/V2DI splat for use by vec_splat builtin
(define_insn "vsx_xxspltd_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
	               (match_operand:QI 2 "u5bit_cint_operand" "i")]
                      UNSPEC_VSX_XXSPLTD))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
      || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
    return "xxpermdi %x0,%x1,%x1,0";
  else
    return "xxpermdi %x0,%x1,%x1,3";
}
  [(set_attr "type" "vecperm")])
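
;; For example, splatting doubleword element 0 on big endian (or element 1
;; on little endian) names the register's doubleword 0, so DM = 0 copies it
;; to both halves; the other element uses DM = 3.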

;; V4SF/V4SI interleave
(define_insn "vsx_xxmrghw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
        (vec_select:VSX_W
	  (vec_concat:<VS_double>
	    (match_operand:VSX_W 1 "vsx_register_operand" "wa")
	    (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
	  (parallel [(const_int 0) (const_int 4)
		     (const_int 1) (const_int 5)])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxmrghw %x0,%x1,%x2";
  else
    return "xxmrglw %x0,%x2,%x1";
}
  [(set_attr "type" "vecperm")])

(define_insn "vsx_xxmrglw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
	(vec_select:VSX_W
	  (vec_concat:<VS_double>
	    (match_operand:VSX_W 1 "vsx_register_operand" "wa")
	    (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
	  (parallel [(const_int 2) (const_int 6)
		     (const_int 3) (const_int 7)])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxmrglw %x0,%x1,%x2";
  else
    return "xxmrghw %x0,%x2,%x1";
}
  [(set_attr "type" "vecperm")])

;; Shift left double by word immediate
(define_insn "vsx_xxsldwi_<mode>"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
	(unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
		       (match_operand:VSX_L 2 "vsx_register_operand" "wa")
		       (match_operand:QI 3 "u5bit_cint_operand" "i")]
		      UNSPEC_VSX_SLDWI))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsldwi %x0,%x1,%x2,%3"
  [(set_attr "type" "vecperm")
   (set_attr "isa" "<VSisa>")])


;; Vector reduction insns and splitters

(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
  [(set (match_operand:V2DF 0 "vfloat_operand" "=&wa,wa")
	(VEC_reduc:V2DF
	 (vec_concat:V2DF
	  (vec_select:DF
	   (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
	   (parallel [(const_int 1)]))
	  (vec_select:DF
	   (match_dup 1)
	   (parallel [(const_int 0)])))
	 (match_dup 1)))
   (clobber (match_scratch:V2DF 2 "=0,&wa"))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
	     ? gen_reg_rtx (V2DFmode)
	     : operands[2];
  emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "veccomplex")])

(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
  [(set (match_operand:V4SF 0 "vfloat_operand" "=wa")
	(VEC_reduc:V4SF
	 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
	 (match_operand:V4SF 1 "vfloat_operand" "wa")))
   (clobber (match_scratch:V4SF 2 "=&wa"))
   (clobber (match_scratch:V4SF 3 "=&wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp2, tmp3, tmp4;

  if (can_create_pseudo_p ())
    {
      tmp2 = gen_reg_rtx (V4SFmode);
      tmp3 = gen_reg_rtx (V4SFmode);
      tmp4 = gen_reg_rtx (V4SFmode);
    }
  else
    {
      tmp2 = operands[2];
      tmp3 = operands[3];
      tmp4 = tmp2;
    }

  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
  DONE;
}
  [(set_attr "length" "16")
   (set_attr "type" "veccomplex")])
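
;; A worked example for the additive V4SF reduction (BE element order): with
;; op1 = {a0, a1, a2, a3}, the two-word rotate gives {a2, a3, a0, a1} and the
;; first add leaves {a0+a2, a1+a3, a2+a0, a3+a1}; the three-word rotate and
;; second add then place the full sum a0+a1+a2+a3 in every element.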

;; Combiner patterns for the vector reduction patterns that know we can get
;; to the top element of the V2DF array without doing an extract.
4361
4362(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4363  [(set (match_operand:DF 0 "vfloat_operand" "=&wa,wa")
4364	(vec_select:DF
4365	 (VEC_reduc:V2DF
4366	  (vec_concat:V2DF
4367	   (vec_select:DF
4368	    (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4369	    (parallel [(const_int 1)]))
4370	   (vec_select:DF
4371	    (match_dup 1)
4372	    (parallel [(const_int 0)])))
4373	  (match_dup 1))
4374	 (parallel [(const_int 1)])))
4375   (clobber (match_scratch:DF 2 "=0,&wa"))]
4376  "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)"
4377  "#"
4378  ""
4379  [(const_int 0)]
4380{
4381  rtx hi = gen_highpart (DFmode, operands[1]);
4382  rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4383	    ? gen_reg_rtx (DFmode)
4384	    : operands[2];
4385
4386  emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4387  emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4388  DONE;
4389}
4390  [(set_attr "length" "8")
4391   (set_attr "type" "veccomplex")])
4392
4393(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
4394  [(set (match_operand:SF 0 "vfloat_operand" "=f")
4395	(vec_select:SF
4396	 (VEC_reduc:V4SF
4397	  (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4398	  (match_operand:V4SF 1 "vfloat_operand" "wa"))
4399	 (parallel [(const_int 3)])))
4400   (clobber (match_scratch:V4SF 2 "=&wa"))
4401   (clobber (match_scratch:V4SF 3 "=&wa"))
4402   (clobber (match_scratch:V4SF 4 "=0"))]
4403  "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)"
4404  "#"
4405  ""
4406  [(const_int 0)]
4407{
4408  rtx op0 = operands[0];
4409  rtx op1 = operands[1];
4410  rtx tmp2, tmp3, tmp4, tmp5;
4411
4412  if (can_create_pseudo_p ())
4413    {
4414      tmp2 = gen_reg_rtx (V4SFmode);
4415      tmp3 = gen_reg_rtx (V4SFmode);
4416      tmp4 = gen_reg_rtx (V4SFmode);
4417      tmp5 = gen_reg_rtx (V4SFmode);
4418    }
4419  else
4420    {
4421      tmp2 = operands[2];
4422      tmp3 = operands[3];
4423      tmp4 = tmp2;
4424      tmp5 = operands[4];
4425    }
4426
4427  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4428  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4429  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4430  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
4431  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
4432  DONE;
4433}
4434  [(set_attr "length" "20")
4435   (set_attr "type" "veccomplex")])
4436
4437
4438;; Power8 Vector fusion.  The fused ops must be physically adjacent.
4439(define_peephole
4440  [(set (match_operand:P 0 "base_reg_operand")
4441	(match_operand:P 1 "short_cint_operand"))
4442   (set (match_operand:VSX_M 2 "vsx_register_operand")
4443	(mem:VSX_M (plus:P (match_dup 0)
4444			   (match_operand:P 3 "int_reg_operand"))))]
4445  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4446  "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4447  [(set_attr "length" "8")
4448   (set_attr "type" "vecload")])
4449
4450(define_peephole
4451  [(set (match_operand:P 0 "base_reg_operand")
4452	(match_operand:P 1 "short_cint_operand"))
4453   (set (match_operand:VSX_M 2 "vsx_register_operand")
4454	(mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
4455			   (match_dup 0))))]
4456  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4457  "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4458  [(set_attr "length" "8")
4459   (set_attr "type" "vecload")])
4460
4461
4462;; ISA 3.0 vector extend sign support
4463
4464(define_insn "vsx_sign_extend_qi_<mode>"
4465  [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4466	(unspec:VSINT_84
4467	 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4468	 UNSPEC_VSX_SIGN_EXTEND))]
4469  "TARGET_P9_VECTOR"
4470  "vextsb2<wd> %0,%1"
4471  [(set_attr "type" "vecexts")])
4472
4473(define_insn "vsx_sign_extend_hi_<mode>"
4474  [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4475	(unspec:VSINT_84
4476	 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4477	 UNSPEC_VSX_SIGN_EXTEND))]
4478  "TARGET_P9_VECTOR"
4479  "vextsh2<wd> %0,%1"
4480  [(set_attr "type" "vecexts")])
4481
4482(define_insn "*vsx_sign_extend_si_v2di"
4483  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4484	(unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4485		     UNSPEC_VSX_SIGN_EXTEND))]
4486  "TARGET_P9_VECTOR"
4487  "vextsw2d %0,%1"
4488  [(set_attr "type" "vecexts")])
4489
4490
4491;; ISA 3.0 Binary Floating-Point Support
4492
4493;; VSX Scalar Extract Exponent Quad-Precision
4494(define_insn "xsxexpqp_<mode>"
4495  [(set (match_operand:DI 0 "altivec_register_operand" "=v")
4496	(unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4497	 UNSPEC_VSX_SXEXPDP))]
4498  "TARGET_P9_VECTOR"
4499  "xsxexpqp %0,%1"
4500  [(set_attr "type" "vecmove")])
4501
4502;; VSX Scalar Extract Exponent Double-Precision
4503(define_insn "xsxexpdp"
4504  [(set (match_operand:DI 0 "register_operand" "=r")
4505	(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4506	 UNSPEC_VSX_SXEXPDP))]
4507  "TARGET_P9_VECTOR && TARGET_64BIT"
4508  "xsxexpdp %0,%x1"
4509  [(set_attr "type" "integer")])
4510
4511;; VSX Scalar Extract Significand Quad-Precision
4512(define_insn "xsxsigqp_<mode>"
4513  [(set (match_operand:TI 0 "altivec_register_operand" "=v")
4514	(unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
4515	 UNSPEC_VSX_SXSIG))]
4516  "TARGET_P9_VECTOR"
4517  "xsxsigqp %0,%1"
4518  [(set_attr "type" "vecmove")])
4519
4520;; VSX Scalar Extract Significand Double-Precision
4521(define_insn "xsxsigdp"
4522  [(set (match_operand:DI 0 "register_operand" "=r")
4523	(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4524	 UNSPEC_VSX_SXSIG))]
4525  "TARGET_P9_VECTOR && TARGET_64BIT"
4526  "xsxsigdp %0,%x1"
4527  [(set_attr "type" "integer")])
4528
4529;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
4530(define_insn "xsiexpqpf_<mode>"
4531  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4532	(unspec:IEEE128
4533	 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4534	  (match_operand:DI 2 "altivec_register_operand" "v")]
4535	 UNSPEC_VSX_SIEXPQP))]
4536  "TARGET_P9_VECTOR"
4537  "xsiexpqp %0,%1,%2"
4538  [(set_attr "type" "vecmove")])
4539
4540;; VSX Scalar Insert Exponent Quad-Precision
4541(define_insn "xsiexpqp_<mode>"
4542  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4543	(unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
4544			 (match_operand:DI 2 "altivec_register_operand" "v")]
4545	 UNSPEC_VSX_SIEXPQP))]
4546  "TARGET_P9_VECTOR"
4547  "xsiexpqp %0,%1,%2"
4548  [(set_attr "type" "vecmove")])
4549
4550;; VSX Scalar Insert Exponent Double-Precision
4551(define_insn "xsiexpdp"
4552  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4553	(unspec:DF [(match_operand:DI 1 "register_operand" "r")
4554		    (match_operand:DI 2 "register_operand" "r")]
4555	 UNSPEC_VSX_SIEXPDP))]
4556  "TARGET_P9_VECTOR && TARGET_64BIT"
4557  "xsiexpdp %x0,%1,%2"
4558  [(set_attr "type" "fpsimple")])
4559
4560;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
4561(define_insn "xsiexpdpf"
4562  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4563	(unspec:DF [(match_operand:DF 1 "register_operand" "r")
4564		    (match_operand:DI 2 "register_operand" "r")]
4565	 UNSPEC_VSX_SIEXPDP))]
4566  "TARGET_P9_VECTOR && TARGET_64BIT"
4567  "xsiexpdp %x0,%1,%2"
4568  [(set_attr "type" "fpsimple")])
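
;; A sketch of the matching insert-exponent built-in (assuming the
;; documented scalar_insert_exp name from altivec.h; -mcpu=power9,
;; 64-bit):
;;
;;	#include <altivec.h>
;;
;;	/* Combine significand bits with a biased exponent (xsiexpdp).  */
;;	double make_double (unsigned long long sig, unsigned long long exp)
;;	{
;;	  return scalar_insert_exp (sig, exp);
;;	}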
4569
4570;; VSX Scalar Compare Exponents Double-Precision
4571(define_expand "xscmpexpdp_<code>"
4572  [(set (match_dup 3)
4573	(compare:CCFP
4574	 (unspec:DF
4575	  [(match_operand:DF 1 "vsx_register_operand" "wa")
4576	   (match_operand:DF 2 "vsx_register_operand" "wa")]
4577	  UNSPEC_VSX_SCMPEXPDP)
4578	 (const_int 0)))
4579   (set (match_operand:SI 0 "register_operand" "=r")
4580	(CMP_TEST:SI (match_dup 3)
4581		     (const_int 0)))]
4582  "TARGET_P9_VECTOR"
4583{
4584  if (<CODE> == UNORDERED && !HONOR_NANS (DFmode))
4585    {
4586      emit_move_insn (operands[0], const0_rtx);
4587      DONE;
4588    }
4589
4590  operands[3] = gen_reg_rtx (CCFPmode);
4591})
4592
4593(define_insn "*xscmpexpdp"
4594  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4595	(compare:CCFP
4596	 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
4597		     (match_operand:DF 2 "vsx_register_operand" "wa")]
4598	  UNSPEC_VSX_SCMPEXPDP)
4599	 (match_operand:SI 3 "zero_constant" "j")))]
4600  "TARGET_P9_VECTOR"
4601  "xscmpexpdp %0,%x1,%x2"
4602  [(set_attr "type" "fpcompare")])
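
;; A sketch of the exponent-compare built-ins that reach the expander
;; above (assuming the documented scalar_cmp_exp_* names from altivec.h;
;; -mcpu=power9):
;;
;;	#include <altivec.h>
;;
;;	/* Compare only the exponent fields of A and B (xscmpexpdp).  */
;;	int exp_gt (double a, double b)
;;	{
;;	  return scalar_cmp_exp_gt (a, b);
;;	}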
4603
4604;; VSX Scalar Compare Exponents Quad-Precision
4605(define_expand "xscmpexpqp_<code>_<mode>"
4606  [(set (match_dup 3)
4607	(compare:CCFP
4608	 (unspec:IEEE128
4609	  [(match_operand:IEEE128 1 "vsx_register_operand" "v")
4610	   (match_operand:IEEE128 2 "vsx_register_operand" "v")]
4611	  UNSPEC_VSX_SCMPEXPQP)
4612	 (const_int 0)))
4613   (set (match_operand:SI 0 "register_operand" "=r")
4614	(CMP_TEST:SI (match_dup 3)
4615		     (const_int 0)))]
4616  "TARGET_P9_VECTOR"
4617{
4618  if (<CODE> == UNORDERED && !HONOR_NANS (<MODE>mode))
4619    {
4620      emit_move_insn (operands[0], const0_rtx);
4621      DONE;
4622    }
4623
4624  operands[3] = gen_reg_rtx (CCFPmode);
4625})
4626
4627(define_insn "*xscmpexpqp"
4628  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4629	(compare:CCFP
4630	 (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4631		          (match_operand:IEEE128 2 "altivec_register_operand" "v")]
4632	  UNSPEC_VSX_SCMPEXPQP)
4633	 (match_operand:SI 3 "zero_constant" "j")))]
4634  "TARGET_P9_VECTOR"
4635  "xscmpexpqp %0,%1,%2"
4636  [(set_attr "type" "fpcompare")])
4637
4638;; VSX Scalar Test Data Class Quad-Precision
4639;;  (Expansion for scalar_test_data_class (__ieee128, int))
;;   (Has the side effect of setting the lt bit if operand 1 is negative,
;;    setting the eq bit if any of the conditions tested by operand 2
;;    are satisfied, and clearing the gt and unordered bits.)
4643(define_expand "xststdcqp_<mode>"
4644  [(set (match_dup 3)
4645	(compare:CCFP
4646	 (unspec:IEEE128
4647	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4648	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
4649	  UNSPEC_VSX_STSTDC)
4650	 (const_int 0)))
4651   (set (match_operand:SI 0 "register_operand" "=r")
4652	(eq:SI (match_dup 3)
4653	       (const_int 0)))]
4654  "TARGET_P9_VECTOR"
4655{
4656  operands[3] = gen_reg_rtx (CCFPmode);
4657})
4658
4659;; VSX Scalar Test Data Class Double- and Single-Precision
4660;;  (The lt bit is set if operand 1 is negative.  The eq bit is set
4661;;   if any of the conditions tested by operand 2 are satisfied.
4662;;   The gt and unordered bits are cleared to zero.)
4663(define_expand "xststdc<sd>p"
4664  [(set (match_dup 3)
4665	(compare:CCFP
4666	 (unspec:SFDF
4667	  [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4668	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
4669	  UNSPEC_VSX_STSTDC)
4670	 (match_dup 4)))
4671   (set (match_operand:SI 0 "register_operand" "=r")
4672	(eq:SI (match_dup 3)
4673	       (const_int 0)))]
4674  "TARGET_P9_VECTOR"
4675{
4676  operands[3] = gen_reg_rtx (CCFPmode);
4677  operands[4] = CONST0_RTX (SImode);
4678})
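
;; A sketch of the test-data-class built-in (assuming the documented
;; scalar_test_data_class name from altivec.h and the ISA 3.0 DCMX
;; encoding, where 0x40 selects NaN and 0x20/0x10 select +/- infinity;
;; -mcpu=power9):
;;
;;	#include <altivec.h>
;;
;;	/* Test whether X is a NaN or an infinity (xststdcdp).  */
;;	int nan_or_inf (double x)
;;	{
;;	  return scalar_test_data_class (x, 0x70);
;;	}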
4679
;; VSX Scalar Test Negative Quad-Precision
4681(define_expand "xststdcnegqp_<mode>"
4682  [(set (match_dup 2)
4683	(compare:CCFP
4684	 (unspec:IEEE128
4685	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4686	   (const_int 0)]
4687	  UNSPEC_VSX_STSTDC)
4688	 (const_int 0)))
4689   (set (match_operand:SI 0 "register_operand" "=r")
4690	(lt:SI (match_dup 2)
4691	       (const_int 0)))]
4692  "TARGET_P9_VECTOR"
4693{
4694  operands[2] = gen_reg_rtx (CCFPmode);
4695})
4696
;; VSX Scalar Test Negative Double- and Single-Precision
4698(define_expand "xststdcneg<sd>p"
4699  [(set (match_dup 2)
4700	(compare:CCFP
4701	 (unspec:SFDF
4702	  [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4703	   (const_int 0)]
4704	  UNSPEC_VSX_STSTDC)
4705	 (match_dup 3)))
4706   (set (match_operand:SI 0 "register_operand" "=r")
4707	(lt:SI (match_dup 2)
4708	       (const_int 0)))]
4709  "TARGET_P9_VECTOR"
4710{
4711  operands[2] = gen_reg_rtx (CCFPmode);
4712  operands[3] = CONST0_RTX (SImode);
4713})
4714
4715(define_insn "*xststdcqp_<mode>"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4717	(compare:CCFP
4718	 (unspec:IEEE128
4719	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4720	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
4721	  UNSPEC_VSX_STSTDC)
4722	 (const_int 0)))]
4723  "TARGET_P9_VECTOR"
4724  "xststdcqp %0,%1,%2"
4725  [(set_attr "type" "fpcompare")])
4726
4727(define_insn "*xststdc<sd>p"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
4729	(compare:CCFP
4730	 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4731		       (match_operand:SI 2 "u7bit_cint_operand" "n")]
4732	  UNSPEC_VSX_STSTDC)
4733	 (match_operand:SI 3 "zero_constant" "j")))]
4734  "TARGET_P9_VECTOR"
4735  "xststdc<sd>p %0,%x1,%2"
4736  [(set_attr "type" "fpcompare")])
4737
4738;; VSX Vector Extract Exponent Double and Single Precision
4739(define_insn "xvxexp<sd>p"
4740  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4741	(unspec:VSX_F
4742	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4743	 UNSPEC_VSX_VXEXP))]
4744  "TARGET_P9_VECTOR"
4745  "xvxexp<sd>p %x0,%x1"
4746  [(set_attr "type" "vecsimple")])
4747
4748;; VSX Vector Extract Significand Double and Single Precision
4749(define_insn "xvxsig<sd>p"
4750  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4751	(unspec:VSX_F
4752	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
4753	 UNSPEC_VSX_VXSIG))]
4754  "TARGET_P9_VECTOR"
4755  "xvxsig<sd>p %x0,%x1"
4756  [(set_attr "type" "vecsimple")])
4757
4758;; VSX Vector Insert Exponent Double and Single Precision
4759(define_insn "xviexp<sd>p"
4760  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
4761	(unspec:VSX_F
4762	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4763	  (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
4764	 UNSPEC_VSX_VIEXP))]
4765  "TARGET_P9_VECTOR"
4766  "xviexp<sd>p %x0,%x1,%x2"
4767  [(set_attr "type" "vecsimple")])
4768
4769;; VSX Vector Test Data Class Double and Single Precision
4770;; The corresponding elements of the result vector are all ones
4771;; if any of the conditions tested by operand 3 are satisfied.
4772(define_insn "xvtstdc<sd>p"
4773  [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
4774	(unspec:<VSI>
4775	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
4776	  (match_operand:SI 2 "u7bit_cint_operand" "n")]
4777	 UNSPEC_VSX_VTSTDC))]
4778  "TARGET_P9_VECTOR"
4779  "xvtstdc<sd>p %x0,%x1,%2"
4780  [(set_attr "type" "vecsimple")])
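
;; A sketch of the vector form (assuming the documented
;; vec_test_data_class name from altivec.h; each result element is all
;; ones when the condition holds; -mcpu=power9):
;;
;;	#include <altivec.h>
;;
;;	/* Per-element NaN/infinity test (xvtstdcdp).  */
;;	vector bool long long nan_or_inf_v (vector double v)
;;	{
;;	  return vec_test_data_class (v, 0x70);
;;	}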
4781
4782;; ISA 3.0 String Operations Support
4783
4784;; Compare vectors producing a vector result and a predicate, setting CR6
;; to indicate a combined status.  This pattern matches v16qi, v8hi, and
;; v4si modes.  There is no need to match v4sf, v2df, or v2di modes
;; because those are expanded to use Power8 instructions.
4789(define_insn "*vsx_ne_<mode>_p"
4790  [(set (reg:CC CR6_REGNO)
4791	(unspec:CC
4792	 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4793		 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
4794	 UNSPEC_PREDICATE))
4795   (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
4796	(ne:VSX_EXTRACT_I (match_dup 1)
4797			  (match_dup 2)))]
4798  "TARGET_P9_VECTOR"
4799  "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4800  [(set_attr "type" "vecsimple")])
4801
4802(define_insn "*vector_nez_<mode>_p"
4803  [(set (reg:CC CR6_REGNO)
4804	(unspec:CC [(unspec:VI
4805		     [(match_operand:VI 1 "gpc_reg_operand" "v")
4806		      (match_operand:VI 2 "gpc_reg_operand" "v")]
4807		     UNSPEC_NEZ_P)]
4808	 UNSPEC_PREDICATE))
4809   (set (match_operand:VI 0 "gpc_reg_operand" "=v")
4810	(unspec:VI [(match_dup 1)
4811		    (match_dup 2)]
4812	 UNSPEC_NEZ_P))]
4813  "TARGET_P9_VECTOR"
4814  "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
4815  [(set_attr "type" "vecsimple")])
4816
;; Return first position of match between vectors using natural element
;; order for both LE and BE execution modes.
4819(define_expand "first_match_index_<mode>"
4820  [(match_operand:SI 0 "register_operand")
4821   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
4822	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4823  UNSPEC_VSX_FIRST_MATCH_INDEX)]
4824  "TARGET_P9_VECTOR"
4825{
4826  int sh;
4827
4828  rtx cmp_result = gen_reg_rtx (<MODE>mode);
4829  rtx not_result = gen_reg_rtx (<MODE>mode);
4830
4831  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4832					     operands[2]));
4833  emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
4834
4835  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4836
4837  if (<MODE>mode == V16QImode)
4838    {
4839      if (!BYTES_BIG_ENDIAN)
4840        emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
4841      else
4842        emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
4843    }
4844  else
4845    {
4846      rtx tmp = gen_reg_rtx (SImode);
4847      if (!BYTES_BIG_ENDIAN)
4848        emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
4849      else
4850        emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
4851      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4852    }
4853  DONE;
4854})
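
;; A sketch of the built-in that reaches the expander above (assuming
;; the documented vec_first_match_index name from altivec.h;
;; -mcpu=power9):
;;
;;	#include <altivec.h>
;;
;;	/* Element index of the first match between A and B, or the
;;	   number of elements if there is no match.  */
;;	int first_match (vector unsigned char a, vector unsigned char b)
;;	{
;;	  return vec_first_match_index (a, b);
;;	}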
4855
4856;; Return first position of match between vectors or end of string (EOS) using
4857;; natural element order for both LE and BE execution modes.
4858(define_expand "first_match_or_eos_index_<mode>"
4859  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4862  UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
4863  "TARGET_P9_VECTOR"
4864{
4865  int sh;
4866  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4867  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4868  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4869  rtx and_result = gen_reg_rtx (<MODE>mode);
4870  rtx result = gen_reg_rtx (<MODE>mode);
4871  rtx vzero = gen_reg_rtx (<MODE>mode);
4872
4873  /* Vector with zeros in elements that correspond to zeros in operands.  */
4874  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4875  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4876  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4877  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4878
  /* Vector with ones in elements that do not match.  */
4880  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4881                                             operands[2]));
4882
  /* Create vector with ones in elements where there was a zero in one of
     the source elements or the elements match.  */
4885  emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
4886  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4887
4888  if (<MODE>mode == V16QImode)
4889    {
4890      if (!BYTES_BIG_ENDIAN)
4891        emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4892      else
4893        emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
4894    }
4895  else
4896    {
4897      rtx tmp = gen_reg_rtx (SImode);
4898      if (!BYTES_BIG_ENDIAN)
4899        emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4900      else
4901        emit_insn (gen_vclzlsbb_<mode> (tmp, result));
4902      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4903    }
4904  DONE;
4905})
4906
4907;; Return first position of mismatch between vectors using natural
4908;; element order for both LE and BE execution modes.
4909(define_expand "first_mismatch_index_<mode>"
4910  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4913  UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
4914  "TARGET_P9_VECTOR"
4915{
4916  int sh;
4917  rtx cmp_result = gen_reg_rtx (<MODE>mode);
4918
4919  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
4920					    operands[2]));
4921  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4922
4923  if (<MODE>mode == V16QImode)
4924    {
4925      if (!BYTES_BIG_ENDIAN)
4926        emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
4927      else
4928        emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
4929    }
4930  else
4931    {
4932      rtx tmp = gen_reg_rtx (SImode);
4933      if (!BYTES_BIG_ENDIAN)
4934        emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
4935      else
4936        emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
4937      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4938    }
4939  DONE;
4940})
4941
4942;; Return first position of mismatch between vectors or end of string (EOS)
4943;; using natural element order for both LE and BE execution modes.
4944(define_expand "first_mismatch_or_eos_index_<mode>"
4945  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
4948  UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
4949  "TARGET_P9_VECTOR"
4950{
4951  int sh;
4952  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
4953  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
4954  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
4955  rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
4956  rtx and_result = gen_reg_rtx (<MODE>mode);
4957  rtx result = gen_reg_rtx (<MODE>mode);
4958  rtx vzero = gen_reg_rtx (<MODE>mode);
4959
4960  /* Vector with zeros in elements that correspond to zeros in operands.  */
4961  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
4962
4963  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
4964  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
4965  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
4966
  /* Vector with ones in elements that match.  */
4968  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
4969                                             operands[2]));
4970  emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
4971
4972  /* Create vector with ones in elements where there was a zero in one of
4973     the source elements or the elements did not match.  */
4974  emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
4975  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
4976
4977  if (<MODE>mode == V16QImode)
4978    {
4979      if (!BYTES_BIG_ENDIAN)
4980        emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
4981      else
4982        emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
4983    }
4984  else
4985    {
4986      rtx tmp = gen_reg_rtx (SImode);
4987      if (!BYTES_BIG_ENDIAN)
4988        emit_insn (gen_vctzlsbb_<mode> (tmp, result));
4989      else
4990        emit_insn (gen_vclzlsbb_<mode> (tmp, result));
4991      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
4992    }
4993  DONE;
4994})
4995
4996;; Load VSX Vector with Length
4997(define_expand "lxvl"
4998  [(set (match_dup 3)
4999        (ashift:DI (match_operand:DI 2 "register_operand")
5000                   (const_int 56)))
5001   (set (match_operand:V16QI 0 "vsx_register_operand")
5002	(unspec:V16QI
5003	 [(match_operand:DI 1 "gpc_reg_operand")
5004          (mem:V16QI (match_dup 1))
5005	  (match_dup 3)]
5006	 UNSPEC_LXVL))]
5007  "TARGET_P9_VECTOR && TARGET_64BIT"
5008{
5009  operands[3] = gen_reg_rtx (DImode);
5010})
5011
5012(define_insn "*lxvl"
5013  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5014	(unspec:V16QI
5015	 [(match_operand:DI 1 "gpc_reg_operand" "b")
5016	  (mem:V16QI (match_dup 1))
5017	  (match_operand:DI 2 "register_operand" "r")]
5018	 UNSPEC_LXVL))]
5019  "TARGET_P9_VECTOR && TARGET_64BIT"
5020  "lxvl %x0,%1,%2"
5021  [(set_attr "type" "vecload")])
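
;; A sketch of the length-controlled load built-in (assuming the
;; documented vec_xl_len name from altivec.h; -mcpu=power9, 64-bit):
;;
;;	#include <altivec.h>
;;	#include <stddef.h>
;;
;;	/* Load only the first LEN bytes (lxvl); the remaining bytes of
;;	   the result are zero.  */
;;	vector unsigned char load_partial (unsigned char *p, size_t len)
;;	{
;;	  return vec_xl_len (p, len);
;;	}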
5022
5023(define_insn "lxvll"
5024  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5025	(unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
5026                       (mem:V16QI (match_dup 1))
5027		       (match_operand:DI 2 "register_operand" "r")]
5028		      UNSPEC_LXVLL))]
5029  "TARGET_P9_VECTOR"
5030  "lxvll %x0,%1,%2"
5031  [(set_attr "type" "vecload")])
5032
5033;; Expand for builtin xl_len_r
5034(define_expand "xl_len_r"
5035  [(match_operand:V16QI 0 "vsx_register_operand")
5036   (match_operand:DI 1 "register_operand")
5037   (match_operand:DI 2 "register_operand")]
5038  ""
5039{
5040  rtx shift_mask = gen_reg_rtx (V16QImode);
5041  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5042  rtx tmp = gen_reg_rtx (DImode);
5043
5044  emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
5045  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5046  emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
5047  emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
5048	     shift_mask));
5049  DONE;
5050})
5051
5052(define_insn "stxvll"
5053  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5054	(unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5055		       (mem:V16QI (match_dup 1))
5056		       (match_operand:DI 2 "register_operand" "r")]
5057	              UNSPEC_STXVLL))]
5058  "TARGET_P9_VECTOR"
5059  "stxvll %x0,%1,%2"
5060  [(set_attr "type" "vecstore")])
5061
5062;; Store VSX Vector with Length
5063(define_expand "stxvl"
5064  [(set (match_dup 3)
5065	(ashift:DI (match_operand:DI 2 "register_operand")
5066		   (const_int 56)))
5067   (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
5068	(unspec:V16QI
5069	 [(match_operand:V16QI 0 "vsx_register_operand")
5070	  (mem:V16QI (match_dup 1))
5071	  (match_dup 3)]
5072	 UNSPEC_STXVL))]
5073  "TARGET_P9_VECTOR && TARGET_64BIT"
5074{
5075  operands[3] = gen_reg_rtx (DImode);
5076})
5077
5078(define_insn "*stxvl"
5079  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5080	(unspec:V16QI
5081	 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5082	  (mem:V16QI (match_dup 1))
5083	  (match_operand:DI 2 "register_operand" "r")]
5084	 UNSPEC_STXVL))]
5085  "TARGET_P9_VECTOR && TARGET_64BIT"
5086  "stxvl %x0,%1,%2"
5087  [(set_attr "type" "vecstore")])
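
;; The matching length-controlled store, as a sketch (assuming the
;; documented vec_xst_len name from altivec.h; -mcpu=power9, 64-bit):
;;
;;	#include <altivec.h>
;;	#include <stddef.h>
;;
;;	/* Store only the first LEN bytes of V (stxvl).  */
;;	void store_partial (vector unsigned char v, unsigned char *p,
;;			    size_t len)
;;	{
;;	  vec_xst_len (v, p, len);
;;	}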
5088
5089;; Expand for builtin xst_len_r
5090(define_expand "xst_len_r"
  [(match_operand:V16QI 0 "vsx_register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:DI 2 "register_operand")]
  "TARGET_P9_VECTOR && TARGET_64BIT"
5095{
5096  rtx shift_mask = gen_reg_rtx (V16QImode);
5097  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5098  rtx tmp = gen_reg_rtx (DImode);
5099
5100  emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
5101  emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
5102	     shift_mask));
5103  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5104  emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
5105  DONE;
5106})
5107
;; Vector Compare Not Equal Byte (expressed with not+eq rather than an
;; unspec)
5109(define_insn "vcmpneb"
5110  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5111	 (not:V16QI
5112	   (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
5113		     (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
5114  "TARGET_P9_VECTOR"
5115  "vcmpneb %0,%1,%2"
5116  [(set_attr "type" "vecsimple")])
5117
5118;; Vector Compare Not Equal or Zero Byte
5119(define_insn "vcmpnezb"
5120  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5121	(unspec:V16QI
5122	 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5123	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
5124	 UNSPEC_VCMPNEZB))]
5125  "TARGET_P9_VECTOR"
5126  "vcmpnezb %0,%1,%2"
5127  [(set_attr "type" "vecsimple")])
5128
;; Vector Compare Not Equal or Zero Byte, record form (also sets CR6)
5130(define_insn "vcmpnezb_p"
5131  [(set (reg:CC CR6_REGNO)
5132	(unspec:CC
5133	 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5134	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
5135	 UNSPEC_VCMPNEZB))
5136   (set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5137	(unspec:V16QI
5138	 [(match_dup 1)
5139	  (match_dup 2)]
5140	 UNSPEC_VCMPNEZB))]
5141  "TARGET_P9_VECTOR"
5142  "vcmpnezb. %0,%1,%2"
5143  [(set_attr "type" "vecsimple")])
5144
;; Vector Compare Not Equal Half Word (expressed with not+eq)
5146(define_insn "vcmpneh"
5147  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5148	(not:V8HI
5149	  (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
5150		   (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
5151  "TARGET_P9_VECTOR"
5152  "vcmpneh %0,%1,%2"
5153  [(set_attr "type" "vecsimple")])
5154
5155;; Vector Compare Not Equal or Zero Half Word
5156(define_insn "vcmpnezh"
5157  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5158	(unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
5159		      (match_operand:V8HI 2 "altivec_register_operand" "v")]
5160	 UNSPEC_VCMPNEZH))]
5161  "TARGET_P9_VECTOR"
5162  "vcmpnezh %0,%1,%2"
5163  [(set_attr "type" "vecsimple")])
5164
;; Vector Compare Not Equal Word (expressed with not+eq)
5166(define_insn "vcmpnew"
5167  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5168	(not:V4SI
5169	  (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
5170		   (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
5171  "TARGET_P9_VECTOR"
5172  "vcmpnew %0,%1,%2"
5173  [(set_attr "type" "vecsimple")])
5174
5175;; Vector Compare Not Equal or Zero Word
5176(define_insn "vcmpnezw"
5177  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5178	(unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
5179		      (match_operand:V4SI 2 "altivec_register_operand" "v")]
5180	 UNSPEC_VCMPNEZW))]
5181  "TARGET_P9_VECTOR"
5182  "vcmpnezw %0,%1,%2"
5183  [(set_attr "type" "vecsimple")])
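
;; A sketch of the not-equal comparison built-ins above (assuming the
;; documented vec_cmpne and vec_cmpnez names from altivec.h;
;; -mcpu=power9):
;;
;;	#include <altivec.h>
;;
;;	/* Per-element not-equal (vcmpneb).  */
;;	vector bool char ne (vector signed char a, vector signed char b)
;;	{
;;	  return vec_cmpne (a, b);
;;	}
;;
;;	/* Per-element not-equal-or-zero (vcmpnezb).  */
;;	vector bool char ne_or_zero (vector signed char a,
;;				     vector signed char b)
;;	{
;;	  return vec_cmpnez (a, b);
;;	}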
5184
5185;; Vector Count Leading Zero Least-Significant Bits Byte
5186(define_insn "vclzlsbb_<mode>"
5187  [(set (match_operand:SI 0 "register_operand" "=r")
5188	(unspec:SI
5189	 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5190	 UNSPEC_VCLZLSBB))]
5191  "TARGET_P9_VECTOR"
5192  "vclzlsbb %0,%1"
5193  [(set_attr "type" "vecsimple")])
5194
5195;; Vector Count Trailing Zero Least-Significant Bits Byte
5196(define_insn "vctzlsbb_<mode>"
5197  [(set (match_operand:SI 0 "register_operand" "=r")
5198	(unspec:SI
5199	 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5200	 UNSPEC_VCTZLSBB))]
5201  "TARGET_P9_VECTOR"
5202  "vctzlsbb %0,%1"
5203  [(set_attr "type" "vecsimple")])
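
;; A sketch of the corresponding built-ins (assuming the documented
;; vec_cntlz_lsbb and vec_cnttz_lsbb names from altivec.h;
;; -mcpu=power9):
;;
;;	#include <altivec.h>
;;
;;	/* Count leading byte elements whose low-order bit is zero.  */
;;	int lead (vector unsigned char v)
;;	{
;;	  return vec_cntlz_lsbb (v);
;;	}
;;
;;	/* Count trailing byte elements whose low-order bit is zero.  */
;;	int trail (vector unsigned char v)
;;	{
;;	  return vec_cnttz_lsbb (v);
;;	}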
5204
5205;; Vector Extract Unsigned Byte Left-Indexed
5206(define_insn "vextublx"
5207  [(set (match_operand:SI 0 "register_operand" "=r")
5208	(unspec:SI
5209	 [(match_operand:SI 1 "register_operand" "r")
5210	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
5211	 UNSPEC_VEXTUBLX))]
5212  "TARGET_P9_VECTOR"
5213  "vextublx %0,%1,%2"
5214  [(set_attr "type" "vecsimple")])
5215
5216;; Vector Extract Unsigned Byte Right-Indexed
5217(define_insn "vextubrx"
5218  [(set (match_operand:SI 0 "register_operand" "=r")
5219	(unspec:SI
5220	 [(match_operand:SI 1 "register_operand" "r")
5221	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
5222	 UNSPEC_VEXTUBRX))]
5223  "TARGET_P9_VECTOR"
5224  "vextubrx %0,%1,%2"
5225  [(set_attr "type" "vecsimple")])
5226
5227;; Vector Extract Unsigned Half Word Left-Indexed
5228(define_insn "vextuhlx"
5229  [(set (match_operand:SI 0 "register_operand" "=r")
5230	(unspec:SI
5231	 [(match_operand:SI 1 "register_operand" "r")
5232	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
5233	 UNSPEC_VEXTUHLX))]
5234  "TARGET_P9_VECTOR"
5235  "vextuhlx %0,%1,%2"
5236  [(set_attr "type" "vecsimple")])
5237
5238;; Vector Extract Unsigned Half Word Right-Indexed
5239(define_insn "vextuhrx"
5240  [(set (match_operand:SI 0 "register_operand" "=r")
5241	(unspec:SI
5242	 [(match_operand:SI 1 "register_operand" "r")
5243	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
5244	 UNSPEC_VEXTUHRX))]
5245  "TARGET_P9_VECTOR"
5246  "vextuhrx %0,%1,%2"
5247  [(set_attr "type" "vecsimple")])
5248
5249;; Vector Extract Unsigned Word Left-Indexed
5250(define_insn "vextuwlx"
5251  [(set (match_operand:SI 0 "register_operand" "=r")
5252	(unspec:SI
5253	 [(match_operand:SI 1 "register_operand" "r")
5254	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
5255	 UNSPEC_VEXTUWLX))]
5256  "TARGET_P9_VECTOR"
5257  "vextuwlx %0,%1,%2"
5258  [(set_attr "type" "vecsimple")])
5259
5260;; Vector Extract Unsigned Word Right-Indexed
5261(define_insn "vextuwrx"
5262  [(set (match_operand:SI 0 "register_operand" "=r")
5263	(unspec:SI
5264	 [(match_operand:SI 1 "register_operand" "r")
5265	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
5266	 UNSPEC_VEXTUWRX))]
5267  "TARGET_P9_VECTOR"
5268  "vextuwrx %0,%1,%2"
5269  [(set_attr "type" "vecsimple")])
5270
;; Vector insert/extract word at arbitrary byte values.  Note that the
;; little endian version needs to adjust the byte number and the V4SI
;; element used by insert4b.
5274(define_insn "extract4b"
5275  [(set (match_operand:V2DI 0 "vsx_register_operand")
5276       (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
5277                     (match_operand:QI 2 "const_0_to_12_operand" "n")]
5278                    UNSPEC_XXEXTRACTUW))]
5279  "TARGET_P9_VECTOR"
5280{
5281  if (!BYTES_BIG_ENDIAN)
5282    operands[2] = GEN_INT (12 - INTVAL (operands[2]));
5283
5284  return "xxextractuw %x0,%x1,%2";
5285})
5286
5287(define_expand "insert4b"
5288  [(set (match_operand:V16QI 0 "vsx_register_operand")
5289	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
5290		       (match_operand:V16QI 2 "vsx_register_operand")
5291		       (match_operand:QI 3 "const_0_to_12_operand")]
5292		   UNSPEC_XXINSERTW))]
5293  "TARGET_P9_VECTOR"
5294{
5295  if (!BYTES_BIG_ENDIAN)
5296    {
5297      rtx op1 = operands[1];
5298      rtx v4si_tmp = gen_reg_rtx (V4SImode);
5299      emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
5300      operands[1] = v4si_tmp;
5301      operands[3] = GEN_INT (12 - INTVAL (operands[3]));
5302    }
5303})
5304
5305(define_insn "*insert4b_internal"
5306  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5307	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
5308		       (match_operand:V16QI 2 "vsx_register_operand" "0")
5309		       (match_operand:QI 3 "const_0_to_12_operand" "n")]
5310		   UNSPEC_XXINSERTW))]
5311  "TARGET_P9_VECTOR"
5312  "xxinsertw %x0,%x1,%3"
5313  [(set_attr "type" "vecperm")])
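
;; A sketch of the 4-byte insert/extract built-ins (assuming the
;; documented vec_extract4b and vec_insert4b names and argument orders
;; from altivec.h; -mcpu=power9):
;;
;;	#include <altivec.h>
;;
;;	/* Extract the word starting at byte offset 4 (xxextractuw).  */
;;	vector unsigned long long get4b (vector unsigned char v)
;;	{
;;	  return vec_extract4b (v, 4);
;;	}
;;
;;	/* Insert a word at byte offset 4 (xxinsertw).  */
;;	vector unsigned char put4b (vector signed int w,
;;				    vector unsigned char v)
;;	{
;;	  return vec_insert4b (w, v, 4);
;;	}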
5314
5315
;; Extract four 32-bit float values from the left four elements of an
;; eight-element vector of 16-bit float values.
5318(define_expand "vextract_fp_from_shorth"
5319  [(set (match_operand:V4SF 0 "register_operand" "=wa")
5320	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5321   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
5322  "TARGET_P9_VECTOR"
5323{
5324  int i;
5325  int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5326  int vals_be[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5327
5328  rtx rvals[16];
5329  rtx mask = gen_reg_rtx (V16QImode);
5330  rtx tmp = gen_reg_rtx (V16QImode);
5331  rtvec v;
5332
5333  for (i = 0; i < 16; i++)
5334    if (!BYTES_BIG_ENDIAN)
5335      rvals[i] = GEN_INT (vals_le[i]);
5336    else
5337      rvals[i] = GEN_INT (vals_be[i]);
5338
5339  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5340     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
5341     src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
5342     conversion instruction.  */
5343  v = gen_rtvec_v (16, rvals);
5344  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5345  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5346					  operands[1], mask));
5347  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5348  DONE;
5349})
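
;; A sketch of the built-in that reaches the expander above (assuming
;; the documented vec_extract_fp32_from_shorth name from altivec.h;
;; -mcpu=power9):
;;
;;	#include <altivec.h>
;;
;;	/* Widen the left four 16-bit floats of V to 32-bit floats.  */
;;	vector float widen_high (vector unsigned short v)
;;	{
;;	  return vec_extract_fp32_from_shorth (v);
;;	}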
5350
;; Extract four 32-bit float values from the right four elements of an
;; eight-element vector of 16-bit float values.
5353(define_expand "vextract_fp_from_shortl"
5354  [(set (match_operand:V4SF 0 "register_operand" "=wa")
5355	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5356	UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
5357  "TARGET_P9_VECTOR"
5358{
5359  int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5360  int vals_be[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5361
5362  int i;
5363  rtx rvals[16];
5364  rtx mask = gen_reg_rtx (V16QImode);
5365  rtx tmp = gen_reg_rtx (V16QImode);
5366  rtvec v;
5367
5368  for (i = 0; i < 16; i++)
5369    if (!BYTES_BIG_ENDIAN)
5370      rvals[i] = GEN_INT (vals_le[i]);
5371    else
5372      rvals[i] = GEN_INT (vals_be[i]);
5373
5374  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5375     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
5376     src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
5377     conversion instruction.  */
5378  v = gen_rtvec_v (16, rvals);
5379  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5380  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5381					  operands[1], mask));
5382  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5383  DONE;
5384})
5385
5386;; Support for ISA 3.0 vector byte reverse
5387
;; Swap all bytes within a vector
5389(define_insn "p9_xxbrq_v1ti"
5390  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
5391	(bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
5392  "TARGET_P9_VECTOR"
5393  "xxbrq %x0,%x1"
5394  [(set_attr "type" "vecperm")])
5395
5396(define_expand "p9_xxbrq_v16qi"
5397  [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
5398   (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
5399  "TARGET_P9_VECTOR"
5400{
5401  rtx op0 = gen_reg_rtx (V1TImode);
5402  rtx op1 = gen_lowpart (V1TImode, operands[1]);
5403  emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
5404  emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
5405  DONE;
5406})
5407
5408;; Swap all bytes in each 64-bit element
5409(define_insn "p9_xxbrd_v2di"
5410  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5411	(bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
5412  "TARGET_P9_VECTOR"
5413  "xxbrd %x0,%x1"
5414  [(set_attr "type" "vecperm")])
5415
5416(define_expand "p9_xxbrd_v2df"
5417  [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
5418   (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
5419  "TARGET_P9_VECTOR"
5420{
5421  rtx op0 = gen_reg_rtx (V2DImode);
5422  rtx op1 = gen_lowpart (V2DImode, operands[1]);
5423  emit_insn (gen_p9_xxbrd_v2di (op0, op1));
5424  emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
5425  DONE;
5426})
5427
5428;; Swap all bytes in each 32-bit element
5429(define_insn "p9_xxbrw_v4si"
5430  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
5431	(bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
5432  "TARGET_P9_VECTOR"
5433  "xxbrw %x0,%x1"
5434  [(set_attr "type" "vecperm")])
5435
5436(define_expand "p9_xxbrw_v4sf"
5437  [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
5438   (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
5439  "TARGET_P9_VECTOR"
5440{
5441  rtx op0 = gen_reg_rtx (V4SImode);
5442  rtx op1 = gen_lowpart (V4SImode, operands[1]);
5443  emit_insn (gen_p9_xxbrw_v4si (op0, op1));
5444  emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
5445  DONE;
5446})
5447
;; Swap all bytes in each element of a vector
5449(define_expand "revb_<mode>"
5450  [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
5451   (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
5452  ""
5453{
5454  if (TARGET_P9_VECTOR)
5455    emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
5456  else
5457    {
5458      /* Want to have the elements in reverse order relative
5459	 to the endian mode in use, i.e. in LE mode, put elements
5460	 in BE order.  */
      rtx sel = swap_endian_selector_for_mode (<MODE>mode);
5462      emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
5463					   operands[1], sel));
5464    }
5465
5466  DONE;
5467})
5468
5469;; Reversing bytes in vector char is just a NOP.
5470(define_expand "revb_v16qi"
5471  [(set (match_operand:V16QI 0 "vsx_register_operand")
5472	(bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
5473  ""
5474{
5475  emit_move_insn (operands[0], operands[1]);
5476  DONE;
5477})
5478
5479;; Swap all bytes in each 16-bit element
5480(define_insn "p9_xxbrh_v8hi"
5481  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
5482	(bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
5483  "TARGET_P9_VECTOR"
5484  "xxbrh %x0,%x1"
5485  [(set_attr "type" "vecperm")])
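
;; A sketch of the byte-reverse built-in served by the revb expanders
;; (assuming the documented vec_revb name from altivec.h):
;;
;;	#include <altivec.h>
;;
;;	/* Byte-reverse each 32-bit element (xxbrw on ISA 3.0, a vperm
;;	   with a swap selector otherwise).  */
;;	vector unsigned int bswap_words (vector unsigned int v)
;;	{
;;	  return vec_revb (v);
;;	}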
5486
5487
5488;; Operand numbers for the following peephole2
5489(define_constants
5490  [(SFBOOL_TMP_GPR		 0)		;; GPR temporary
5491   (SFBOOL_TMP_VSX		 1)		;; vector temporary
5492   (SFBOOL_MFVSR_D		 2)		;; move to gpr dest
5493   (SFBOOL_MFVSR_A		 3)		;; move to gpr src
5494   (SFBOOL_BOOL_D		 4)		;; and/ior/xor dest
5495   (SFBOOL_BOOL_A1		 5)		;; and/ior/xor arg1
   (SFBOOL_BOOL_A2		 6)		;; and/ior/xor arg2
5497   (SFBOOL_SHL_D		 7)		;; shift left dest
5498   (SFBOOL_SHL_A		 8)		;; shift left arg
   (SFBOOL_MTVSR_D		 9)		;; move to vector dest
5500   (SFBOOL_MFVSR_A_V4SF		10)		;; SFBOOL_MFVSR_A as V4SFmode
5501   (SFBOOL_BOOL_A_DI		11)		;; SFBOOL_BOOL_A1/A2 as DImode
5502   (SFBOOL_TMP_VSX_DI		12)		;; SFBOOL_TMP_VSX as DImode
   (SFBOOL_MTVSR_D_V4SF		13)])		;; SFBOOL_MTVSR_D as V4SFmode
5504
;; Attempt to optimize some common GLIBC sequences that use integer logical
;; operations to pick apart SFmode values.  For example, e_powf.c contains
;; code that looks like this after macro expansion:
5508;;
5509;;	typedef union {
5510;;	  float value;
5511;;	  uint32_t word;
5512;;	} ieee_float_shape_type;
5513;;
5514;;	float t1;
5515;;	int32_t is;
5516;;
5517;;	do {
5518;;	  ieee_float_shape_type gf_u;
5519;;	  gf_u.value = (t1);
5520;;	  (is) = gf_u.word;
5521;;	} while (0);
5522;;
5523;;	do {
5524;;	  ieee_float_shape_type sf_u;
5525;;	  sf_u.word = (is & 0xfffff000);
5526;;	  (t1) = sf_u.value;
5527;;	} while (0);
;;
5530;; This would result in two direct move operations (convert to memory format,
5531;; direct move to GPR, do the AND operation, direct move to VSX, convert to
5532;; scalar format).  With this peephole, we eliminate the direct move to the
5533;; GPR, and instead move the integer mask value to the vector register after a
5534;; shift and do the VSX logical operation.
5535
;; The insns for dealing with SFmode in GPR registers look like:
5537;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
5538;;
5539;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
5540;;
5541;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
5542;;
5543;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
5544;;
5545;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
5546;;
5547;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
5548
5549(define_peephole2
5550  [(match_scratch:DI SFBOOL_TMP_GPR "r")
5551   (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
5552
5553   ;; MFVSRWZ (aka zero_extend)
5554   (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
5555	(zero_extend:DI
5556	 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
5557
5558   ;; AND/IOR/XOR operation on int
5559   (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
5560	(and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
5561			(match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
5562
5563   ;; SLDI
5564   (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
5565	(ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
5566		   (const_int 32)))
5567
5568   ;; MTVSRD
5569   (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
5570	(unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
5571
5572  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
   /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
      to compare registers even when the modes are different.  */
5575   && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
5576   && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
5577   && REG_P (operands[SFBOOL_SHL_A])   && REG_P (operands[SFBOOL_MTVSR_D])
5578   && (REG_P (operands[SFBOOL_BOOL_A2])
5579       || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
5580   && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
5581       || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
5582   && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
5583       || (REG_P (operands[SFBOOL_BOOL_A2])
5584	   && REGNO (operands[SFBOOL_MFVSR_D])
5585		== REGNO (operands[SFBOOL_BOOL_A2])))
5586   && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
5587   && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
5588       || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
5589   && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
5590  [(set (match_dup SFBOOL_TMP_GPR)
5591	(ashift:DI (match_dup SFBOOL_BOOL_A_DI)
5592		   (const_int 32)))
5593
5594   (set (match_dup SFBOOL_TMP_VSX_DI)
5595	(match_dup SFBOOL_TMP_GPR))
5596
5597   (set (match_dup SFBOOL_MTVSR_D_V4SF)
5598	(and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
5599			  (match_dup SFBOOL_TMP_VSX)))]
5600{
5601  rtx bool_a1 = operands[SFBOOL_BOOL_A1];
5602  rtx bool_a2 = operands[SFBOOL_BOOL_A2];
5603  int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
5604  int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
5605  int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
5606  int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
5607
5608  if (CONST_INT_P (bool_a2))
5609    {
5610      rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
5611      emit_move_insn (tmp_gpr, bool_a2);
5612      operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
5613    }
5614  else
5615    {
5616      int regno_bool_a1 = REGNO (bool_a1);
5617      int regno_bool_a2 = REGNO (bool_a2);
5618      int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
5619			  ? regno_bool_a2 : regno_bool_a1);
5620      operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
5621    }
5622
5623  operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
5624  operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
5625  operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
5626})
5627
5628;; Support signed/unsigned long long to float conversion vectorization.
5629;; Note that any_float (pc) here is just for code attribute <su>.
5630(define_expand "vec_pack<su>_float_v2di"
5631  [(match_operand:V4SF 0 "vfloat_operand")
5632   (match_operand:V2DI 1 "vint_operand")
5633   (match_operand:V2DI 2 "vint_operand")
5634   (any_float (pc))]
5635  "TARGET_VSX"
5636{
5637  rtx r1 = gen_reg_rtx (V4SFmode);
5638  rtx r2 = gen_reg_rtx (V4SFmode);
5639  emit_insn (gen_vsx_xvcv<su>xdsp (r1, operands[1]));
5640  emit_insn (gen_vsx_xvcv<su>xdsp (r2, operands[2]));
5641  rs6000_expand_extract_even (operands[0], r1, r2);
5642  DONE;
5643})
5644
5645;; Support float to signed/unsigned long long conversion vectorization.
5646;; Note that any_fix (pc) here is just for code attribute <su>.
5647(define_expand "vec_unpack_<su>fix_trunc_hi_v4sf"
5648  [(match_operand:V2DI 0 "vint_operand")
5649   (match_operand:V4SF 1 "vfloat_operand")
5650   (any_fix (pc))]
5651  "TARGET_VSX"
5652{
5653  rtx reg = gen_reg_rtx (V4SFmode);
5654  rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
5655  emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
5656  DONE;
5657})
5658
5659;; Note that any_fix (pc) here is just for code attribute <su>.
5660(define_expand "vec_unpack_<su>fix_trunc_lo_v4sf"
5661  [(match_operand:V2DI 0 "vint_operand")
5662   (match_operand:V4SF 1 "vfloat_operand")
5663   (any_fix (pc))]
5664  "TARGET_VSX"
5665{
5666  rtx reg = gen_reg_rtx (V4SFmode);
5667  rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
5668  emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
5669  DONE;
5670})
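
;; As a sketch, loops like the following are what the vectorizer is
;; expected to map onto the vec_pack<su>_float_v2di and
;; vec_unpack_<su>fix_trunc_{hi,lo}_v4sf standard names above
;; (hypothetical example code, compiled with -mvsx and vectorization
;; enabled):
;;
;;	void pack (float *f, long long *ll, int n)
;;	{
;;	  for (int i = 0; i < n; i++)
;;	    f[i] = (float) ll[i];
;;	}
;;
;;	void unpack (long long *ll, float *f, int n)
;;	{
;;	  for (int i = 0; i < n; i++)
;;	    ll[i] = (long long) f[i];
;;	}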
5671
5672(define_insn "vsx_<xvcvbf16>"
5673  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5674	(unspec:V16QI [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
5675		      XVCVBF16))]
5676  "TARGET_POWER10"
5677  "<xvcvbf16> %x0,%x1"
5678  [(set_attr "type" "vecfloat")])
5679