xref: /dragonfly/contrib/gcc-4.7/gcc/config/i386/sse.md (revision b575ab8a)
1;; GCC machine description for SSE instructions
2;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3;; Free Software Foundation, Inc.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify
8;; it under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 3, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful,
13;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15;; GNU General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3.  If not see
19;; <http://www.gnu.org/licenses/>.
20
;; Codes of the "unspec" C enumeration used by the patterns in this file,
;; grouped by the ISA extension they belong to.
21(define_c_enum "unspec" [
22  ;; SSE
23  UNSPEC_MOVNT
24  UNSPEC_MOVU
25
26  ;; SSE3
27  UNSPEC_LDDQU
28
29  ;; SSSE3
30  UNSPEC_PSHUFB
31  UNSPEC_PSIGN
32  UNSPEC_PALIGNR
33
34  ;; For SSE4A support
35  UNSPEC_EXTRQI
36  UNSPEC_EXTRQ
37  UNSPEC_INSERTQI
38  UNSPEC_INSERTQ
39
40  ;; For SSE4.1 support
41  UNSPEC_BLENDV
42  UNSPEC_INSERTPS
43  UNSPEC_DP
44  UNSPEC_MOVNTDQA
45  UNSPEC_MPSADBW
46  UNSPEC_PHMINPOSUW
47  UNSPEC_PTEST
48
49  ;; For SSE4.2 support
50  UNSPEC_PCMPESTR
51  UNSPEC_PCMPISTR
52
53  ;; For FMA4 and XOP support
54  UNSPEC_FMADDSUB
55  UNSPEC_XOP_UNSIGNED_CMP
56  UNSPEC_XOP_TRUEFALSE
57  UNSPEC_XOP_PERMUTE
58  UNSPEC_FRCZ
59
60  ;; For AES support
61  UNSPEC_AESENC
62  UNSPEC_AESENCLAST
63  UNSPEC_AESDEC
64  UNSPEC_AESDECLAST
65  UNSPEC_AESIMC
66  UNSPEC_AESKEYGENASSIST
67
68  ;; For PCLMUL support
69  UNSPEC_PCLMUL
70
71  ;; For AVX support
72  UNSPEC_PCMP
73  UNSPEC_VPERMIL
74  UNSPEC_VPERMIL2
75  UNSPEC_VPERMIL2F128
76  UNSPEC_CAST
77  UNSPEC_VTESTP
78  UNSPEC_VCVTPH2PS
79  UNSPEC_VCVTPS2PH
80
81  ;; For AVX2 support
82  UNSPEC_VPERMSI
83  UNSPEC_VPERMDF
84  UNSPEC_VPERMSF
85  UNSPEC_VPERMTI
86  UNSPEC_GATHER
87  UNSPEC_VSIBADDR
88])
89
;; Codes added to the "unspecv" C enumeration by this file.
90(define_c_enum "unspecv" [
91  UNSPECV_LDMXCSR
92  UNSPECV_STMXCSR
93  UNSPECV_CLFLUSH
94  UNSPECV_MONITOR
95  UNSPECV_MWAIT
96  UNSPECV_VZEROALL
97  UNSPECV_VZEROUPPER
98])
99
100;; All vector modes including V?TImode, used in move patterns.
101(define_mode_iterator V16
102  [(V32QI "TARGET_AVX") V16QI
103   (V16HI "TARGET_AVX") V8HI
104   (V8SI "TARGET_AVX") V4SI
105   (V4DI "TARGET_AVX") V2DI
106   (V2TI "TARGET_AVX") V1TI
107   (V8SF "TARGET_AVX") V4SF
108   (V4DF "TARGET_AVX") V2DF])
109
110;; All vector modes
111(define_mode_iterator V
112  [(V32QI "TARGET_AVX") V16QI
113   (V16HI "TARGET_AVX") V8HI
114   (V8SI "TARGET_AVX") V4SI
115   (V4DI "TARGET_AVX") V2DI
116   (V8SF "TARGET_AVX") V4SF
117   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
118
119;; All 128bit vector modes
120(define_mode_iterator V_128
121  [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
122
123;; All 256bit vector modes
124(define_mode_iterator V_256
125  [V32QI V16HI V8SI V4DI V8SF V4DF])
126
127;; All vector float modes
128(define_mode_iterator VF
129  [(V8SF "TARGET_AVX") V4SF
130   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
131
132;; All SFmode vector float modes
133(define_mode_iterator VF1
134  [(V8SF "TARGET_AVX") V4SF])
135
136;; All DFmode vector float modes
137(define_mode_iterator VF2
138  [(V4DF "TARGET_AVX") V2DF])
139
140;; All 128bit vector float modes
141(define_mode_iterator VF_128
142  [V4SF (V2DF "TARGET_SSE2")])
143
144;; All 256bit vector float modes
145(define_mode_iterator VF_256
146  [V8SF V4DF])
147
148;; All vector integer modes
149(define_mode_iterator VI
150  [(V32QI "TARGET_AVX") V16QI
151   (V16HI "TARGET_AVX") V8HI
152   (V8SI "TARGET_AVX") V4SI
153   (V4DI "TARGET_AVX") V2DI])
154
;; All vector integer modes; unlike VI, the 256bit modes require
;; TARGET_AVX2 rather than TARGET_AVX.
155(define_mode_iterator VI_AVX2
156  [(V32QI "TARGET_AVX2") V16QI
157   (V16HI "TARGET_AVX2") V8HI
158   (V8SI "TARGET_AVX2") V4SI
159   (V4DI "TARGET_AVX2") V2DI])
160
161;; All QImode vector integer modes
162(define_mode_iterator VI1
163  [(V32QI "TARGET_AVX") V16QI])
164
165;; All DImode vector integer modes
166(define_mode_iterator VI8
167  [(V4DI "TARGET_AVX") V2DI])
168
;; QImode vector integer modes; V32QI requires TARGET_AVX2.
169(define_mode_iterator VI1_AVX2
170  [(V32QI "TARGET_AVX2") V16QI])
171
;; HImode vector integer modes; V16HI requires TARGET_AVX2.
172(define_mode_iterator VI2_AVX2
173  [(V16HI "TARGET_AVX2") V8HI])
174
;; SImode vector integer modes; V8SI requires TARGET_AVX2.
175(define_mode_iterator VI4_AVX2
176  [(V8SI "TARGET_AVX2") V4SI])
177
;; DImode vector integer modes; V4DI requires TARGET_AVX2.
178(define_mode_iterator VI8_AVX2
179  [(V4DI "TARGET_AVX2") V2DI])
180
181;; ??? We should probably use TImode instead.
182(define_mode_iterator VIMAX_AVX2
183  [(V2TI "TARGET_AVX2") V1TI])
184
185;; ??? This should probably be dropped in favor of VIMAX_AVX2.
186(define_mode_iterator SSESCALARMODE
187  [(V2TI "TARGET_AVX2") TI])
188
;; QImode and HImode vector integer modes; the 256bit modes require
;; TARGET_AVX2.
189(define_mode_iterator VI12_AVX2
190  [(V32QI "TARGET_AVX2") V16QI
191   (V16HI "TARGET_AVX2") V8HI])
192
;; HImode and SImode vector integer modes; the 256bit modes require
;; TARGET_AVX2.
193(define_mode_iterator VI24_AVX2
194  [(V16HI "TARGET_AVX2") V8HI
195   (V8SI "TARGET_AVX2") V4SI])
196
;; QImode, HImode and SImode vector integer modes; the 256bit modes
;; require TARGET_AVX2.
197(define_mode_iterator VI124_AVX2
198  [(V32QI "TARGET_AVX2") V16QI
199   (V16HI "TARGET_AVX2") V8HI
200   (V8SI "TARGET_AVX2") V4SI])
201
;; HImode, SImode and DImode vector integer modes; the 256bit modes
;; require TARGET_AVX2.
202(define_mode_iterator VI248_AVX2
203  [(V16HI "TARGET_AVX2") V8HI
204   (V8SI "TARGET_AVX2") V4SI
205   (V4DI "TARGET_AVX2") V2DI])
206
;; SImode and DImode vector integer modes; the 256bit modes require
;; TARGET_AVX2.
207(define_mode_iterator VI48_AVX2
208  [(V8SI "TARGET_AVX2") V4SI
209   (V4DI "TARGET_AVX2") V2DI])
210
;; Vector modes with 4- or 8-byte elements.  The float modes carry no
;; condition of their own; every integer mode, 128bit as well as 256bit,
;; requires TARGET_AVX2.
211(define_mode_iterator V48_AVX2
212  [V4SF V2DF
213   V8SF V4DF
214   (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
215   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
216
;; Maps 128bit integer vector modes (including V1TI) to "sse2" and
;; 256bit ones (including V2TI) to "avx2".
217(define_mode_attr sse2_avx2
218  [(V16QI "sse2") (V32QI "avx2")
219   (V8HI "sse2") (V16HI "avx2")
220   (V4SI "sse2") (V8SI "avx2")
221   (V2DI "sse2") (V4DI "avx2")
222   (V1TI "sse2") (V2TI "avx2")])
223
;; Maps 128bit integer vector modes to "ssse3" and 256bit ones to "avx2".
;; Note that the 128bit whole-register entry is keyed on TI here, not on
;; V1TI as in sse2_avx2.
224(define_mode_attr ssse3_avx2
225   [(V16QI "ssse3") (V32QI "avx2")
226    (V8HI "ssse3") (V16HI "avx2")
227    (V4SI "ssse3") (V8SI "avx2")
228    (V2DI "ssse3") (V4DI "avx2")
229    (TI "ssse3") (V2TI "avx2")])
230
;; Maps 128bit integer vector modes to "sse4_1" and 256bit ones to "avx2".
231(define_mode_attr sse4_1_avx2
232   [(V16QI "sse4_1") (V32QI "avx2")
233    (V8HI "sse4_1") (V16HI "avx2")
234    (V4SI "sse4_1") (V8SI "avx2")
235    (V2DI "sse4_1") (V4DI "avx2")])
236
;; Maps float vector modes to "avx" and SImode/DImode integer vector
;; modes to "avx2", for both the 128bit and the 256bit sizes.
237(define_mode_attr avx_avx2
238  [(V4SF "avx") (V2DF "avx")
239   (V8SF "avx") (V4DF "avx")
240   (V4SI "avx2") (V2DI "avx2")
241   (V8SI "avx2") (V4DI "avx2")])
242
;; Maps 128bit integer vector modes to "vec" and 256bit ones to "avx2".
243(define_mode_attr vec_avx2
244  [(V16QI "vec") (V32QI "avx2")
245   (V8HI "vec") (V16HI "avx2")
246   (V4SI "vec") (V8SI "avx2")
247   (V2DI "vec") (V4DI "avx2")])
248
;; Maps an HImode vector mode to the SImode vector mode with the same
;; number of elements (each element twice as wide).
249(define_mode_attr ssedoublemode
250  [(V16HI "V16SI") (V8HI "V8SI")])
251
;; Maps a DImode vector mode to the QImode vector mode of the same
;; total size.
252(define_mode_attr ssebytemode
253  [(V4DI "V32QI") (V2DI "V16QI")])
254
255;; All 128bit vector integer modes
256(define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
257
258;; All 256bit vector integer modes
259(define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
260
261;; Random 128bit vector integer mode combinations
262(define_mode_iterator VI12_128 [V16QI V8HI])
263(define_mode_iterator VI14_128 [V16QI V4SI])
264(define_mode_iterator VI124_128 [V16QI V8HI V4SI])
265(define_mode_iterator VI128_128 [V16QI V8HI V2DI])
266(define_mode_iterator VI24_128 [V8HI V4SI])
267(define_mode_iterator VI248_128 [V8HI V4SI V2DI])
268(define_mode_iterator VI48_128 [V4SI V2DI])
269
270;; Random 256bit vector integer mode combinations
271(define_mode_iterator VI124_256 [V32QI V16HI V8SI])
272(define_mode_iterator VI48_256 [V8SI V4DI])
273
274;; Int-float size matches
275(define_mode_iterator VI4F_128 [V4SI V4SF])
276(define_mode_iterator VI8F_128 [V2DI V2DF])
277(define_mode_iterator VI4F_256 [V8SI V8SF])
278(define_mode_iterator VI8F_256 [V4DI V4DF])
279
280;; Mapping from float mode to required SSE level
281(define_mode_attr sse
282  [(SF "sse") (DF "sse2")
283   (V4SF "sse") (V2DF "sse2")
284   (V8SF "avx") (V4DF "avx")])
285
;; Pattern-name prefix for the V16QI/V2DI ("sse2") and V32QI/V4DI ("avx")
;; patterns; used below in the <sse2>_movdqu and <sse2>_movnt names.
286(define_mode_attr sse2
287  [(V16QI "sse2") (V32QI "avx")
288   (V2DI "sse2") (V4DI "avx")])
289
;; Pattern-name prefix for QImode vectors: "sse3" for V16QI, "avx" for
;; V32QI; used below in the <sse3>_lddqu name.
290(define_mode_attr sse3
291  [(V16QI "sse3") (V32QI "avx")])
292
;; Maps 128bit float vector modes to "sse4_1" and 256bit ones to "avx".
293(define_mode_attr sse4_1
294  [(V4SF "sse4_1") (V2DF "sse4_1")
295   (V8SF "avx") (V4DF "avx")])
296
;; Pattern-name size suffix: "256" for 256bit vector modes, empty for
;; 128bit ones; used below in the movu, movdqu and lddqu pattern names.
297(define_mode_attr avxsizesuffix
298  [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
299   (V16QI "") (V8HI "") (V4SI "") (V2DI "")
300   (V8SF "256") (V4DF "256")
301   (V4SF "") (V2DF "")])
302
303;; SSE instruction mode
304(define_mode_attr sseinsnmode
305  [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
306   (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
307   (V8SF "V8SF") (V4DF "V4DF")
308   (V4SF "V4SF") (V2DF "V2DF")
309   (TI "TI")])
310
311;; Mapping of vector float modes to an integer mode of the same size
312(define_mode_attr sseintvecmode
313  [(V8SF "V8SI") (V4DF "V4DI")
314   (V4SF "V4SI") (V2DF "V2DI")
315   (V8SI "V8SI") (V4DI "V4DI")
316   (V4SI "V4SI") (V2DI "V2DI")
317   (V16HI "V16HI") (V8HI "V8HI")
318   (V32QI "V32QI") (V16QI "V16QI")])
319
;; Same mapping as sseintvecmode, with the resulting mode name spelled
;; in lower case.
320(define_mode_attr sseintvecmodelower
321  [(V8SF "v8si") (V4DF "v4di")
322   (V4SF "v4si") (V2DF "v2di")
323   (V8SI "v8si") (V4DI "v4di")
324   (V4SI "v4si") (V2DI "v2di")
325   (V16HI "v16hi") (V8HI "v8hi")
326   (V32QI "v32qi") (V16QI "v16qi")])
327
328;; Mapping of vector modes to a vector mode of double size
329(define_mode_attr ssedoublevecmode
330  [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
331   (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
332   (V8SF "V16SF") (V4DF "V8DF")
333   (V4SF "V8SF") (V2DF "V4DF")])
334
335;; Mapping of vector modes to a vector mode of half size
336(define_mode_attr ssehalfvecmode
337  [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
338   (V16QI  "V8QI") (V8HI  "V4HI") (V4SI "V2SI")
339   (V8SF "V4SF") (V4DF "V2DF")
340   (V4SF "V2SF")])
341
342;; Mapping of vector modes back to the scalar modes
343(define_mode_attr ssescalarmode
344  [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
345   (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
346   (V8SF "SF") (V4DF "DF")
347   (V4SF "SF") (V2DF "DF")])
348
349;; Number of scalar elements in each vector type
350(define_mode_attr ssescalarnum
351  [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
352   (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
353   (V8SF "8") (V4DF "4")
354   (V4SF "4") (V2DF "2")])
355
356;; SSE prefix for integer vector modes
357(define_mode_attr sseintprefix
358  [(V2DI "p") (V2DF "")
359   (V4DI "p") (V4DF "")
360   (V4SI "p") (V4SF "")
361   (V8SI "p") (V8SF "")])
362
363;; SSE scalar suffix for vector modes
364(define_mode_attr ssescalarmodesuffix
365  [(SF "ss") (DF "sd")
366   (V8SF "ss") (V4DF "sd")
367   (V4SF "ss") (V2DF "sd")
368   (V8SI "ss") (V4DI "sd")
369   (V4SI "d")])
370
371;; Pack/unpack vector modes
372(define_mode_attr sseunpackmode
373  [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
374   (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
375
;; Inverse direction of sseunpackmode: maps an integer vector mode to the
;; same-size mode with twice as many elements of half the width.
376(define_mode_attr ssepackmode
377  [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
378   (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
379
380;; Mapping of the max integer size for xop rotate immediate constraint
381(define_mode_attr sserotatemax
382  [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
383
384;; Mapping of mode to cast intrinsic name
385(define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
386
387;; Instruction suffix for sign and zero extensions.
388(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
389
390;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
391(define_mode_attr i128
392  [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
393   (V8SI "%~128") (V4DI "%~128")])
394
395;; Mix-n-match
396(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
397
398;; Mapping of immediate bits for blend instructions
399(define_mode_attr blendbits
400  [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
401
402;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
403
404;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
405;;
406;; Move patterns
407;;
408;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
409
410;; All of these patterns are enabled for SSE1 as well as SSE2.
411;; This is essential for maintaining stable calling conventions.
412
;; Move expander for every mode in V16.  All work is delegated to
;; ix86_expand_vector_move; because the preparation code ends in DONE the
;; RTL template itself is never emitted.
413(define_expand "mov<mode>"
414  [(set (match_operand:V16 0 "nonimmediate_operand" "")
415	(match_operand:V16 1 "nonimmediate_operand" ""))]
416  "TARGET_SSE"
417{
418  ix86_expand_vector_move (<MODE>mode, operands);
419  DONE;
420})
421
;; Vector move insn for every mode in V16.  The insn condition requires at
;; least one of the two operands to be a register.
;;
;; Alternatives:
;;   0: constant ("C") into a register; the opcode comes from
;;      standard_sse_constant_opcode.
;;   1: register or memory into a register.
;;   2: register into memory.
;;
;; For alternatives 1 and 2 the mnemonic is chosen from the insn's "mode"
;; attribute:
;;   V8SF/V4SF -> movaps
;;   V4DF/V2DF -> movapd (movaps when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
;;   OI/TI     -> movdqa (movaps when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
;; When TARGET_AVX is set and either operand satisfies misaligned_operand,
;; the unaligned form (vmovups/vmovupd/vmovdqu) is returned instead.
;;
;; The "mode" attribute itself is computed as: <sseinsnmode> under
;; TARGET_AVX; otherwise V4SF when optimizing the function for size, when
;; SSE2 is unavailable, or for the store alternative under
;; TARGET_SSE_TYPELESS_STORES; otherwise V4SF/V2DF for those two modes and
;; TI for everything else.
422(define_insn "*mov<mode>_internal"
423  [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
424	(match_operand:V16 1 "nonimmediate_or_sse_const_operand"  "C ,xm,x"))]
425  "TARGET_SSE
426   && (register_operand (operands[0], <MODE>mode)
427       || register_operand (operands[1], <MODE>mode))"
428{
429  switch (which_alternative)
430    {
431    case 0:
432      return standard_sse_constant_opcode (insn, operands[1]);
433    case 1:
434    case 2:
435      switch (get_attr_mode (insn))
436	{
437	case MODE_V8SF:
438	case MODE_V4SF:
439	  if (TARGET_AVX
440	      && (misaligned_operand (operands[0], <MODE>mode)
441		  || misaligned_operand (operands[1], <MODE>mode)))
442	    return "vmovups\t{%1, %0|%0, %1}";
443	  else
444	    return "%vmovaps\t{%1, %0|%0, %1}";
445
446	case MODE_V4DF:
447	case MODE_V2DF:
448	  if (TARGET_AVX
449	      && (misaligned_operand (operands[0], <MODE>mode)
450		  || misaligned_operand (operands[1], <MODE>mode)))
451	    return "vmovupd\t{%1, %0|%0, %1}";
452	  else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
453	    return "%vmovaps\t{%1, %0|%0, %1}";
454	  else
455	    return "%vmovapd\t{%1, %0|%0, %1}";
456
457	case MODE_OI:
458	case MODE_TI:
459	  if (TARGET_AVX
460	      && (misaligned_operand (operands[0], <MODE>mode)
461		  || misaligned_operand (operands[1], <MODE>mode)))
462	    return "vmovdqu\t{%1, %0|%0, %1}";
463	  else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
464	    return "%vmovaps\t{%1, %0|%0, %1}";
465	  else
466	    return "%vmovdqa\t{%1, %0|%0, %1}";
467
468	default:
469	  gcc_unreachable ();
470	}
471    default:
472      gcc_unreachable ();
473    }
474}
475  [(set_attr "type" "sselog1,ssemov,ssemov")
476   (set_attr "prefix" "maybe_vex")
477   (set (attr "mode")
478	(cond [(match_test "TARGET_AVX")
479		 (const_string "<sseinsnmode>")
480	       (ior (ior (match_test "optimize_function_for_size_p (cfun)")
481			 (not (match_test "TARGET_SSE2")))
482		    (and (eq_attr "alternative" "2")
483			 (match_test "TARGET_SSE_TYPELESS_STORES")))
484		 (const_string "V4SF")
485	       (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
486		 (const_string "V4SF")
487	       (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
488		 (const_string "V2DF")
489	      ]
490	  (const_string "TI")))])
491
;; movq between vectors: element 0 of the V2DI source (register or memory)
;; becomes the low DImode half of the destination and the high half is
;; set to (const_int 0).
492(define_insn "sse2_movq128"
493  [(set (match_operand:V2DI 0 "register_operand" "=x")
494	(vec_concat:V2DI
495	  (vec_select:DI
496	    (match_operand:V2DI 1 "nonimmediate_operand" "xm")
497	    (parallel [(const_int 0)]))
498	  (const_int 0)))]
499  "TARGET_SSE2"
500  "%vmovq\t{%1, %0|%0, %1}"
501  [(set_attr "type" "ssemov")
502   (set_attr "prefix" "maybe_vex")
503   (set_attr "mode" "TI")])
504
505;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
506;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
507;; from memory, we'd prefer to load the memory directly into the %xmm
508;; register.  To facilitate this happy circumstance, this pattern won't
509;; split until after register allocation.  If the 64-bit value didn't
510;; come from memory, this is the best we can do.  This is much better
511;; than storing %edx:%eax into a stack temporary and loading an %xmm
512;; from there.
513
;; Operands:
;;   0  V4SI destination xmm register.
;;   1  DImode source: a general-register pair (alternative 0) or memory
;;      (alternative 1).
;;   2  V4SI scratch xmm register, needed only for alternative 0.
;;
;; The insn is always output as "#" and split after reload:
;;   - register source: each 32-bit half is loaded into its own xmm
;;     register with sse2_loadld and the two are combined with
;;     vec_interleave_lowv4si;
;;   - memory source: a single vec_concatv2di with a zero upper half.
;;
;; The split body must end in DONE.  The replacement template is only the
;; placeholder (const_int 0); without DONE that placeholder would be
;; emitted as an extra insn after the sequence generated here.
(define_insn_and_split "movdi_to_sse"
  [(parallel
    [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
	  (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
     (clobber (match_scratch:V4SI 2 "=&x,X"))])]
  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[1], DImode))
    {
      /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
	 Assemble the 64-bit DImode value in an xmm register.  */
      emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
				  gen_rtx_SUBREG (SImode, operands[1], 0)));
      emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
				  gen_rtx_SUBREG (SImode, operands[1], 4)));
      emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
					     operands[2]));
    }
  else if (memory_operand (operands[1], DImode))
    emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
				   operands[1], const0_rtx));
  else
    gcc_unreachable ();

  /* Everything has been emitted by hand; do not emit the template.  */
  DONE;
})
541
;; After reload, rewrite a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand.  Operand 1 is narrowed to its SFmode
;; subreg at byte offset 0 and the load becomes a vec_merge (mask 1) of the
;; duplicated scalar with an all-zero V4SF vector (operand 2).
542(define_split
543  [(set (match_operand:V4SF 0 "register_operand" "")
544	(match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
545  "TARGET_SSE && reload_completed"
546  [(set (match_dup 0)
547	(vec_merge:V4SF
548	  (vec_duplicate:V4SF (match_dup 1))
549	  (match_dup 2)
550	  (const_int 1)))]
551{
552  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
553  operands[2] = CONST0_RTX (V4SFmode);
554})
555
;; V2DF counterpart of the split above: operand 1 is narrowed to its DFmode
;; subreg at byte offset 0 and the load becomes a vec_concat of that scalar
;; with a DFmode zero (operand 2).
556(define_split
557  [(set (match_operand:V2DF 0 "register_operand" "")
558	(match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
559  "TARGET_SSE2 && reload_completed"
560  [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
561{
562  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
563  operands[2] = CONST0_RTX (DFmode);
564})
565
;; Push of a vector register for every mode in V16; delegated entirely to
;; ix86_expand_push.
566(define_expand "push<mode>1"
567  [(match_operand:V16 0 "register_operand" "")]
568  "TARGET_SSE"
569{
570  ix86_expand_push (<MODE>mode, operands[0]);
571  DONE;
572})
573
;; Misaligned move expander for every mode in V16; delegated entirely to
;; ix86_expand_vector_move_misalign.
574(define_expand "movmisalign<mode>"
575  [(set (match_operand:V16 0 "nonimmediate_operand" "")
576	(match_operand:V16 1 "nonimmediate_operand" ""))]
577  "TARGET_SSE"
578{
579  ix86_expand_vector_move_misalign (<MODE>mode, operands);
580  DONE;
581})
582
;; Explicit unaligned move for float vector modes, represented as
;; UNSPEC_MOVU.  Alternative 0 loads a register from register/memory,
;; alternative 1 stores a register to memory; the insn condition rejects
;; memory-to-memory.
583(define_insn "<sse>_movu<ssemodesuffix><avxsizesuffix>"
584  [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
585	(unspec:VF
586	  [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
587	  UNSPEC_MOVU))]
588  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
589  "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
590  [(set_attr "type" "ssemov")
591   (set_attr "movu" "1")
592   (set_attr "prefix" "maybe_vex")
593   (set_attr "mode" "<MODE>")])
594
;; Explicit unaligned move (movdqu) for QImode vector modes, represented
;; as UNSPEC_MOVU.  Same alternatives and memory-to-memory restriction as
;; the float variant above.  prefix_data16 is "1" for the non-AVX encoding
;; and left at "*" under TARGET_AVX.
595(define_insn "<sse2>_movdqu<avxsizesuffix>"
596  [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
597	(unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
598		    UNSPEC_MOVU))]
599  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
600  "%vmovdqu\t{%1, %0|%0, %1}"
601  [(set_attr "type" "ssemov")
602   (set_attr "movu" "1")
603   (set (attr "prefix_data16")
604     (if_then_else
605       (match_test "TARGET_AVX")
606     (const_string "*")
607     (const_string "1")))
608   (set_attr "prefix" "maybe_vex")
609   (set_attr "mode" "<sseinsnmode>")])
610
;; lddqu: load-only unaligned move for QImode vector modes (memory source,
;; register destination), represented as UNSPEC_LDDQU.  For the non-AVX
;; encoding prefix_data16 is "0" and prefix_rep is "1"; under TARGET_AVX
;; both are left at "*".
611(define_insn "<sse3>_lddqu<avxsizesuffix>"
612  [(set (match_operand:VI1 0 "register_operand" "=x")
613	(unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
614		    UNSPEC_LDDQU))]
615  "TARGET_SSE3"
616  "%vlddqu\t{%1, %0|%0, %1}"
617  [(set_attr "type" "ssemov")
618   (set_attr "movu" "1")
619   (set (attr "prefix_data16")
620     (if_then_else
621       (match_test "TARGET_AVX")
622     (const_string "*")
623     (const_string "0")))
624   (set (attr "prefix_rep")
625     (if_then_else
626       (match_test "TARGET_AVX")
627     (const_string "*")
628     (const_string "1")))
629   (set_attr "prefix" "maybe_vex")
630   (set_attr "mode" "<sseinsnmode>")])
631
;; movnti: UNSPEC_MOVNT store of a general register ("r") to memory for
;; the SWI48 modes.
632(define_insn "sse2_movnti<mode>"
633  [(set (match_operand:SWI48 0 "memory_operand" "=m")
634	(unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
635		      UNSPEC_MOVNT))]
636  "TARGET_SSE2"
637  "movnti\t{%1, %0|%0, %1}"
638  [(set_attr "type" "ssemov")
639   (set_attr "prefix_data16" "0")
640   (set_attr "mode" "<MODE>")])
641
;; movntps/movntpd: UNSPEC_MOVNT store of a float vector register to
;; memory.
642(define_insn "<sse>_movnt<mode>"
643  [(set (match_operand:VF 0 "memory_operand" "=m")
644	(unspec:VF [(match_operand:VF 1 "register_operand" "x")]
645		   UNSPEC_MOVNT))]
646  "TARGET_SSE"
647  "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
648  [(set_attr "type" "ssemov")
649   (set_attr "prefix" "maybe_vex")
650   (set_attr "mode" "<MODE>")])
651
;; movntdq: UNSPEC_MOVNT store of a DImode-element vector register to
;; memory.
;; NOTE(review): "type" is "ssecvt" here while the other movnt patterns
;; above use "ssemov" -- confirm this is intended.
652(define_insn "<sse2>_movnt<mode>"
653  [(set (match_operand:VI8 0 "memory_operand" "=m")
654	(unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
655		    UNSPEC_MOVNT))]
656  "TARGET_SSE2"
657  "%vmovntdq\t{%1, %0|%0, %1}"
658  [(set_attr "type" "ssecvt")
659   (set (attr "prefix_data16")
660     (if_then_else
661       (match_test "TARGET_AVX")
662     (const_string "*")
663     (const_string "1")))
664   (set_attr "prefix" "maybe_vex")
665   (set_attr "mode" "<sseinsnmode>")])
666
667; Expand patterns for non-temporal stores.  At the moment, only those
668; that directly map to insns are defined; it would be possible to
669; define patterns for other modes that would expand to several insns.
670
671;; Modes handled by storent patterns.
672(define_mode_iterator STORENT_MODE
673  [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
674   (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
675   (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
676   (V8SF "TARGET_AVX") V4SF
677   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
678
;; storent expander for the modes in STORENT_MODE.  There is no
;; preparation code, so the UNSPEC_MOVNT template is emitted as written;
;; each mode's own condition comes from the iterator.
679(define_expand "storent<mode>"
680  [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
681	(unspec:STORENT_MODE
682	  [(match_operand:STORENT_MODE 1 "register_operand" "")]
683	  UNSPEC_MOVNT))]
684  "TARGET_SSE")
685
686;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
687;;
688;; Parallel floating point arithmetic
689;;
690;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
691
;; abs/neg expander for float vector modes; delegated entirely to
;; ix86_expand_fp_absneg_operator.
692(define_expand "<code><mode>2"
693  [(set (match_operand:VF 0 "register_operand" "")
694	(absneg:VF
695	  (match_operand:VF 1 "register_operand" "")))]
696  "TARGET_SSE"
697  "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
698
;; abs/neg of a float vector, kept as one insn until after reload and then
;; split into a single bitwise operation: XOR for NEG, AND otherwise.
;; Operand 2 is carried in a (use); presumably it is the sign-bit mask
;; prepared by the expander -- confirm against
;; ix86_expand_fp_absneg_operator.
;;
;; The two inputs of the logic operation are chosen as follows:
;;   - without AVX, the first input is the destination register and the
;;     second is whichever of operands 1 and 2 is not already equal to it;
;;   - with AVX, operands 1 and 2 are used directly, swapped if operand 1
;;     is in memory so that the memory operand comes second.
(define_insn_and_split "*absneg<mode>2"
  [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
	(match_operator:VF 3 "absneg_operator"
	  [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
   (use (match_operand:VF 2 "nonimmediate_operand"    "xm,0, xm,x"))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx lhs, rhs, logic;
  enum rtx_code logic_code;

  if (!TARGET_AVX)
    {
      /* Two-operand form: the destination doubles as the first input.  */
      lhs = operands[0];
      rhs = (rtx_equal_p (operands[0], operands[1])
	     ? operands[2] : operands[1]);
    }
  else if (MEM_P (operands[1]))
    {
      lhs = operands[2];
      rhs = operands[1];
    }
  else
    {
      lhs = operands[1];
      rhs = operands[2];
    }

  if (GET_CODE (operands[3]) == NEG)
    logic_code = XOR;
  else
    logic_code = AND;

  logic = gen_rtx_fmt_ee (logic_code, <MODE>mode, lhs, rhs);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], logic));
  DONE;
}
  [(set_attr "isa" "noavx,noavx,avx,avx")])
736
;; add/sub expander for float vector modes.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy and the template is then
;; emitted (the preparation code does not invoke DONE).
737(define_expand "<plusminus_insn><mode>3"
738  [(set (match_operand:VF 0 "register_operand" "")
739	(plusminus:VF
740	  (match_operand:VF 1 "nonimmediate_operand" "")
741	  (match_operand:VF 2 "nonimmediate_operand" "")))]
742  "TARGET_SSE"
743  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
744
;; Packed float add/sub.  Alternative 0 (noavx) is the two-operand form
;; with operand 1 tied to the destination; alternative 1 (avx) is the
;; three-operand "v"-prefixed form.
745(define_insn "*<plusminus_insn><mode>3"
746  [(set (match_operand:VF 0 "register_operand" "=x,x")
747	(plusminus:VF
748	  (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
749	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
750  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
751  "@
752   <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
753   v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
754  [(set_attr "isa" "noavx,avx")
755   (set_attr "type" "sseadd")
756   (set_attr "prefix" "orig,vex")
757   (set_attr "mode" "<MODE>")])
758
;; Scalar add/sub on 128bit float vectors: a vec_merge with mask 1 of the
;; full-width result with operand 1, emitted as the ss/sd instruction.
;; Same noavx/avx alternative split as the packed pattern.
759(define_insn "<sse>_vm<plusminus_insn><mode>3"
760  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
761	(vec_merge:VF_128
762	  (plusminus:VF_128
763	    (match_operand:VF_128 1 "register_operand" "0,x")
764	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
765	  (match_dup 1)
766	  (const_int 1)))]
767  "TARGET_SSE"
768  "@
769   <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
770   v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
771  [(set_attr "isa" "noavx,avx")
772   (set_attr "type" "sseadd")
773   (set_attr "prefix" "orig,vex")
774   (set_attr "mode" "<ssescalarmode>")])
775
;; Multiply expander for float vector modes.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy and the template is then
;; emitted.
776(define_expand "mul<mode>3"
777  [(set (match_operand:VF 0 "register_operand" "")
778	(mult:VF
779	  (match_operand:VF 1 "nonimmediate_operand" "")
780	  (match_operand:VF 2 "nonimmediate_operand" "")))]
781  "TARGET_SSE"
782  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
783
;; Packed float multiply.  Operand 1 is marked commutative ("%").
;; Alternative 0 (noavx) is the two-operand form, alternative 1 (avx) the
;; three-operand form.
784(define_insn "*mul<mode>3"
785  [(set (match_operand:VF 0 "register_operand" "=x,x")
786	(mult:VF
787	  (match_operand:VF 1 "nonimmediate_operand" "%0,x")
788	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
789  "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
790  "@
791   mul<ssemodesuffix>\t{%2, %0|%0, %2}
792   vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
793  [(set_attr "isa" "noavx,avx")
794   (set_attr "type" "ssemul")
795   (set_attr "prefix" "orig,vex")
796   (set_attr "mode" "<MODE>")])
797
;; Scalar multiply on 128bit float vectors: a vec_merge with mask 1 of the
;; full-width product with operand 1, emitted as mulss/mulsd.
798(define_insn "<sse>_vmmul<mode>3"
799  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
800	(vec_merge:VF_128
801	  (mult:VF_128
802	    (match_operand:VF_128 1 "register_operand" "0,x")
803	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
804	  (match_dup 1)
805	  (const_int 1)))]
806  "TARGET_SSE"
807  "@
808   mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
809   vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
810  [(set_attr "isa" "noavx,avx")
811   (set_attr "type" "ssemul")
812   (set_attr "prefix" "orig,vex")
813   (set_attr "mode" "<ssescalarmode>")])
814
;; Divide expander for DFmode vector modes: the operands are fixed up and
;; the template is then emitted.
815(define_expand "div<mode>3"
816  [(set (match_operand:VF2 0 "register_operand" "")
817	(div:VF2 (match_operand:VF2 1 "register_operand" "")
818		 (match_operand:VF2 2 "nonimmediate_operand" "")))]
819  "TARGET_SSE2"
820  "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
821
;; Divide expander for SFmode vector modes.  After fixing up the operands,
;; the division is emitted through ix86_emit_swdivsf instead of the
;; template when all of the following hold: TARGET_SSE_MATH,
;; TARGET_RECIP_VEC_DIV, not optimizing the insn for size,
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.  Otherwise the preparation code falls
;; through and the plain div template is emitted.
822(define_expand "div<mode>3"
823  [(set (match_operand:VF1 0 "register_operand" "")
824	(div:VF1 (match_operand:VF1 1 "register_operand" "")
825		 (match_operand:VF1 2 "nonimmediate_operand" "")))]
826  "TARGET_SSE"
827{
828  ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
829
830  if (TARGET_SSE_MATH
831      && TARGET_RECIP_VEC_DIV
832      && !optimize_insn_for_size_p ()
833      && flag_finite_math_only && !flag_trapping_math
834      && flag_unsafe_math_optimizations)
835    {
836      ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
837      DONE;
838    }
839})
840
;; Packed float divide.  Operand 1 must be a register (division is not
;; commutative); alternative 0 (noavx) ties it to the destination,
;; alternative 1 (avx) is the three-operand form.
841(define_insn "<sse>_div<mode>3"
842  [(set (match_operand:VF 0 "register_operand" "=x,x")
843	(div:VF
844	  (match_operand:VF 1 "register_operand" "0,x")
845	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
846  "TARGET_SSE"
847  "@
848   div<ssemodesuffix>\t{%2, %0|%0, %2}
849   vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
850  [(set_attr "isa" "noavx,avx")
851   (set_attr "type" "ssediv")
852   (set_attr "prefix" "orig,vex")
853   (set_attr "mode" "<MODE>")])
854
;; Scalar divide on 128bit float vectors: a vec_merge with mask 1 of the
;; full-width quotient with operand 1, emitted as divss/divsd.
855(define_insn "<sse>_vmdiv<mode>3"
856  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
857	(vec_merge:VF_128
858	  (div:VF_128
859	    (match_operand:VF_128 1 "register_operand" "0,x")
860	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
861	  (match_dup 1)
862	  (const_int 1)))]
863  "TARGET_SSE"
864  "@
865   div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
866   vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
867  [(set_attr "isa" "noavx,avx")
868   (set_attr "type" "ssediv")
869   (set_attr "prefix" "orig,vex")
870   (set_attr "mode" "<ssescalarmode>")])
871
;; rcpps on SFmode vector modes, represented as UNSPEC_RCP.
872(define_insn "<sse>_rcp<mode>2"
873  [(set (match_operand:VF1 0 "register_operand" "=x")
874	(unspec:VF1
875	  [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
876  "TARGET_SSE"
877  "%vrcpps\t{%1, %0|%0, %1}"
878  [(set_attr "type" "sse")
879   (set_attr "atom_sse_attr" "rcp")
880   (set_attr "prefix" "maybe_vex")
881   (set_attr "mode" "<MODE>")])
882
;; rcpss: a vec_merge with mask 1 of UNSPEC_RCP of operand 1 with
;; operand 2.  In the noavx alternative operand 2 is tied to the
;; destination; the avx alternative passes it as a separate source.
883(define_insn "sse_vmrcpv4sf2"
884  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
885	(vec_merge:V4SF
886	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
887		       UNSPEC_RCP)
888	  (match_operand:V4SF 2 "register_operand" "0,x")
889	  (const_int 1)))]
890  "TARGET_SSE"
891  "@
892   rcpss\t{%1, %0|%0, %1}
893   vrcpss\t{%1, %2, %0|%0, %2, %1}"
894  [(set_attr "isa" "noavx,avx")
895   (set_attr "type" "sse")
896   (set_attr "atom_sse_attr" "rcp")
897   (set_attr "prefix" "orig,vex")
898   (set_attr "mode" "SF")])
899
;; Square-root expander for DFmode vector modes; no preparation code, the
;; template is emitted as written.
900(define_expand "sqrt<mode>2"
901  [(set (match_operand:VF2 0 "register_operand" "")
902	(sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
903  "TARGET_SSE2")
904
;; Square-root expander for SFmode vector modes.  The result is emitted
;; through ix86_emit_swsqrtsf (last argument false) instead of the
;; template when all of the following hold: TARGET_SSE_MATH,
;; TARGET_RECIP_VEC_SQRT, not optimizing the insn for size,
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.  Otherwise the plain sqrt template is
;; emitted.
905(define_expand "sqrt<mode>2"
906  [(set (match_operand:VF1 0 "register_operand" "")
907	(sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
908  "TARGET_SSE"
909{
910  if (TARGET_SSE_MATH
911      && TARGET_RECIP_VEC_SQRT
912      && !optimize_insn_for_size_p ()
913      && flag_finite_math_only && !flag_trapping_math
914      && flag_unsafe_math_optimizations)
915    {
916      ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
917      DONE;
918    }
919})
920
;; Packed float square root of a register or memory operand.
921(define_insn "<sse>_sqrt<mode>2"
922  [(set (match_operand:VF 0 "register_operand" "=x")
923	(sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
924  "TARGET_SSE"
925  "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
926  [(set_attr "type" "sse")
927   (set_attr "atom_sse_attr" "sqrt")
928   (set_attr "prefix" "maybe_vex")
929   (set_attr "mode" "<MODE>")])
930
;; Scalar square root on 128bit float vectors: a vec_merge with mask 1 of
;; sqrt of operand 1 with operand 2.  Operand 2 is tied to the destination
;; in the noavx alternative and is a separate source in the avx one.
931(define_insn "<sse>_vmsqrt<mode>2"
932  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
933	(vec_merge:VF_128
934	  (sqrt:VF_128
935	    (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
936	  (match_operand:VF_128 2 "register_operand" "0,x")
937	  (const_int 1)))]
938  "TARGET_SSE"
939  "@
940   sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
941   vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
942  [(set_attr "isa" "noavx,avx")
943   (set_attr "type" "sse")
944   (set_attr "atom_sse_attr" "sqrt")
945   (set_attr "prefix" "orig,vex")
946   (set_attr "mode" "<ssescalarmode>")])
947
;; Reciprocal square-root expander for SFmode vector modes.  Always
;; emitted through ix86_emit_swsqrtsf with its last argument true; the
;; UNSPEC_RSQRT template itself is never emitted.
948(define_expand "rsqrt<mode>2"
949  [(set (match_operand:VF1 0 "register_operand" "")
950	(unspec:VF1
951	  [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
952  "TARGET_SSE_MATH"
953{
954  ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
955  DONE;
956})
957
;; rsqrtps on SFmode vector modes, represented as UNSPEC_RSQRT.
958(define_insn "<sse>_rsqrt<mode>2"
959  [(set (match_operand:VF1 0 "register_operand" "=x")
960	(unspec:VF1
961	  [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
962  "TARGET_SSE"
963  "%vrsqrtps\t{%1, %0|%0, %1}"
964  [(set_attr "type" "sse")
965   (set_attr "prefix" "maybe_vex")
966   (set_attr "mode" "<MODE>")])
967
;; rsqrtss: a vec_merge with mask 1 of UNSPEC_RSQRT of operand 1 with
;; operand 2.  Operand 2 is tied to the destination in the noavx
;; alternative and is a separate source in the avx one.
968(define_insn "sse_vmrsqrtv4sf2"
969  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
970	(vec_merge:V4SF
971	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
972		       UNSPEC_RSQRT)
973	  (match_operand:V4SF 2 "register_operand" "0,x")
974	  (const_int 1)))]
975  "TARGET_SSE"
976  "@
977   rsqrtss\t{%1, %0|%0, %1}
978   vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
979  [(set_attr "isa" "noavx,avx")
980   (set_attr "type" "sse")
981   (set_attr "prefix" "orig,vex")
982   (set_attr "mode" "SF")])
983
984;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
985;; isn't really correct, as those rtl operators aren't defined when
986;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.
987
;; Expander for vector floating-point smax/smin (the smaxmin code iterator)
;; over the VF modes.  Unless flag_finite_math_only is set, operand 1 is
;; first forced into a register, which is what the non-finite insn
;; "*<code><mode>3" demands of its first input.  The operands then go
;; through ix86_fixup_binary_operands_no_copy before the template is
;; emitted.
988(define_expand "<code><mode>3"
989  [(set (match_operand:VF 0 "register_operand" "")
990	(smaxmin:VF
991	  (match_operand:VF 1 "nonimmediate_operand" "")
992	  (match_operand:VF 2 "nonimmediate_operand" "")))]
993  "TARGET_SSE"
994{
995  if (!flag_finite_math_only)
996    operands[1] = force_reg (<MODE>mode, operands[1]);
997  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
998})
999
;; smax/smin insn used when flag_finite_math_only is set.  Operand 1 carries
;; the "%" commutativity marker, so the two inputs may be exchanged to
;; satisfy the constraints; ix86_binary_operator_ok additionally vets the
;; operand combination.  Alternative 0 is the two-operand legacy form,
;; alternative 1 the three-operand VEX form.
1000(define_insn "*<code><mode>3_finite"
1001  [(set (match_operand:VF 0 "register_operand" "=x,x")
1002	(smaxmin:VF
1003	  (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1004	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1005  "TARGET_SSE && flag_finite_math_only
1006   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1007  "@
1008   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1009   v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1010  [(set_attr "isa" "noavx,avx")
1011   (set_attr "type" "sseadd")
1012   (set_attr "prefix" "orig,vex")
1013   (set_attr "mode" "<MODE>")])
1014
;; smax/smin insn used when flag_finite_math_only is clear.  Unlike the
;; "_finite" variant, operand 1 must be a register and carries no "%"
;; marker, so the operand order is kept exactly as written.
1015(define_insn "*<code><mode>3"
1016  [(set (match_operand:VF 0 "register_operand" "=x,x")
1017	(smaxmin:VF
1018	  (match_operand:VF 1 "register_operand" "0,x")
1019	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1020  "TARGET_SSE && !flag_finite_math_only"
1021  "@
1022   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1023   v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1024  [(set_attr "isa" "noavx,avx")
1025   (set_attr "type" "sseadd")
1026   (set_attr "prefix" "orig,vex")
1027   (set_attr "mode" "<MODE>")])
1028
;; Scalar smax/smin with merge.  Element 0 of the result is the min/max of
;; operands 1 and 2; the remaining elements are copied from operand 1
;; (match_dup 1 under vec_merge mask 1).  Operand 1 must be a register;
;; operand 2 may be a register or memory.
1029(define_insn "<sse>_vm<code><mode>3"
1030  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1031	(vec_merge:VF_128
1032	  (smaxmin:VF_128
1033	    (match_operand:VF_128 1 "register_operand" "0,x")
1034	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
1035	 (match_dup 1)
1036	 (const_int 1)))]
1037  "TARGET_SSE"
1038  "@
1039   <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1040   v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1041  [(set_attr "isa" "noavx,avx")
1042   (set_attr "type" "sse")
1043   (set_attr "prefix" "orig,vex")
1044   (set_attr "mode" "<ssescalarmode>")])
1045
1046;; These versions of the min/max patterns implement exactly the operations
1047;;   min = (op1 < op2 ? op1 : op2)
1048;;   max = (!(op1 < op2) ? op1 : op2)
1049;; Their operands are not commutative, and thus they may be used in the
1050;; presence of -0.0 and NaN.
1051
;; Minimum expressed as UNSPEC_IEEE_MIN rather than as the smin rtx code.
;; The operands are not commutative: operand 1 must be a register and has
;; no "%" marker, while operand 2 may be a register or memory.  It emits the
;; same min/vmin mnemonics as the smin patterns.
1052(define_insn "*ieee_smin<mode>3"
1053  [(set (match_operand:VF 0 "register_operand" "=x,x")
1054	(unspec:VF
1055	  [(match_operand:VF 1 "register_operand" "0,x")
1056	   (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1057	 UNSPEC_IEEE_MIN))]
1058  "TARGET_SSE"
1059  "@
1060   min<ssemodesuffix>\t{%2, %0|%0, %2}
1061   vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1062  [(set_attr "isa" "noavx,avx")
1063   (set_attr "type" "sseadd")
1064   (set_attr "prefix" "orig,vex")
1065   (set_attr "mode" "<MODE>")])
1066
;; Maximum expressed as UNSPEC_IEEE_MAX rather than as the smax rtx code.
;; The operands are not commutative: operand 1 must be a register and has
;; no "%" marker, while operand 2 may be a register or memory.  It emits the
;; same max/vmax mnemonics as the smax patterns.
1067(define_insn "*ieee_smax<mode>3"
1068  [(set (match_operand:VF 0 "register_operand" "=x,x")
1069	(unspec:VF
1070	  [(match_operand:VF 1 "register_operand" "0,x")
1071	   (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1072	 UNSPEC_IEEE_MAX))]
1073  "TARGET_SSE"
1074  "@
1075   max<ssemodesuffix>\t{%2, %0|%0, %2}
1076   vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1077  [(set_attr "isa" "noavx,avx")
1078   (set_attr "type" "sseadd")
1079   (set_attr "prefix" "orig,vex")
1080   (set_attr "mode" "<MODE>")])
1081
;; 256-bit alternating add/subtract of doubles.  The vec_merge mask 10
;; (binary 1010) picks elements 1 and 3 from the sum op1 + op2 and elements
;; 0 and 2 from the difference op1 - op2.  VEX-only, three-operand form.
1082(define_insn "avx_addsubv4df3"
1083  [(set (match_operand:V4DF 0 "register_operand" "=x")
1084	(vec_merge:V4DF
1085	  (plus:V4DF
1086	    (match_operand:V4DF 1 "register_operand" "x")
1087	    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1088	  (minus:V4DF (match_dup 1) (match_dup 2))
1089	  (const_int 10)))]
1090  "TARGET_AVX"
1091  "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1092  [(set_attr "type" "sseadd")
1093   (set_attr "prefix" "vex")
1094   (set_attr "mode" "V4DF")])
1095
;; 128-bit alternating add/subtract of doubles.  The vec_merge mask 2
;; (binary 10) takes element 1 from the sum op1 + op2 and element 0 from
;; the difference op1 - op2.  Alternative 0 is the legacy SSE3 encoding
;; with operand 1 tied to the destination; alternative 1 is the VEX form.
1096(define_insn "sse3_addsubv2df3"
1097  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1098	(vec_merge:V2DF
1099	  (plus:V2DF
1100	    (match_operand:V2DF 1 "register_operand" "0,x")
1101	    (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1102	  (minus:V2DF (match_dup 1) (match_dup 2))
1103	  (const_int 2)))]
1104  "TARGET_SSE3"
1105  "@
1106   addsubpd\t{%2, %0|%0, %2}
1107   vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1108  [(set_attr "isa" "noavx,avx")
1109   (set_attr "type" "sseadd")
1110   (set_attr "atom_unit" "complex")
1111   (set_attr "prefix" "orig,vex")
1112   (set_attr "mode" "V2DF")])
1113
;; 256-bit alternating add/subtract of floats.  The vec_merge mask 170
;; (binary 10101010) picks the odd-numbered elements from the sum op1 + op2
;; and the even-numbered elements from the difference op1 - op2.  VEX-only,
;; three-operand form.
1114(define_insn "avx_addsubv8sf3"
1115  [(set (match_operand:V8SF 0 "register_operand" "=x")
1116	(vec_merge:V8SF
1117	  (plus:V8SF
1118	    (match_operand:V8SF 1 "register_operand" "x")
1119	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1120	  (minus:V8SF (match_dup 1) (match_dup 2))
1121	  (const_int 170)))]
1122  "TARGET_AVX"
1123  "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1124  [(set_attr "type" "sseadd")
1125   (set_attr "prefix" "vex")
1126   (set_attr "mode" "V8SF")])
1127
;; 128-bit alternating add/subtract of floats.  The vec_merge mask 10
;; (binary 1010) takes elements 1 and 3 from the sum op1 + op2 and elements
;; 0 and 2 from the difference op1 - op2.  Alternative 0 is the legacy SSE3
;; encoding (prefix_rep is 1 for it); alternative 1 is the VEX form.
1128(define_insn "sse3_addsubv4sf3"
1129  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1130	(vec_merge:V4SF
1131	  (plus:V4SF
1132	    (match_operand:V4SF 1 "register_operand" "0,x")
1133	    (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1134	  (minus:V4SF (match_dup 1) (match_dup 2))
1135	  (const_int 10)))]
1136  "TARGET_SSE3"
1137  "@
1138   addsubps\t{%2, %0|%0, %2}
1139   vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1140  [(set_attr "isa" "noavx,avx")
1141   (set_attr "type" "sseadd")
1142   (set_attr "prefix" "orig,vex")
1143   (set_attr "prefix_rep" "1,*")
1144   (set_attr "mode" "V4SF")])
1145
;; 256-bit horizontal add/subtract of doubles (plusminus code iterator).
;; Result layout, exactly as spelled out by the nested vec_concats:
;;   element 0 = op1[0] <op> op1[1]
;;   element 1 = op2[0] <op> op2[1]
;;   element 2 = op1[2] <op> op1[3]
;;   element 3 = op2[2] <op> op2[3]
;; i.e. the two sources are interleaved within each 128-bit half.
1146(define_insn "avx_h<plusminus_insn>v4df3"
1147  [(set (match_operand:V4DF 0 "register_operand" "=x")
1148	(vec_concat:V4DF
1149	  (vec_concat:V2DF
1150	    (plusminus:DF
1151	      (vec_select:DF
1152		(match_operand:V4DF 1 "register_operand" "x")
1153		(parallel [(const_int 0)]))
1154	      (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1155	    (plusminus:DF
1156	      (vec_select:DF
1157		(match_operand:V4DF 2 "nonimmediate_operand" "xm")
1158		(parallel [(const_int 0)]))
1159	      (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1160	  (vec_concat:V2DF
1161	    (plusminus:DF
1162	      (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1163	      (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1164	    (plusminus:DF
1165	      (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1166	      (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1167  "TARGET_AVX"
1168  "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1169  [(set_attr "type" "sseadd")
1170   (set_attr "prefix" "vex")
1171   (set_attr "mode" "V4DF")])
1172
;; 128-bit horizontal add/subtract of doubles (plusminus code iterator).
;; Result layout:
;;   element 0 = op1[0] <op> op1[1]
;;   element 1 = op2[0] <op> op2[1]
;; Alternative 0 is the legacy SSE3 encoding with operand 1 tied to the
;; destination; alternative 1 is the three-operand VEX form.
1173(define_insn "sse3_h<plusminus_insn>v2df3"
1174  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1175	(vec_concat:V2DF
1176	  (plusminus:DF
1177	    (vec_select:DF
1178	      (match_operand:V2DF 1 "register_operand" "0,x")
1179	      (parallel [(const_int 0)]))
1180	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1181	  (plusminus:DF
1182	    (vec_select:DF
1183	      (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1184	      (parallel [(const_int 0)]))
1185	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1186  "TARGET_SSE3"
1187  "@
1188   h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1189   vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1190  [(set_attr "isa" "noavx,avx")
1191   (set_attr "type" "sseadd")
1192   (set_attr "prefix" "orig,vex")
1193   (set_attr "mode" "V2DF")])
1194
;; 256-bit horizontal add/subtract of floats (plusminus code iterator).
;; Result layout, exactly as spelled out by the nested vec_concats:
;;   element 0 = op1[0] <op> op1[1]     element 4 = op1[4] <op> op1[5]
;;   element 1 = op1[2] <op> op1[3]     element 5 = op1[6] <op> op1[7]
;;   element 2 = op2[0] <op> op2[1]     element 6 = op2[4] <op> op2[5]
;;   element 3 = op2[2] <op> op2[3]     element 7 = op2[6] <op> op2[7]
;; i.e. each 128-bit half holds the pair results of operand 1 followed by
;; those of operand 2 for the same half.
1195(define_insn "avx_h<plusminus_insn>v8sf3"
1196  [(set (match_operand:V8SF 0 "register_operand" "=x")
1197	(vec_concat:V8SF
1198	  (vec_concat:V4SF
1199	    (vec_concat:V2SF
1200	      (plusminus:SF
1201		(vec_select:SF
1202		  (match_operand:V8SF 1 "register_operand" "x")
1203		  (parallel [(const_int 0)]))
1204		(vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1205	      (plusminus:SF
1206		(vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1207		(vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1208	    (vec_concat:V2SF
1209	      (plusminus:SF
1210		(vec_select:SF
1211		  (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1212		  (parallel [(const_int 0)]))
1213		(vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1214	      (plusminus:SF
1215		(vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1216		(vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1217	  (vec_concat:V4SF
1218	    (vec_concat:V2SF
1219	      (plusminus:SF
1220		(vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1221		(vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1222	      (plusminus:SF
1223		(vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1224		(vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1225	    (vec_concat:V2SF
1226	      (plusminus:SF
1227		(vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1228		(vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1229	      (plusminus:SF
1230		(vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1231		(vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1232  "TARGET_AVX"
1233  "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1234  [(set_attr "type" "sseadd")
1235   (set_attr "prefix" "vex")
1236   (set_attr "mode" "V8SF")])
1237
;; 128-bit horizontal add/subtract of floats (plusminus code iterator).
;; Result layout:
;;   element 0 = op1[0] <op> op1[1]
;;   element 1 = op1[2] <op> op1[3]
;;   element 2 = op2[0] <op> op2[1]
;;   element 3 = op2[2] <op> op2[3]
;; Alternative 0 is the legacy SSE3 encoding (prefix_rep is 1 for it) with
;; operand 1 tied to the destination; alternative 1 is the VEX form.
1238(define_insn "sse3_h<plusminus_insn>v4sf3"
1239  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1240	(vec_concat:V4SF
1241	  (vec_concat:V2SF
1242	    (plusminus:SF
1243	      (vec_select:SF
1244		(match_operand:V4SF 1 "register_operand" "0,x")
1245		(parallel [(const_int 0)]))
1246	      (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1247	    (plusminus:SF
1248	      (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1249	      (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1250	  (vec_concat:V2SF
1251	    (plusminus:SF
1252	      (vec_select:SF
1253		(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1254		(parallel [(const_int 0)]))
1255	      (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1256	    (plusminus:SF
1257	      (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1258	      (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1259  "TARGET_SSE3"
1260  "@
1261   h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1262   vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1263  [(set_attr "isa" "noavx,avx")
1264   (set_attr "type" "sseadd")
1265   (set_attr "atom_unit" "complex")
1266   (set_attr "prefix" "orig,vex")
1267   (set_attr "prefix_rep" "1,*")
1268   (set_attr "mode" "V4SF")])
1269
;; Sum reduction of a V4DF vector (AVX).  The template only declares the
;; two operands; the code emits three insns and ends with DONE:
;;   1. tmp  = horizontal add of operand 1 with itself;
;;   2. tmp2 = vperm2f128 of tmp with itself, selector 1;
;;   3. operand 0 = tmp + tmp2.
;; NOTE(review): selector 1 presumably exchanges the two 128-bit halves so
;; that step 3 adds the low-half and high-half partial sums -- confirm
;; against the avx_vperm2f128 pattern.
1270(define_expand "reduc_splus_v4df"
1271  [(match_operand:V4DF 0 "register_operand" "")
1272   (match_operand:V4DF 1 "register_operand" "")]
1273  "TARGET_AVX"
1274{
1275  rtx tmp = gen_reg_rtx (V4DFmode);
1276  rtx tmp2 = gen_reg_rtx (V4DFmode);
1277  emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1278  emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1279  emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
1280  DONE;
1281})
1282
;; Sum reduction of a V2DF vector (SSE3): a single horizontal add of
;; operand 1 with itself, written straight into operand 0, so every element
;; of the result holds op1[0] + op1[1].
1283(define_expand "reduc_splus_v2df"
1284  [(match_operand:V2DF 0 "register_operand" "")
1285   (match_operand:V2DF 1 "register_operand" "")]
1286  "TARGET_SSE3"
1287{
1288  emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1289  DONE;
1290})
1291
;; Sum reduction of a V8SF vector (AVX).  The code emits four insns and
;; ends with DONE:
;;   1. tmp  = horizontal add of operand 1 with itself;
;;   2. tmp2 = horizontal add of tmp with itself;
;;   3. tmp  = vperm2f128 of tmp2 with itself, selector 1 (tmp is reused);
;;   4. operand 0 = tmp + tmp2.
;; NOTE(review): selector 1 presumably exchanges the two 128-bit halves --
;; confirm against the avx_vperm2f128 pattern.
1292(define_expand "reduc_splus_v8sf"
1293  [(match_operand:V8SF 0 "register_operand" "")
1294   (match_operand:V8SF 1 "register_operand" "")]
1295  "TARGET_AVX"
1296{
1297  rtx tmp = gen_reg_rtx (V8SFmode);
1298  rtx tmp2 = gen_reg_rtx (V8SFmode);
1299  emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1300  emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1301  emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1302  emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
1303  DONE;
1304})
1305
;; Sum reduction of a V4SF vector.  Available from plain SSE; the code picks
;; the strategy at expand time:
;;   - with TARGET_SSE3, two chained horizontal adds (operand 1 with itself
;;     into tmp, then tmp with itself into operand 0);
;;   - otherwise the generic helper ix86_expand_reduc, driven by
;;     gen_addv4sf3.
1306(define_expand "reduc_splus_v4sf"
1307  [(match_operand:V4SF 0 "register_operand" "")
1308   (match_operand:V4SF 1 "register_operand" "")]
1309  "TARGET_SSE"
1310{
1311  if (TARGET_SSE3)
1312    {
1313      rtx tmp = gen_reg_rtx (V4SFmode);
1314      emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1315      emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1316    }
1317  else
1318    ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1319  DONE;
1320})
1321
1322;; Modes handled by reduc_sm{in,ax}* patterns.
;; Each entry pairs a mode with the target condition that enables it: the
;; four 256-bit integer modes need AVX2, the two 256-bit float modes need
;; AVX, and V4SF needs only SSE.
1323(define_mode_iterator REDUC_SMINMAX_MODE
1324  [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1325   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1326   (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1327   (V4SF "TARGET_SSE")])
1328
;; Signed min/max reduction over the REDUC_SMINMAX_MODE modes.  The pattern
;; condition is empty, so availability is decided solely by the per-mode
;; conditions of the iterator.  The template is never emitted: the code
;; calls ix86_expand_reduc with the matching gen_<code><mode>3 generator and
;; ends with DONE.
1329(define_expand "reduc_<code>_<mode>"
1330  [(smaxmin:REDUC_SMINMAX_MODE
1331     (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
1332     (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
1333  ""
1334{
1335  ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1336  DONE;
1337})
1338
;; Unsigned min/max reduction (umaxmin code iterator) over the VI_256
;; modes, available with AVX2.  The template is never emitted: the code
;; calls ix86_expand_reduc with the matching gen_<code><mode>3 generator and
;; ends with DONE.
1339(define_expand "reduc_<code>_<mode>"
1340  [(umaxmin:VI_256
1341     (match_operand:VI_256 0 "register_operand" "")
1342     (match_operand:VI_256 1 "register_operand" ""))]
1343  "TARGET_AVX2"
1344{
1345  ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1346  DONE;
1347})
1348
;; Unsigned minimum reduction of a V8HI vector, available with SSE4.1.  The
;; template is never emitted: the code calls ix86_expand_reduc with
;; gen_uminv8hi3 and ends with DONE.
1349(define_expand "reduc_umin_v8hi"
1350  [(umin:V8HI
1351     (match_operand:V8HI 0 "register_operand" "")
1352     (match_operand:V8HI 1 "register_operand" ""))]
1353  "TARGET_SSE4_1"
1354{
1355  ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1356  DONE;
1357})
1358
1359;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1360;;
1361;; Parallel floating point comparisons
1362;;
1363;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1364
;; AVX packed floating-point compare with an explicit immediate predicate.
;; Modeled as UNSPEC_PCMP over operands 1 and 2 plus operand 3, a constant
;; in the range 0..31 that is printed as the instruction's immediate.
;; Operand 1 must be a register; operand 2 may be a register or memory.
1365(define_insn "avx_cmp<mode>3"
1366  [(set (match_operand:VF 0 "register_operand" "=x")
1367	(unspec:VF
1368	  [(match_operand:VF 1 "register_operand" "x")
1369	   (match_operand:VF 2 "nonimmediate_operand" "xm")
1370	   (match_operand:SI 3 "const_0_to_31_operand" "n")]
1371	  UNSPEC_PCMP))]
1372  "TARGET_AVX"
1373  "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1374  [(set_attr "type" "ssecmp")
1375   (set_attr "length_immediate" "1")
1376   (set_attr "prefix" "vex")
1377   (set_attr "mode" "<MODE>")])
1378
;; Scalar variant of the AVX immediate-predicate compare.  Element 0 of the
;; result comes from UNSPEC_PCMP on operands 1 and 2 with predicate operand
;; 3 (0..31); the remaining elements are copied from operand 1
;; (match_dup 1 under vec_merge mask 1).
1379(define_insn "avx_vmcmp<mode>3"
1380  [(set (match_operand:VF_128 0 "register_operand" "=x")
1381	(vec_merge:VF_128
1382	  (unspec:VF_128
1383	    [(match_operand:VF_128 1 "register_operand" "x")
1384	     (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1385	     (match_operand:SI 3 "const_0_to_31_operand" "n")]
1386	    UNSPEC_PCMP)
1387	 (match_dup 1)
1388	 (const_int 1)))]
1389  "TARGET_AVX"
1390  "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1391  [(set_attr "type" "ssecmp")
1392   (set_attr "length_immediate" "1")
1393   (set_attr "prefix" "vex")
1394   (set_attr "mode" "<ssescalarmode>")])
1395
;; Mask-producing packed compare restricted to commutative comparison
;; codes: the condition requires the rtx class of operator 3 to be
;; RTX_COMM_COMPARE, and operand 1 carries the "%" marker so the inputs may
;; be swapped to satisfy the constraints.
;; NOTE(review): "%D3" presumably prints the predicate name derived from
;; operator 3 -- confirm against the target's print_operand.
1396(define_insn "*<sse>_maskcmp<mode>3_comm"
1397  [(set (match_operand:VF 0 "register_operand" "=x,x")
1398	(match_operator:VF 3 "sse_comparison_operator"
1399	  [(match_operand:VF 1 "register_operand" "%0,x")
1400	   (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1401  "TARGET_SSE
1402   && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1403  "@
1404   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1405   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1406  [(set_attr "isa" "noavx,avx")
1407   (set_attr "type" "ssecmp")
1408   (set_attr "length_immediate" "1")
1409   (set_attr "prefix" "orig,vex")
1410   (set_attr "mode" "<MODE>")])
1411
;; General mask-producing packed compare for any operator accepted by
;; sse_comparison_operator.  Operand order is fixed (no "%" marker):
;; operand 1 must be a register, operand 2 may be a register or memory.
;; Alternative 0 is the legacy two-operand form, alternative 1 the VEX form.
1412(define_insn "<sse>_maskcmp<mode>3"
1413  [(set (match_operand:VF 0 "register_operand" "=x,x")
1414	(match_operator:VF 3 "sse_comparison_operator"
1415	  [(match_operand:VF 1 "register_operand" "0,x")
1416	   (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1417  "TARGET_SSE"
1418  "@
1419   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1420   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1421  [(set_attr "isa" "noavx,avx")
1422   (set_attr "type" "ssecmp")
1423   (set_attr "length_immediate" "1")
1424   (set_attr "prefix" "orig,vex")
1425   (set_attr "mode" "<MODE>")])
1426
;; Scalar mask-producing compare with merge.  Element 0 of the result is
;; the comparison (operator 3) of operands 1 and 2; the remaining elements
;; are copied from operand 1 (match_dup 1 under vec_merge mask 1).  Note
;; that length_immediate is given per alternative here ("1,*").
1427(define_insn "<sse>_vmmaskcmp<mode>3"
1428  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1429	(vec_merge:VF_128
1430	 (match_operator:VF_128 3 "sse_comparison_operator"
1431	   [(match_operand:VF_128 1 "register_operand" "0,x")
1432	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1433	 (match_dup 1)
1434	 (const_int 1)))]
1435  "TARGET_SSE"
1436  "@
1437   cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1438   vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1439  [(set_attr "isa" "noavx,avx")
1440   (set_attr "type" "ssecmp")
1441   (set_attr "length_immediate" "1,*")
1442   (set_attr "prefix" "orig,vex")
1443   (set_attr "mode" "<ssescalarmode>")])
1444
;; Scalar compare that sets the flags register in CCFP mode.  Element 0 of
;; each vector operand is extracted and the two scalars are compared.  The
;; pattern iterates over the MODEF scalar modes and is enabled when
;; SSE_FLOAT_MODE_P holds for the mode.  prefix_data16 is 1 only for DF.
1445(define_insn "<sse>_comi"
1446  [(set (reg:CCFP FLAGS_REG)
1447	(compare:CCFP
1448	  (vec_select:MODEF
1449	    (match_operand:<ssevecmode> 0 "register_operand" "x")
1450	    (parallel [(const_int 0)]))
1451	  (vec_select:MODEF
1452	    (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1453	    (parallel [(const_int 0)]))))]
1454  "SSE_FLOAT_MODE_P (<MODE>mode)"
1455  "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1456  [(set_attr "type" "ssecomi")
1457   (set_attr "prefix" "maybe_vex")
1458   (set_attr "prefix_rep" "0")
1459   (set (attr "prefix_data16")
1460	(if_then_else (eq_attr "mode" "DF")
1461		      (const_string "1")
1462		      (const_string "0")))
1463   (set_attr "mode" "<MODE>")])
1464
;; Same shape as the "comi" pattern, but the flags register is set in
;; CCFPU mode and the ucomi mnemonic is emitted.  Element 0 of each vector
;; operand is extracted and compared; prefix_data16 is 1 only for DF.
1465(define_insn "<sse>_ucomi"
1466  [(set (reg:CCFPU FLAGS_REG)
1467	(compare:CCFPU
1468	  (vec_select:MODEF
1469	    (match_operand:<ssevecmode> 0 "register_operand" "x")
1470	    (parallel [(const_int 0)]))
1471	  (vec_select:MODEF
1472	    (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1473	    (parallel [(const_int 0)]))))]
1474  "SSE_FLOAT_MODE_P (<MODE>mode)"
1475  "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1476  [(set_attr "type" "ssecomi")
1477   (set_attr "prefix" "maybe_vex")
1478   (set_attr "prefix_rep" "0")
1479   (set (attr "prefix_data16")
1480	(if_then_else (eq_attr "mode" "DF")
1481		      (const_string "1")
1482		      (const_string "0")))
1483   (set_attr "mode" "<MODE>")])
1484
;; 256-bit vector conditional select driven by a floating-point compare.
;; Operands 4 and 5 (VF_256) are compared by operator 3; operands 1 and 2
;; (V_256) are the values selected into operand 0.  The data mode and the
;; comparison mode may differ, but the condition requires them to have the
;; same number of units.  All work is delegated to ix86_expand_fp_vcond,
;; whose success is asserted.
1485(define_expand "vcond<V_256:mode><VF_256:mode>"
1486  [(set (match_operand:V_256 0 "register_operand" "")
1487	(if_then_else:V_256
1488	  (match_operator 3 ""
1489	    [(match_operand:VF_256 4 "nonimmediate_operand" "")
1490	     (match_operand:VF_256 5 "nonimmediate_operand" "")])
1491	  (match_operand:V_256 1 "general_operand" "")
1492	  (match_operand:V_256 2 "general_operand" "")))]
1493  "TARGET_AVX
1494   && (GET_MODE_NUNITS (<V_256:MODE>mode)
1495       == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1496{
1497  bool ok = ix86_expand_fp_vcond (operands);
1498  gcc_assert (ok);
1499  DONE;
1500})
1501
;; 128-bit vector conditional select driven by a floating-point compare.
;; Operands 4 and 5 (VF_128) are compared by operator 3; operands 1 and 2
;; (V_128) are the values selected into operand 0.  The data mode and the
;; comparison mode may differ, but the condition requires them to have the
;; same number of units.  All work is delegated to ix86_expand_fp_vcond,
;; whose success is asserted.
1502(define_expand "vcond<V_128:mode><VF_128:mode>"
1503  [(set (match_operand:V_128 0 "register_operand" "")
1504	(if_then_else:V_128
1505	  (match_operator 3 ""
1506	    [(match_operand:VF_128 4 "nonimmediate_operand" "")
1507	     (match_operand:VF_128 5 "nonimmediate_operand" "")])
1508	  (match_operand:V_128 1 "general_operand" "")
1509	  (match_operand:V_128 2 "general_operand" "")))]
1510  "TARGET_SSE
1511   && (GET_MODE_NUNITS (<V_128:MODE>mode)
1512       == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1513{
1514  bool ok = ix86_expand_fp_vcond (operands);
1515  gcc_assert (ok);
1516  DONE;
1517})
1518
1519;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1520;;
1521;; Parallel floating point logical operations
1522;;
1523;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1524
;; Vector and-not: operand 0 = (~operand 1) & operand 2 over the VF modes.
;; The output code assembles the mnemonic at run time: the suffix is forced
;; to "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, otherwise it
;; is the mode's own suffix.  Alternative 0 is the two-operand legacy form
;; (operand 1 tied to the destination), alternative 1 the three-operand VEX
;; form.
1525(define_insn "<sse>_andnot<mode>3"
1526  [(set (match_operand:VF 0 "register_operand" "=x,x")
1527	(and:VF
1528	  (not:VF
1529	    (match_operand:VF 1 "register_operand" "0,x"))
1530	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1531  "TARGET_SSE"
1532{
  /* The longest template below expands to 31 characters plus the
     terminating NUL.  The buffer is sized well above that so snprintf can
     never silently truncate the mnemonic or its operand list.  */
1533  static char buf[128];
1534  const char *insn;
1535  const char *suffix
1536    = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1537
1538  switch (which_alternative)
1539    {
1540    case 0:
1541      insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1542      break;
1543    case 1:
1544      insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1545      break;
1546    default:
1547      gcc_unreachable ();
1548    }
1549
  /* "%s" receives the suffix; the doubled "%%" survive as single "%" so the
     result is an ordinary output template.  */
1550  snprintf (buf, sizeof (buf), insn, suffix);
1551  return buf;
1552}
1553  [(set_attr "isa" "noavx,avx")
1554   (set_attr "type" "sselog")
1555   (set_attr "prefix" "orig,vex")
1556   (set_attr "mode" "<MODE>")])
1557
;; Expander for the vector floating-point bitwise operations of the
;; any_logic code iterator over the VF modes.  The only preparation step is
;; a call to ix86_fixup_binary_operands_no_copy; the template is then
;; emitted as written.
1558(define_expand "<code><mode>3"
1559  [(set (match_operand:VF 0 "register_operand" "")
1560	(any_logic:VF
1561	  (match_operand:VF 1 "nonimmediate_operand" "")
1562	  (match_operand:VF 2 "nonimmediate_operand" "")))]
1563  "TARGET_SSE"
1564  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1565
;; Insn for the vector floating-point bitwise operations of the any_logic
;; code iterator.  The operation is commutative ("%" on operand 1) and
;; ix86_binary_operator_ok vets the operand combination.  The output code
;; assembles the mnemonic at run time: the suffix is forced to "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, otherwise it is the mode's
;; own suffix.  Alternative 0 is the legacy form, alternative 1 the VEX form.
1566(define_insn "*<code><mode>3"
1567  [(set (match_operand:VF 0 "register_operand" "=x,x")
1568	(any_logic:VF
1569	  (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1570	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1571  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1572{
  /* The templates below come within a few bytes of a 32-byte buffer.  The
     buffer is sized well above that so snprintf can never silently truncate
     the mnemonic or its operand list.  */
1573  static char buf[128];
1574  const char *insn;
1575  const char *suffix
1576    = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1577
1578  switch (which_alternative)
1579    {
1580    case 0:
1581      insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1582      break;
1583    case 1:
1584      insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1585      break;
1586    default:
1587      gcc_unreachable ();
1588    }
1589
  /* "%s" receives the suffix; the doubled "%%" survive as single "%" so the
     result is an ordinary output template.  */
1590  snprintf (buf, sizeof (buf), insn, suffix);
1591  return buf;
1592}
1593  [(set_attr "isa" "noavx,avx")
1594   (set_attr "type" "sselog")
1595   (set_attr "prefix" "orig,vex")
1596   (set_attr "mode" "<MODE>")])
1597
;; Vector copysign built from three bitwise operations on a mask held in
;; operand 3, which the preparation code obtains from
;; ix86_build_signbit_mask:
;;   operand 4 = ~mask & operand 1
;;   operand 5 =  mask & operand 2
;;   operand 0 = operand 4 | operand 5
;; Operands 4 and 5 are fresh pseudo registers.
;; NOTE(review): assuming the mask has only the sign bits set, this keeps
;; the magnitude of operand 1 and the sign of operand 2 -- confirm the
;; meaning of the (1, 0) arguments against ix86_build_signbit_mask.
1598(define_expand "copysign<mode>3"
1599  [(set (match_dup 4)
1600	(and:VF
1601	  (not:VF (match_dup 3))
1602	  (match_operand:VF 1 "nonimmediate_operand" "")))
1603   (set (match_dup 5)
1604	(and:VF (match_dup 3)
1605		(match_operand:VF 2 "nonimmediate_operand" "")))
1606   (set (match_operand:VF 0 "register_operand" "")
1607	(ior:VF (match_dup 4) (match_dup 5)))]
1608  "TARGET_SSE"
1609{
1610  operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1611
1612  operands[4] = gen_reg_rtx (<MODE>mode);
1613  operands[5] = gen_reg_rtx (<MODE>mode);
1614})
1615
1616;; Also define scalar versions.  These are used for abs, neg, and
1617;; conditional move.  Using subregs into vector modes causes register
1618;; allocation lossage.  These patterns do not allow memory operands
1619;; because the native instructions read the full 128 bits.
1620
;; Scalar and-not on the MODEF modes: operand 0 = (~operand 1) & operand 2.
;; All operands must be registers.  The packed vector instruction is
;; emitted, so the suffix is that of the corresponding vector mode, or "ps"
;; when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set.  Alternative 0 is the
;; two-operand legacy form, alternative 1 the three-operand VEX form.
1621(define_insn "*andnot<mode>3"
1622  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1623	(and:MODEF
1624	  (not:MODEF
1625	    (match_operand:MODEF 1 "register_operand" "0,x"))
1626	    (match_operand:MODEF 2 "register_operand" "x,x")))]
1627  "SSE_FLOAT_MODE_P (<MODE>mode)"
1628{
  /* The longest template below expands to 31 characters plus the
     terminating NUL.  The buffer is sized well above that so snprintf can
     never silently truncate the mnemonic or its operand list.  */
1629  static char buf[128];
1630  const char *insn;
1631  const char *suffix
1632    = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1633
1634  switch (which_alternative)
1635    {
1636    case 0:
1637      insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1638      break;
1639    case 1:
1640      insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1641      break;
1642    default:
1643      gcc_unreachable ();
1644    }
1645
  /* "%s" receives the suffix; the doubled "%%" survive as single "%" so the
     result is an ordinary output template.  */
1646  snprintf (buf, sizeof (buf), insn, suffix);
1647  return buf;
1648}
1649  [(set_attr "isa" "noavx,avx")
1650   (set_attr "type" "sselog")
1651   (set_attr "prefix" "orig,vex")
1652   (set_attr "mode" "<ssevecmode>")])
1653
;; Scalar bitwise operations of the any_logic code iterator on the MODEF
;; modes.  All operands must be registers and the operation is commutative
;; ("%" on operand 1).  The packed vector instruction is emitted, so the
;; suffix is that of the corresponding vector mode, or "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set.  Alternative 0 is the
;; legacy form, alternative 1 the VEX form.
1654(define_insn "*<code><mode>3"
1655  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1656	(any_logic:MODEF
1657	  (match_operand:MODEF 1 "register_operand" "%0,x")
1658	  (match_operand:MODEF 2 "register_operand" "x,x")))]
1659  "SSE_FLOAT_MODE_P (<MODE>mode)"
1660{
  /* The templates below come within a few bytes of a 32-byte buffer.  The
     buffer is sized well above that so snprintf can never silently truncate
     the mnemonic or its operand list.  */
1661  static char buf[128];
1662  const char *insn;
1663  const char *suffix
1664    = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1665
1666  switch (which_alternative)
1667    {
1668    case 0:
1669      insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1670      break;
1671    case 1:
1672      insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1673      break;
1674    default:
1675      gcc_unreachable ();
1676    }
1677
  /* "%s" receives the suffix; the doubled "%%" survive as single "%" so the
     result is an ordinary output template.  */
1678  snprintf (buf, sizeof (buf), insn, suffix);
1679  return buf;
1680}
1681  [(set_attr "isa" "noavx,avx")
1682   (set_attr "type" "sselog")
1683   (set_attr "prefix" "orig,vex")
1684   (set_attr "mode" "<ssevecmode>")])
1685
1686;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1687;;
1688;; FMA floating point multiply/accumulate instructions.  These include
1689;; scalar versions of the instructions as well as vector versions.
1690;;
1691;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1692
;; Modes covered by the fma patterns: the scalar modes SF and DF plus the
;; vector modes V4SF, V2DF, V8SF and V4DF.
1693(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
1694
1695;; The standard names for fma are only available with SSE math enabled.
;; Standard-name expander for fused multiply-add:
;;   operand 0 = fma (operand 1, operand 2, operand 3).
;; There is no preparation code, so the template is emitted as written.
;; Requires FMA or FMA4 together with TARGET_SSE_MATH.
1696(define_expand "fma<mode>4"
1697  [(set (match_operand:FMAMODE 0 "register_operand")
1698	(fma:FMAMODE
1699	  (match_operand:FMAMODE 1 "nonimmediate_operand")
1700	  (match_operand:FMAMODE 2 "nonimmediate_operand")
1701	  (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1702  "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1703
;; Standard-name expander for fused multiply-subtract:
;;   operand 0 = fma (operand 1, operand 2, -operand 3).
;; There is no preparation code, so the template is emitted as written.
;; Requires FMA or FMA4 together with TARGET_SSE_MATH.
1704(define_expand "fms<mode>4"
1705  [(set (match_operand:FMAMODE 0 "register_operand")
1706	(fma:FMAMODE
1707	  (match_operand:FMAMODE 1 "nonimmediate_operand")
1708	  (match_operand:FMAMODE 2 "nonimmediate_operand")
1709	  (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1710  "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1711
;; Standard-name expander for negated fused multiply-add:
;;   operand 0 = fma (-operand 1, operand 2, operand 3).
;; There is no preparation code, so the template is emitted as written.
;; Requires FMA or FMA4 together with TARGET_SSE_MATH.
1712(define_expand "fnma<mode>4"
1713  [(set (match_operand:FMAMODE 0 "register_operand")
1714	(fma:FMAMODE
1715	  (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1716	  (match_operand:FMAMODE 2 "nonimmediate_operand")
1717	  (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1718  "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1719
;; Standard-name expander for negated fused multiply-subtract:
;;   operand 0 = fma (-operand 1, operand 2, -operand 3).
;; There is no preparation code, so the template is emitted as written.
;; Requires FMA or FMA4 together with TARGET_SSE_MATH.
1720(define_expand "fnms<mode>4"
1721  [(set (match_operand:FMAMODE 0 "register_operand")
1722	(fma:FMAMODE
1723	  (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1724	  (match_operand:FMAMODE 2 "nonimmediate_operand")
1725	  (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1726  "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1727
1728;; The builtin for intrinsics is not constrained by SSE math enabled.
;; Expander used by the intrinsic builtins.  It produces the same RTL as
;; the standard-name fma expander, operand 0 = fma (operand 1, operand 2,
;; operand 3), but its condition omits TARGET_SSE_MATH and only asks for
;; FMA or FMA4.
1729(define_expand "fma4i_fmadd_<mode>"
1730  [(set (match_operand:FMAMODE 0 "register_operand")
1731	(fma:FMAMODE
1732	  (match_operand:FMAMODE 1 "nonimmediate_operand")
1733	  (match_operand:FMAMODE 2 "nonimmediate_operand")
1734	  (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1735  "TARGET_FMA || TARGET_FMA4")
1736
;; Fused multiply-add insn: operand 0 = operand 1 * operand 2 + operand 3.
;; Five alternatives, selected by the "isa" attribute:
;;   0-2  FMA3 three-operand forms (132, 213, 231).  The destination is tied
;;        to operand 1 in alternatives 0 and 1 and to operand 3 in
;;        alternative 2; the form is chosen to match which input may be in
;;        memory.
;;   3-4  FMA4 four-operand form with a separate destination; at most one
;;        of operands 2 and 3 may be in memory.
1737(define_insn "*fma_fmadd_<mode>"
1738  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1739	(fma:FMAMODE
1740	  (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
1741	  (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
1742	  (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
1743  "TARGET_FMA || TARGET_FMA4"
1744  "@
1745   vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1746   vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1747   vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1748   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1749   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1750  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1751   (set_attr "type" "ssemuladd")
1752   (set_attr "mode" "<MODE>")])
1753
;; Fused multiply-subtract insn:
;;   operand 0 = operand 1 * operand 2 + (-operand 3).
;; Alternatives 0-2 are the FMA3 forms (132, 213, 231) with the destination
;; tied to operand 1, operand 1 and operand 3 respectively; alternatives
;; 3-4 are the FMA4 four-operand form with a separate destination.
1754(define_insn "*fma_fmsub_<mode>"
1755  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1756	(fma:FMAMODE
1757	  (match_operand:FMAMODE   1 "nonimmediate_operand" "%0, 0,x, x,x")
1758	  (match_operand:FMAMODE   2 "nonimmediate_operand" "xm, x,xm,x,m")
1759	  (neg:FMAMODE
1760	    (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
1761  "TARGET_FMA || TARGET_FMA4"
1762  "@
1763   vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1764   vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1765   vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1766   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1767   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1768  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1769   (set_attr "type" "ssemuladd")
1770   (set_attr "mode" "<MODE>")])
1771
;; Negated fused multiply-add insn:
;;   operand 0 = (-operand 1) * operand 2 + operand 3.
;; Alternatives 0-2 are the FMA3 forms (132, 213, 231) with the destination
;; tied to operand 1, operand 1 and operand 3 respectively; alternatives
;; 3-4 are the FMA4 four-operand form with a separate destination.
1772(define_insn "*fma_fnmadd_<mode>"
1773  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1774	(fma:FMAMODE
1775	  (neg:FMAMODE
1776	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
1777	  (match_operand:FMAMODE   2 "nonimmediate_operand" "xm, x,xm,x,m")
1778	  (match_operand:FMAMODE   3 "nonimmediate_operand" " x,xm,0,xm,x")))]
1779  "TARGET_FMA || TARGET_FMA4"
1780  "@
1781   vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1782   vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1783   vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1784   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1785   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1786  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1787   (set_attr "type" "ssemuladd")
1788   (set_attr "mode" "<MODE>")])
1789
;; Negated fused multiply-subtract insn:
;;   operand 0 = (-operand 1) * operand 2 + (-operand 3).
;; Alternatives 0-2 are the FMA3 forms (132, 213, 231) with the destination
;; tied to operand 1, operand 1 and operand 3 respectively; alternatives
;; 3-4 are the FMA4 four-operand form with a separate destination.
1790(define_insn "*fma_fnmsub_<mode>"
1791  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1792	(fma:FMAMODE
1793	  (neg:FMAMODE
1794	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
1795	  (match_operand:FMAMODE   2 "nonimmediate_operand" "xm, x,xm,x,m")
1796	  (neg:FMAMODE
1797	    (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
1798  "TARGET_FMA || TARGET_FMA4"
1799  "@
1800   vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1801   vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1802   vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1803   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1804   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1805  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1806   (set_attr "type" "ssemuladd")
1807   (set_attr "mode" "<MODE>")])
1808
1809;; FMA parallel floating point multiply addsub and subadd operations.
1810
1811;; It would be possible to represent these without the UNSPEC as
1812;;
1813;; (vec_merge
1814;;   (fma op1 op2 op3)
1815;;   (fma op1 op2 (neg op3))
1816;;   (merge-const))
1817;;
1818;; But this doesn't seem useful in practice.
1819
;; Expander for the packed multiply with alternating add/subtract, kept as
;; UNSPEC_FMADDSUB over operands 1, 2 and 3 on the VF modes.  There is no
;; preparation code; the template is emitted as written.  Requires FMA or
;; FMA4.
1820(define_expand "fmaddsub_<mode>"
1821  [(set (match_operand:VF 0 "register_operand")
1822	(unspec:VF
1823	  [(match_operand:VF 1 "nonimmediate_operand")
1824	   (match_operand:VF 2 "nonimmediate_operand")
1825	   (match_operand:VF 3 "nonimmediate_operand")]
1826	  UNSPEC_FMADDSUB))]
1827  "TARGET_FMA || TARGET_FMA4")
1828
;; Insn matching UNSPEC_FMADDSUB with operand 3 used directly.
;; Alternatives 0-2 are the FMA3 forms (132, 213, 231) with the destination
;; tied to operand 1, operand 1 and operand 3 respectively; alternatives
;; 3-4 are the FMA4 four-operand form with a separate destination.
1829(define_insn "*fma_fmaddsub_<mode>"
1830  [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
1831	(unspec:VF
1832	  [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
1833	   (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
1834	   (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x")]
1835	  UNSPEC_FMADDSUB))]
1836  "TARGET_FMA || TARGET_FMA4"
1837  "@
1838   vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1839   vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1840   vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1841   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1842   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1843  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1844   (set_attr "type" "ssemuladd")
1845   (set_attr "mode" "<MODE>")])
1846
;; Matches UNSPEC_FMADDSUB whose third input is wrapped in neg, and emits
;; the vfmsubadd mnemonics.  The constraint alternatives and isa split
;; (three "fma" alternatives followed by two "fma4" alternatives) mirror
;; the un-negated *fma_fmaddsub_<mode> pattern.
1847(define_insn "*fma_fmsubadd_<mode>"
1848  [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
1849	(unspec:VF
1850	  [(match_operand:VF   1 "nonimmediate_operand" "%0, 0,x, x,x")
1851	   (match_operand:VF   2 "nonimmediate_operand" "xm, x,xm,x,m")
1852	   (neg:VF
1853	     (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x"))]
1854	  UNSPEC_FMADDSUB))]
1855  "TARGET_FMA || TARGET_FMA4"
1856  "@
1857   vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1858   vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1859   vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1860   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1861   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1862  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1863   (set_attr "type" "ssemuladd")
1864   (set_attr "mode" "<MODE>")])
1865
1866;; FMA3 floating point scalar intrinsics. These merge result with
1867;; high-order elements from the destination register.
1868
;; Named expander for the scalar FMA3 multiply-add on VF_128 modes.
;; The fma of operands 1-3 is vec_merge'd with operand 0 itself under
;; mask (const_int 1).  Enabled for TARGET_FMA only; no preparation code.
1869(define_expand "fmai_vmfmadd_<mode>"
1870  [(set (match_operand:VF_128 0 "register_operand")
1871	(vec_merge:VF_128
1872	  (fma:VF_128
1873	    (match_operand:VF_128 1 "nonimmediate_operand")
1874	    (match_operand:VF_128 2 "nonimmediate_operand")
1875	    (match_operand:VF_128 3 "nonimmediate_operand"))
1876	  (match_dup 0)
1877	  (const_int 1)))]
1878  "TARGET_FMA")
1879
;; Scalar FMA3 multiply-add: fma of operands 1-3 merged with operand 0
;; under mask (const_int 1).  Three alternatives select the 132, 213 and
;; 231 encodings; the destination is tied to operand 1 in the first two
;; and to operand 3 in the last.
1880(define_insn "*fmai_fmadd_<mode>"
1881  [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1882        (vec_merge:VF_128
1883	  (fma:VF_128
1884	    (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1885	    (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1886	    (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1887	  (match_dup 0)
1888	  (const_int 1)))]
1889  "TARGET_FMA"
1890  "@
1891   vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1892   vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1893   vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1894  [(set_attr "type" "ssemuladd")
1895   (set_attr "mode" "<MODE>")])
1896
;; Scalar FMA3 multiply-subtract: like the fmadd form but operand 3 is
;; negated inside the fma.  The result is merged with operand 0 under
;; mask (const_int 1); the three alternatives emit vfmsub132/213/231.
1897(define_insn "*fmai_fmsub_<mode>"
1898  [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1899        (vec_merge:VF_128
1900	  (fma:VF_128
1901	    (match_operand:VF_128   1 "nonimmediate_operand" "%0, 0,x")
1902	    (match_operand:VF_128   2 "nonimmediate_operand" "xm, x,xm")
1903	    (neg:VF_128
1904	      (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1905	  (match_dup 0)
1906	  (const_int 1)))]
1907  "TARGET_FMA"
1908  "@
1909   vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1910   vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1911   vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1912  [(set_attr "type" "ssemuladd")
1913   (set_attr "mode" "<MODE>")])
1914
;; Scalar FMA3 negated multiply-add: operand 1 is negated inside the fma
;; while operand 3 is used as is.  The result is merged with operand 0
;; under mask (const_int 1); the alternatives emit vfnmadd132/213/231.
1915(define_insn "*fmai_fnmadd_<mode>"
1916  [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1917        (vec_merge:VF_128
1918	  (fma:VF_128
1919	    (neg:VF_128
1920	      (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1921	    (match_operand:VF_128   2 "nonimmediate_operand" "xm, x,xm")
1922	    (match_operand:VF_128   3 "nonimmediate_operand" " x,xm,0"))
1923	  (match_dup 0)
1924	  (const_int 1)))]
1925  "TARGET_FMA"
1926  "@
1927   vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1928   vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1929   vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1930  [(set_attr "type" "ssemuladd")
1931   (set_attr "mode" "<MODE>")])
1932
;; Scalar FMA3 negated multiply-subtract: both operand 1 and operand 3
;; are negated inside the fma.  The result is merged with operand 0 under
;; mask (const_int 1); the alternatives emit vfnmsub132/213/231.
1933(define_insn "*fmai_fnmsub_<mode>"
1934  [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1935        (vec_merge:VF_128
1936	  (fma:VF_128
1937	    (neg:VF_128
1938	      (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1939	    (match_operand:VF_128   2 "nonimmediate_operand" "xm, x,xm")
1940	    (neg:VF_128
1941	      (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1942	  (match_dup 0)
1943	  (const_int 1)))]
1944  "TARGET_FMA"
1945  "@
1946   vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1947   vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1948   vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1949  [(set_attr "type" "ssemuladd")
1950   (set_attr "mode" "<MODE>")])
1951
1952;; FMA4 floating point scalar intrinsics.  These write the
1953;; entire destination register, with the high-order elements zeroed.
1954
;; Named expander for the scalar FMA4 multiply-add on VF_128 modes.
;; The preparation code sets operand 4 to the all-zero constant of the
;; mode, so the second arm of the vec_merge (mask (const_int 1)) is a
;; zero vector rather than the destination.  Enabled for TARGET_FMA4.
1955(define_expand "fma4i_vmfmadd_<mode>"
1956  [(set (match_operand:VF_128 0 "register_operand")
1957	(vec_merge:VF_128
1958	  (fma:VF_128
1959	    (match_operand:VF_128 1 "nonimmediate_operand")
1960	    (match_operand:VF_128 2 "nonimmediate_operand")
1961	    (match_operand:VF_128 3 "nonimmediate_operand"))
1962	  (match_dup 4)
1963	  (const_int 1)))]
1964  "TARGET_FMA4"
1965{
1966  operands[4] = CONST0_RTX (<MODE>mode);
1967})
1968
;; Scalar FMA4 multiply-add: fma of operands 1-3 merged with a zero
;; vector (operand 4 must satisfy const0_operand) under mask
;; (const_int 1).  Two alternatives allow the memory operand to be either
;; operand 3 or operand 2; both emit the same four-operand vfmadd.
1969(define_insn "*fma4i_vmfmadd_<mode>"
1970  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1971	(vec_merge:VF_128
1972	  (fma:VF_128
1973	    (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1974	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1975	    (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1976	  (match_operand:VF_128 4 "const0_operand" "")
1977	  (const_int 1)))]
1978  "TARGET_FMA4"
1979  "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1980  [(set_attr "type" "ssemuladd")
1981   (set_attr "mode" "<MODE>")])
1982
;; Scalar FMA4 multiply-subtract: operand 3 is negated inside the fma and
;; the result is merged with a zero vector (operand 4, const0_operand)
;; under mask (const_int 1).  Emits the four-operand vfmsub.
1983(define_insn "*fma4i_vmfmsub_<mode>"
1984  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1985	(vec_merge:VF_128
1986	  (fma:VF_128
1987	    (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1988	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1989	    (neg:VF_128
1990	      (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1991	  (match_operand:VF_128 4 "const0_operand" "")
1992	  (const_int 1)))]
1993  "TARGET_FMA4"
1994  "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1995  [(set_attr "type" "ssemuladd")
1996   (set_attr "mode" "<MODE>")])
1997
;; Scalar FMA4 negated multiply-add: operand 1 is negated inside the fma
;; and the result is merged with a zero vector (operand 4,
;; const0_operand) under mask (const_int 1).  Emits vfnmadd.
1998(define_insn "*fma4i_vmfnmadd_<mode>"
1999  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2000	(vec_merge:VF_128
2001	  (fma:VF_128
2002	    (neg:VF_128
2003	      (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
2004	    (match_operand:VF_128   2 "nonimmediate_operand" " x,m")
2005	    (match_operand:VF_128   3 "nonimmediate_operand" "xm,x"))
2006	  (match_operand:VF_128 4 "const0_operand" "")
2007	  (const_int 1)))]
2008  "TARGET_FMA4"
2009  "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2010  [(set_attr "type" "ssemuladd")
2011   (set_attr "mode" "<MODE>")])
2012
;; Scalar FMA4 negated multiply-subtract: operands 1 and 3 are both
;; negated inside the fma and the result is merged with a zero vector
;; (operand 4, const0_operand) under mask (const_int 1).  Emits vfnmsub.
2013(define_insn "*fma4i_vmfnmsub_<mode>"
2014  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2015	(vec_merge:VF_128
2016	  (fma:VF_128
2017	    (neg:VF_128
2018	      (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
2019	    (match_operand:VF_128   2 "nonimmediate_operand" " x,m")
2020	    (neg:VF_128
2021	      (match_operand:VF_128   3 "nonimmediate_operand" "xm,x")))
2022	  (match_operand:VF_128 4 "const0_operand" "")
2023	  (const_int 1)))]
2024  "TARGET_FMA4"
2025  "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2026  [(set_attr "type" "ssemuladd")
2027   (set_attr "mode" "<MODE>")])
2028
2029;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2030;;
2031;; Parallel single-precision floating point conversion operations
2032;;
2033;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2034
;; Converts the V2SI operand 2 to V2SF (float), duplicates it to V4SF and
;; merges it into operand 1 under vec_merge mask (const_int 3).
;; Operand 1 is tied to the destination; emits cvtpi2ps.
2035(define_insn "sse_cvtpi2ps"
2036  [(set (match_operand:V4SF 0 "register_operand" "=x")
2037	(vec_merge:V4SF
2038	  (vec_duplicate:V4SF
2039	    (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2040	  (match_operand:V4SF 1 "register_operand" "0")
2041	  (const_int 3)))]
2042  "TARGET_SSE"
2043  "cvtpi2ps\t{%2, %0|%0, %2}"
2044  [(set_attr "type" "ssecvt")
2045   (set_attr "mode" "V4SF")])
2046
;; Non-truncating V4SF -> integer conversion (UNSPEC_FIX_NOTRUNC) of
;; which only elements 0 and 1 are selected into the V2SI destination.
;; Emits cvtps2pi; the unit attribute is "mmx".
2047(define_insn "sse_cvtps2pi"
2048  [(set (match_operand:V2SI 0 "register_operand" "=y")
2049	(vec_select:V2SI
2050	  (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2051		       UNSPEC_FIX_NOTRUNC)
2052	  (parallel [(const_int 0) (const_int 1)])))]
2053  "TARGET_SSE"
2054  "cvtps2pi\t{%1, %0|%0, %1}"
2055  [(set_attr "type" "ssecvt")
2056   (set_attr "unit" "mmx")
2057   (set_attr "mode" "DI")])
2058
;; Truncating counterpart of sse_cvtps2pi: uses fix:V4SI instead of the
;; UNSPEC_FIX_NOTRUNC unspec and selects elements 0 and 1 into the V2SI
;; destination.  Emits cvttps2pi; prefix_rep is explicitly set to 0.
2059(define_insn "sse_cvttps2pi"
2060  [(set (match_operand:V2SI 0 "register_operand" "=y")
2061	(vec_select:V2SI
2062	  (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2063	  (parallel [(const_int 0) (const_int 1)])))]
2064  "TARGET_SSE"
2065  "cvttps2pi\t{%1, %0|%0, %1}"
2066  [(set_attr "type" "ssecvt")
2067   (set_attr "unit" "mmx")
2068   (set_attr "prefix_rep" "0")
2069   (set_attr "mode" "SF")])
2070
;; Converts the SI operand 2 to SF and merges it into element 0 of the
;; V4SF operand 1 (vec_merge mask 1).  Alternatives 0-1 are the non-AVX
;; two-operand form with operand 1 tied to the destination (register and
;; memory sources are split so the decode attributes can differ);
;; alternative 2 is the AVX three-operand form.
2071(define_insn "sse_cvtsi2ss"
2072  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2073	(vec_merge:V4SF
2074	  (vec_duplicate:V4SF
2075	    (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2076	  (match_operand:V4SF 1 "register_operand" "0,0,x")
2077	  (const_int 1)))]
2078  "TARGET_SSE"
2079  "@
2080   cvtsi2ss\t{%2, %0|%0, %2}
2081   cvtsi2ss\t{%2, %0|%0, %2}
2082   vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2083  [(set_attr "isa" "noavx,noavx,avx")
2084   (set_attr "type" "sseicvt")
2085   (set_attr "athlon_decode" "vector,double,*")
2086   (set_attr "amdfam10_decode" "vector,double,*")
2087   (set_attr "bdver1_decode" "double,direct,*")
2088   (set_attr "prefix" "orig,orig,vex")
2089   (set_attr "mode" "SF")])
2090
;; DImode-source variant of sse_cvtsi2ss, additionally requiring
;; TARGET_64BIT.  The non-AVX alternatives set prefix_rex to 1 and the
;; AVX alternative sets length_vex to 4.
2091(define_insn "sse_cvtsi2ssq"
2092  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2093	(vec_merge:V4SF
2094	  (vec_duplicate:V4SF
2095	    (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2096	  (match_operand:V4SF 1 "register_operand" "0,0,x")
2097	  (const_int 1)))]
2098  "TARGET_SSE && TARGET_64BIT"
2099  "@
2100   cvtsi2ssq\t{%2, %0|%0, %2}
2101   cvtsi2ssq\t{%2, %0|%0, %2}
2102   vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2103  [(set_attr "isa" "noavx,noavx,avx")
2104   (set_attr "type" "sseicvt")
2105   (set_attr "athlon_decode" "vector,double,*")
2106   (set_attr "amdfam10_decode" "vector,double,*")
2107   (set_attr "bdver1_decode" "double,direct,*")
2108   (set_attr "length_vex" "*,*,4")
2109   (set_attr "prefix_rex" "1,1,*")
2110   (set_attr "prefix" "orig,orig,vex")
2111   (set_attr "mode" "SF")])
2112
;; Non-truncating conversion (UNSPEC_FIX_NOTRUNC) of element 0 of a V4SF
;; operand to an SI general register.  Output template is %vcvtss2si.
;; NOTE(review): unlike sse_cvtss2si_2, no amdfam10_decode attribute is
;; set here -- confirm whether that omission is intentional.
2113(define_insn "sse_cvtss2si"
2114  [(set (match_operand:SI 0 "register_operand" "=r,r")
2115	(unspec:SI
2116	  [(vec_select:SF
2117	     (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2118	     (parallel [(const_int 0)]))]
2119	  UNSPEC_FIX_NOTRUNC))]
2120  "TARGET_SSE"
2121  "%vcvtss2si\t{%1, %0|%0, %1}"
2122  [(set_attr "type" "sseicvt")
2123   (set_attr "athlon_decode" "double,vector")
2124   (set_attr "bdver1_decode" "double,double")
2125   (set_attr "prefix_rep" "1")
2126   (set_attr "prefix" "maybe_vex")
2127   (set_attr "mode" "SI")])
2128
;; Same output template as sse_cvtss2si, but the source is a plain SF
;; operand rather than element 0 selected from a V4SF vector.
2129(define_insn "sse_cvtss2si_2"
2130  [(set (match_operand:SI 0 "register_operand" "=r,r")
2131	(unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2132		   UNSPEC_FIX_NOTRUNC))]
2133  "TARGET_SSE"
2134  "%vcvtss2si\t{%1, %0|%0, %1}"
2135  [(set_attr "type" "sseicvt")
2136   (set_attr "athlon_decode" "double,vector")
2137   (set_attr "amdfam10_decode" "double,double")
2138   (set_attr "bdver1_decode" "double,double")
2139   (set_attr "prefix_rep" "1")
2140   (set_attr "prefix" "maybe_vex")
2141   (set_attr "mode" "SI")])
2142
;; DImode-destination variant of sse_cvtss2si; additionally requires
;; TARGET_64BIT and appends the {q} suffix in the output template.
;; NOTE(review): unlike sse_cvtss2siq_2, no amdfam10_decode attribute is
;; set here -- confirm whether that omission is intentional.
2143(define_insn "sse_cvtss2siq"
2144  [(set (match_operand:DI 0 "register_operand" "=r,r")
2145	(unspec:DI
2146	  [(vec_select:SF
2147	     (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2148	     (parallel [(const_int 0)]))]
2149	  UNSPEC_FIX_NOTRUNC))]
2150  "TARGET_SSE && TARGET_64BIT"
2151  "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2152  [(set_attr "type" "sseicvt")
2153   (set_attr "athlon_decode" "double,vector")
2154   (set_attr "bdver1_decode" "double,double")
2155   (set_attr "prefix_rep" "1")
2156   (set_attr "prefix" "maybe_vex")
2157   (set_attr "mode" "DI")])
2158
;; DImode-destination, 64-bit-only variant taking a plain SF operand
;; wrapped in UNSPEC_FIX_NOTRUNC; same template as sse_cvtss2siq.
2159(define_insn "sse_cvtss2siq_2"
2160  [(set (match_operand:DI 0 "register_operand" "=r,r")
2161	(unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2162		   UNSPEC_FIX_NOTRUNC))]
2163  "TARGET_SSE && TARGET_64BIT"
2164  "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2165  [(set_attr "type" "sseicvt")
2166   (set_attr "athlon_decode" "double,vector")
2167   (set_attr "amdfam10_decode" "double,double")
2168   (set_attr "bdver1_decode" "double,double")
2169   (set_attr "prefix_rep" "1")
2170   (set_attr "prefix" "maybe_vex")
2171   (set_attr "mode" "DI")])
2172
;; Truncating conversion (fix:SI) of element 0 of a V4SF operand to an SI
;; general register.  Output template is %vcvttss2si.
2173(define_insn "sse_cvttss2si"
2174  [(set (match_operand:SI 0 "register_operand" "=r,r")
2175	(fix:SI
2176	  (vec_select:SF
2177	    (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2178	    (parallel [(const_int 0)]))))]
2179  "TARGET_SSE"
2180  "%vcvttss2si\t{%1, %0|%0, %1}"
2181  [(set_attr "type" "sseicvt")
2182   (set_attr "athlon_decode" "double,vector")
2183   (set_attr "amdfam10_decode" "double,double")
2184   (set_attr "bdver1_decode" "double,double")
2185   (set_attr "prefix_rep" "1")
2186   (set_attr "prefix" "maybe_vex")
2187   (set_attr "mode" "SI")])
2188
;; DImode-destination variant of sse_cvttss2si; additionally requires
;; TARGET_64BIT and appends the {q} suffix in the output template.
2189(define_insn "sse_cvttss2siq"
2190  [(set (match_operand:DI 0 "register_operand" "=r,r")
2191	(fix:DI
2192	  (vec_select:SF
2193	    (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2194	    (parallel [(const_int 0)]))))]
2195  "TARGET_SSE && TARGET_64BIT"
2196  "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2197  [(set_attr "type" "sseicvt")
2198   (set_attr "athlon_decode" "double,vector")
2199   (set_attr "amdfam10_decode" "double,double")
2200   (set_attr "bdver1_decode" "double,double")
2201   (set_attr "prefix_rep" "1")
2202   (set_attr "prefix" "maybe_vex")
2203   (set_attr "mode" "DI")])
2204
;; Signed integer vector -> float vector conversion for the VF1 modes;
;; the source has the matching <sseintvecmode>.  Output template is
;; %vcvtdq2ps.
2205(define_insn "float<sseintvecmodelower><mode>2"
2206  [(set (match_operand:VF1 0 "register_operand" "=x")
2207	(float:VF1
2208	  (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "xm")))]
2209  "TARGET_SSE2"
2210  "%vcvtdq2ps\t{%1, %0|%0, %1}"
2211  [(set_attr "type" "ssecvt")
2212   (set_attr "prefix" "maybe_vex")
2213   (set_attr "mode" "<sseinsnmode>")])
2214
;; Unsigned integer vector -> float vector conversion for the VF1 modes.
;; There is no RTL template to match: the whole expansion is delegated to
;; ix86_expand_vector_convert_uns_vsivsf.  Available for V4SF with SSE2,
;; and for the other VF1 mode only when TARGET_AVX2 is set.
2215(define_expand "floatuns<sseintvecmodelower><mode>2"
2216  [(match_operand:VF1 0 "register_operand" "")
2217   (match_operand:<sseintvecmode> 1 "register_operand" "")]
2218  "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
2219{
2220  ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
2221  DONE;
2222})
2223
;; 256-bit non-truncating V8SF -> V8SI conversion (UNSPEC_FIX_NOTRUNC).
;; AVX only; emits vcvtps2dq.
2224(define_insn "avx_cvtps2dq256"
2225  [(set (match_operand:V8SI 0 "register_operand" "=x")
2226	(unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2227		     UNSPEC_FIX_NOTRUNC))]
2228  "TARGET_AVX"
2229  "vcvtps2dq\t{%1, %0|%0, %1}"
2230  [(set_attr "type" "ssecvt")
2231   (set_attr "prefix" "vex")
2232   (set_attr "mode" "OI")])
2233
;; 128-bit non-truncating V4SF -> V4SI conversion (UNSPEC_FIX_NOTRUNC).
;; Output template is %vcvtps2dq.  prefix_data16 is left at its default
;; ("*") when TARGET_AVX and forced to 1 otherwise.
2234(define_insn "sse2_cvtps2dq"
2235  [(set (match_operand:V4SI 0 "register_operand" "=x")
2236	(unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2237		     UNSPEC_FIX_NOTRUNC))]
2238  "TARGET_SSE2"
2239  "%vcvtps2dq\t{%1, %0|%0, %1}"
2240  [(set_attr "type" "ssecvt")
2241   (set (attr "prefix_data16")
2242     (if_then_else
2243       (match_test "TARGET_AVX")
2244     (const_string "*")
2245     (const_string "1")))
2246   (set_attr "prefix" "maybe_vex")
2247   (set_attr "mode" "TI")])
2248
;; 256-bit truncating V8SF -> V8SI conversion (fix).  AVX only; emits
;; vcvttps2dq.
2249(define_insn "fix_truncv8sfv8si2"
2250  [(set (match_operand:V8SI 0 "register_operand" "=x")
2251	(fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2252  "TARGET_AVX"
2253  "vcvttps2dq\t{%1, %0|%0, %1}"
2254  [(set_attr "type" "ssecvt")
2255   (set_attr "prefix" "vex")
2256   (set_attr "mode" "OI")])
2257
;; 128-bit truncating V4SF -> V4SI conversion (fix).  Output template is
;; %vcvttps2dq.  prefix_rep and prefix_data16 are left at their defaults
;; ("*") when TARGET_AVX, and are 1 and 0 respectively otherwise.
;; Each attribute is specified exactly once: prefix_data16 uses only the
;; TARGET_AVX-conditional form, the same shape as prefix_rep here and as
;; prefix_data16 in sse2_cvtps2dq.
2258(define_insn "fix_truncv4sfv4si2"
2259  [(set (match_operand:V4SI 0 "register_operand" "=x")
2260	(fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2261  "TARGET_SSE2"
2262  "%vcvttps2dq\t{%1, %0|%0, %1}"
2263  [(set_attr "type" "ssecvt")
2264   (set (attr "prefix_rep")
2265     (if_then_else
2266       (match_test "TARGET_AVX")
2267     (const_string "*")
2268     (const_string "1")))
2269   (set (attr "prefix_data16")
2270     (if_then_else
2271       (match_test "TARGET_AVX")
2272     (const_string "*")
2273     (const_string "0")))
2275   (set_attr "prefix" "maybe_vex")
2276   (set_attr "mode" "TI")])
2277
;; Unsigned truncating float -> integer vector conversion for VF1 modes,
;; built from the signed conversion:
;;   tmp[0] = input adjusted by ix86_expand_adjust_ufix_to_sfix_si, which
;;            also hands back a second value in tmp[2];
;;   tmp[1] = signed fix_trunc of tmp[0];
;;   result = tmp[1] XOR tmp[2].
;; NOTE(review): tmp[2] is presumably a per-element correction mask for
;; inputs beyond the signed range -- confirm against the helper.
2278(define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
2279  [(match_operand:<sseintvecmode> 0 "register_operand" "")
2280   (match_operand:VF1 1 "register_operand" "")]
2281  "TARGET_SSE2"
2282{
2283  rtx tmp[3];
2284  tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
2285  tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
2286  emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
2287  emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
2288  DONE;
2289})
2290
2291;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2292;;
2293;; Parallel double-precision floating point conversion operations
2294;;
2295;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2296
;; V2SI -> V2DF conversion (float); emits cvtpi2pd.  Two alternatives:
;; source in a "y" register (unit "mmx", prefix_data16 1) or in memory
;; (both attributes left at their defaults).
2297(define_insn "sse2_cvtpi2pd"
2298  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2299	(float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2300  "TARGET_SSE2"
2301  "cvtpi2pd\t{%1, %0|%0, %1}"
2302  [(set_attr "type" "ssecvt")
2303   (set_attr "unit" "mmx,*")
2304   (set_attr "prefix_data16" "1,*")
2305   (set_attr "mode" "V2DF")])
2306
;; Non-truncating V2DF -> V2SI conversion (UNSPEC_FIX_NOTRUNC) into a
;; "y" register; emits cvtpd2pi with unit "mmx".
2307(define_insn "sse2_cvtpd2pi"
2308  [(set (match_operand:V2SI 0 "register_operand" "=y")
2309	(unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2310		     UNSPEC_FIX_NOTRUNC))]
2311  "TARGET_SSE2"
2312  "cvtpd2pi\t{%1, %0|%0, %1}"
2313  [(set_attr "type" "ssecvt")
2314   (set_attr "unit" "mmx")
2315   (set_attr "bdver1_decode" "double")
2316   (set_attr "prefix_data16" "1")
2317   (set_attr "mode" "DI")])
2318
;; Truncating V2DF -> V2SI conversion (fix) into a "y" register; emits
;; cvttpd2pi with unit "mmx".
;; NOTE(review): mode is "TI" here but "DI" in sse2_cvtpd2pi -- confirm
;; the difference is intended.
2319(define_insn "sse2_cvttpd2pi"
2320  [(set (match_operand:V2SI 0 "register_operand" "=y")
2321	(fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2322  "TARGET_SSE2"
2323  "cvttpd2pi\t{%1, %0|%0, %1}"
2324  [(set_attr "type" "ssecvt")
2325   (set_attr "unit" "mmx")
2326   (set_attr "bdver1_decode" "double")
2327   (set_attr "prefix_data16" "1")
2328   (set_attr "mode" "TI")])
2329
;; Converts the SI operand 2 to DF and merges it into element 0 of the
;; V2DF operand 1 (vec_merge mask 1).  Alternatives 0-1 are the non-AVX
;; two-operand form with operand 1 tied to the destination; alternative 2
;; is the AVX three-operand form.
2330(define_insn "sse2_cvtsi2sd"
2331  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2332	(vec_merge:V2DF
2333	  (vec_duplicate:V2DF
2334	    (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2335	  (match_operand:V2DF 1 "register_operand" "0,0,x")
2336	  (const_int 1)))]
2337  "TARGET_SSE2"
2338  "@
2339   cvtsi2sd\t{%2, %0|%0, %2}
2340   cvtsi2sd\t{%2, %0|%0, %2}
2341   vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2342  [(set_attr "isa" "noavx,noavx,avx")
2343   (set_attr "type" "sseicvt")
2344   (set_attr "athlon_decode" "double,direct,*")
2345   (set_attr "amdfam10_decode" "vector,double,*")
2346   (set_attr "bdver1_decode" "double,direct,*")
2347   (set_attr "prefix" "orig,orig,vex")
2348   (set_attr "mode" "DF")])
2349
;; DImode-source variant of sse2_cvtsi2sd, additionally requiring
;; TARGET_64BIT.  The non-AVX alternatives set prefix_rex to 1 and the
;; AVX alternative sets length_vex to 4.
2350(define_insn "sse2_cvtsi2sdq"
2351  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2352	(vec_merge:V2DF
2353	  (vec_duplicate:V2DF
2354	    (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2355	  (match_operand:V2DF 1 "register_operand" "0,0,x")
2356	  (const_int 1)))]
2357  "TARGET_SSE2 && TARGET_64BIT"
2358  "@
2359   cvtsi2sdq\t{%2, %0|%0, %2}
2360   cvtsi2sdq\t{%2, %0|%0, %2}
2361   vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2362  [(set_attr "isa" "noavx,noavx,avx")
2363   (set_attr "type" "sseicvt")
2364   (set_attr "athlon_decode" "double,direct,*")
2365   (set_attr "amdfam10_decode" "vector,double,*")
2366   (set_attr "bdver1_decode" "double,direct,*")
2367   (set_attr "length_vex" "*,*,4")
2368   (set_attr "prefix_rex" "1,1,*")
2369   (set_attr "prefix" "orig,orig,vex")
2370   (set_attr "mode" "DF")])
2371
;; Non-truncating conversion (UNSPEC_FIX_NOTRUNC) of element 0 of a V2DF
;; operand to an SI general register.  Output template is %vcvtsd2si.
;; NOTE(review): unlike sse2_cvtsd2si_2, no amdfam10_decode attribute is
;; set here -- confirm whether that omission is intentional.
2372(define_insn "sse2_cvtsd2si"
2373  [(set (match_operand:SI 0 "register_operand" "=r,r")
2374	(unspec:SI
2375	  [(vec_select:DF
2376	     (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2377	     (parallel [(const_int 0)]))]
2378	  UNSPEC_FIX_NOTRUNC))]
2379  "TARGET_SSE2"
2380  "%vcvtsd2si\t{%1, %0|%0, %1}"
2381  [(set_attr "type" "sseicvt")
2382   (set_attr "athlon_decode" "double,vector")
2383   (set_attr "bdver1_decode" "double,double")
2384   (set_attr "prefix_rep" "1")
2385   (set_attr "prefix" "maybe_vex")
2386   (set_attr "mode" "SI")])
2387
;; Same output template as sse2_cvtsd2si, but the source is a plain DF
;; operand rather than element 0 selected from a V2DF vector.
2388(define_insn "sse2_cvtsd2si_2"
2389  [(set (match_operand:SI 0 "register_operand" "=r,r")
2390	(unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2391		   UNSPEC_FIX_NOTRUNC))]
2392  "TARGET_SSE2"
2393  "%vcvtsd2si\t{%1, %0|%0, %1}"
2394  [(set_attr "type" "sseicvt")
2395   (set_attr "athlon_decode" "double,vector")
2396   (set_attr "amdfam10_decode" "double,double")
2397   (set_attr "bdver1_decode" "double,double")
2398   (set_attr "prefix_rep" "1")
2399   (set_attr "prefix" "maybe_vex")
2400   (set_attr "mode" "SI")])
2401
;; DImode-destination variant of sse2_cvtsd2si; additionally requires
;; TARGET_64BIT and appends the {q} suffix in the output template.
;; NOTE(review): unlike sse2_cvtsd2siq_2, no amdfam10_decode attribute
;; is set here -- confirm whether that omission is intentional.
2402(define_insn "sse2_cvtsd2siq"
2403  [(set (match_operand:DI 0 "register_operand" "=r,r")
2404	(unspec:DI
2405	  [(vec_select:DF
2406	     (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2407	     (parallel [(const_int 0)]))]
2408	  UNSPEC_FIX_NOTRUNC))]
2409  "TARGET_SSE2 && TARGET_64BIT"
2410  "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2411  [(set_attr "type" "sseicvt")
2412   (set_attr "athlon_decode" "double,vector")
2413   (set_attr "bdver1_decode" "double,double")
2414   (set_attr "prefix_rep" "1")
2415   (set_attr "prefix" "maybe_vex")
2416   (set_attr "mode" "DI")])
2417
;; DImode-destination, 64-bit-only variant taking a plain DF operand
;; wrapped in UNSPEC_FIX_NOTRUNC; same template as sse2_cvtsd2siq.
2418(define_insn "sse2_cvtsd2siq_2"
2419  [(set (match_operand:DI 0 "register_operand" "=r,r")
2420	(unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2421		   UNSPEC_FIX_NOTRUNC))]
2422  "TARGET_SSE2 && TARGET_64BIT"
2423  "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2424  [(set_attr "type" "sseicvt")
2425   (set_attr "athlon_decode" "double,vector")
2426   (set_attr "amdfam10_decode" "double,double")
2427   (set_attr "bdver1_decode" "double,double")
2428   (set_attr "prefix_rep" "1")
2429   (set_attr "prefix" "maybe_vex")
2430   (set_attr "mode" "DI")])
2431
;; Truncating conversion (fix:SI) of element 0 of a V2DF operand to an SI
;; general register.  Output template is %vcvttsd2si.
2432(define_insn "sse2_cvttsd2si"
2433  [(set (match_operand:SI 0 "register_operand" "=r,r")
2434	(fix:SI
2435	  (vec_select:DF
2436	    (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2437	    (parallel [(const_int 0)]))))]
2438  "TARGET_SSE2"
2439  "%vcvttsd2si\t{%1, %0|%0, %1}"
2440  [(set_attr "type" "sseicvt")
2441   (set_attr "athlon_decode" "double,vector")
2442   (set_attr "amdfam10_decode" "double,double")
2443   (set_attr "bdver1_decode" "double,double")
2444   (set_attr "prefix_rep" "1")
2445   (set_attr "prefix" "maybe_vex")
2446   (set_attr "mode" "SI")])
2447
;; DImode-destination variant of sse2_cvttsd2si; additionally requires
;; TARGET_64BIT and appends the {q} suffix in the output template.
2448(define_insn "sse2_cvttsd2siq"
2449  [(set (match_operand:DI 0 "register_operand" "=r,r")
2450	(fix:DI
2451	  (vec_select:DF
2452	    (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2453	    (parallel [(const_int 0)]))))]
2454  "TARGET_SSE2 && TARGET_64BIT"
2455  "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2456  [(set_attr "type" "sseicvt")
2457   (set_attr "athlon_decode" "double,vector")
2458   (set_attr "amdfam10_decode" "double,double")
2459   (set_attr "bdver1_decode" "double,double")
2460   (set_attr "prefix_rep" "1")
2461   (set_attr "prefix" "maybe_vex")
2462   (set_attr "mode" "DI")])
2463
;; V4SI -> V4DF signed integer to double conversion.  AVX only; emits
;; vcvtdq2pd.
2464(define_insn "floatv4siv4df2"
2465  [(set (match_operand:V4DF 0 "register_operand" "=x")
2466	(float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2467  "TARGET_AVX"
2468  "vcvtdq2pd\t{%1, %0|%0, %1}"
2469  [(set_attr "type" "ssecvt")
2470   (set_attr "prefix" "vex")
2471   (set_attr "mode" "V4DF")])
2472
;; Converts elements 0-3 of a V8SI operand to V4DF.  The source is
;; printed with the %x modifier in the vcvtdq2pd template.
2473(define_insn "avx_cvtdq2pd256_2"
2474  [(set (match_operand:V4DF 0 "register_operand" "=x")
2475	(float:V4DF
2476	  (vec_select:V4SI
2477	    (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2478	    (parallel [(const_int 0) (const_int 1)
2479		       (const_int 2) (const_int 3)]))))]
2480  "TARGET_AVX"
2481  "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2482  [(set_attr "type" "ssecvt")
2483   (set_attr "prefix" "vex")
2484   (set_attr "mode" "V4DF")])
2485
;; Converts elements 0 and 1 of a V4SI operand to V2DF.  Output template
;; is %vcvtdq2pd; the Intel-syntax half prints the source with %q1.
2486(define_insn "sse2_cvtdq2pd"
2487  [(set (match_operand:V2DF 0 "register_operand" "=x")
2488	(float:V2DF
2489	  (vec_select:V2SI
2490	    (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2491	    (parallel [(const_int 0) (const_int 1)]))))]
2492  "TARGET_SSE2"
2493  "%vcvtdq2pd\t{%1, %0|%0, %q1}"
2494  [(set_attr "type" "ssecvt")
2495   (set_attr "prefix" "maybe_vex")
2496   (set_attr "mode" "V2DF")])
2497
;; Non-truncating V4DF -> V4SI conversion (UNSPEC_FIX_NOTRUNC).  AVX
;; only; emits vcvtpd2dq with the {y} suffix.
2498(define_insn "avx_cvtpd2dq256"
2499  [(set (match_operand:V4SI 0 "register_operand" "=x")
2500	(unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2501		     UNSPEC_FIX_NOTRUNC))]
2502  "TARGET_AVX"
2503  "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2504  [(set_attr "type" "ssecvt")
2505   (set_attr "prefix" "vex")
2506   (set_attr "mode" "OI")])
2507
;; Expander producing a V8SI result: the non-truncating V4DF -> V4SI
;; conversion concatenated with operand 2, which the preparation code
;; sets to the V4SI zero constant.
2508(define_expand "avx_cvtpd2dq256_2"
2509  [(set (match_operand:V8SI 0 "register_operand" "")
2510	(vec_concat:V8SI
2511	  (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "")]
2512		       UNSPEC_FIX_NOTRUNC)
2513	  (match_dup 2)))]
2514  "TARGET_AVX"
2515  "operands[2] = CONST0_RTX (V4SImode);")
2516
;; Matches the V8SI vec_concat of a non-truncating V4DF -> V4SI
;; conversion with a zero V4SI (operand 2, const0_operand).  The
;; destination is printed with the %x modifier.
2517(define_insn "*avx_cvtpd2dq256_2"
2518  [(set (match_operand:V8SI 0 "register_operand" "=x")
2519	(vec_concat:V8SI
2520	  (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2521		       UNSPEC_FIX_NOTRUNC)
2522	  (match_operand:V4SI 2 "const0_operand" "")))]
2523  "TARGET_AVX"
2524  "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
2525  [(set_attr "type" "ssecvt")
2526   (set_attr "prefix" "vex")
2527   (set_attr "mode" "OI")])
2528
;; Expander producing a V4SI result: the non-truncating V2DF -> V2SI
;; conversion concatenated with operand 2, which the preparation code
;; sets to the V2SI zero constant.
2529(define_expand "sse2_cvtpd2dq"
2530  [(set (match_operand:V4SI 0 "register_operand" "")
2531	(vec_concat:V4SI
2532	  (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2533		       UNSPEC_FIX_NOTRUNC)
2534	  (match_dup 2)))]
2535  "TARGET_SSE2"
2536  "operands[2] = CONST0_RTX (V2SImode);")
2537
;; Matches the V4SI vec_concat of a non-truncating V2DF -> V2SI
;; conversion with a zero V2SI (operand 2, const0_operand).  The output
;; code picks the mnemonic at run time: vcvtpd2dq{x} when TARGET_AVX,
;; plain cvtpd2dq otherwise.
2538(define_insn "*sse2_cvtpd2dq"
2539  [(set (match_operand:V4SI 0 "register_operand" "=x")
2540	(vec_concat:V4SI
2541	  (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2542		       UNSPEC_FIX_NOTRUNC)
2543	  (match_operand:V2SI 2 "const0_operand" "")))]
2544  "TARGET_SSE2"
2545{
2546  if (TARGET_AVX)
2547    return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2548  else
2549    return "cvtpd2dq\t{%1, %0|%0, %1}";
2550}
2551  [(set_attr "type" "ssecvt")
2552   (set_attr "prefix_rep" "1")
2553   (set_attr "prefix_data16" "0")
2554   (set_attr "prefix" "maybe_vex")
2555   (set_attr "mode" "TI")
2556   (set_attr "amdfam10_decode" "double")
2557   (set_attr "athlon_decode" "vector")
2558   (set_attr "bdver1_decode" "double")])
2559
;; Truncating V4DF -> V4SI conversion (fix).  AVX only; emits vcvttpd2dq
;; with the {y} suffix.
2560(define_insn "fix_truncv4dfv4si2"
2561  [(set (match_operand:V4SI 0 "register_operand" "=x")
2562	(fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2563  "TARGET_AVX"
2564  "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2565  [(set_attr "type" "ssecvt")
2566   (set_attr "prefix" "vex")
2567   (set_attr "mode" "OI")])
2568
;; Expander producing a V8SI result: the truncating V4DF -> V4SI
;; conversion concatenated with operand 2, which the preparation code
;; sets to the V4SI zero constant.
2569(define_expand "avx_cvttpd2dq256_2"
2570  [(set (match_operand:V8SI 0 "register_operand" "")
2571	(vec_concat:V8SI
2572	  (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" ""))
2573	  (match_dup 2)))]
2574  "TARGET_AVX"
2575  "operands[2] = CONST0_RTX (V4SImode);")
2576
;; Matches the V8SI vec_concat of a truncating V4DF -> V4SI conversion
;; with a zero V4SI (operand 2, const0_operand).  The destination is
;; printed with the %x modifier.
2577(define_insn "*avx_cvttpd2dq256_2"
2578  [(set (match_operand:V8SI 0 "register_operand" "=x")
2579	(vec_concat:V8SI
2580	  (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
2581	  (match_operand:V4SI 2 "const0_operand" "")))]
2582  "TARGET_AVX"
2583  "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
2584  [(set_attr "type" "ssecvt")
2585   (set_attr "prefix" "vex")
2586   (set_attr "mode" "OI")])
2587
;; Expander producing a V4SI result: the truncating V2DF -> V2SI
;; conversion concatenated with operand 2, which the preparation code
;; sets to the V2SI zero constant.
2588(define_expand "sse2_cvttpd2dq"
2589  [(set (match_operand:V4SI 0 "register_operand" "")
2590	(vec_concat:V4SI
2591	  (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2592	  (match_dup 2)))]
2593  "TARGET_SSE2"
2594  "operands[2] = CONST0_RTX (V2SImode);")
2595
;; Matches the V4SI vec_concat of a truncating V2DF -> V2SI conversion
;; with a zero V2SI (operand 2, const0_operand).  The output code picks
;; vcvttpd2dq{x} when TARGET_AVX and plain cvttpd2dq otherwise.
2596(define_insn "*sse2_cvttpd2dq"
2597  [(set (match_operand:V4SI 0 "register_operand" "=x")
2598	(vec_concat:V4SI
2599	  (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2600	  (match_operand:V2SI 2 "const0_operand" "")))]
2601  "TARGET_SSE2"
2602{
2603  if (TARGET_AVX)
2604    return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2605  else
2606    return "cvttpd2dq\t{%1, %0|%0, %1}";
2607}
2608  [(set_attr "type" "ssecvt")
2609   (set_attr "amdfam10_decode" "double")
2610   (set_attr "athlon_decode" "vector")
2611   (set_attr "bdver1_decode" "double")
2612   (set_attr "prefix" "maybe_vex")
2613   (set_attr "mode" "TI")])
2614
;; Narrows the V2DF operand 2 with float_truncate:V2SF, duplicates it to
;; V4SF and merges it into operand 1 under vec_merge mask (const_int 1).
;; Alternatives 0-1 are the non-AVX two-operand form with operand 1 tied
;; to the destination; alternative 2 is the AVX three-operand form.
2615(define_insn "sse2_cvtsd2ss"
2616  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2617	(vec_merge:V4SF
2618	  (vec_duplicate:V4SF
2619	    (float_truncate:V2SF
2620	      (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2621	  (match_operand:V4SF 1 "register_operand" "0,0,x")
2622	  (const_int 1)))]
2623  "TARGET_SSE2"
2624  "@
2625   cvtsd2ss\t{%2, %0|%0, %2}
2626   cvtsd2ss\t{%2, %0|%0, %2}
2627   vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2628  [(set_attr "isa" "noavx,noavx,avx")
2629   (set_attr "type" "ssecvt")
2630   (set_attr "athlon_decode" "vector,double,*")
2631   (set_attr "amdfam10_decode" "vector,double,*")
2632   (set_attr "bdver1_decode" "direct,direct,*")
2633   (set_attr "prefix" "orig,orig,vex")
2634   (set_attr "mode" "SF")])
2635
;; Widens elements 0 and 1 of the V4SF operand 2 with float_extend:V2DF
;; and merges the result into operand 1 under vec_merge mask
;; (const_int 1).  Alternatives 0-1 are the non-AVX two-operand form with
;; operand 1 tied to the destination; alternative 2 is the AVX
;; three-operand form.
2636(define_insn "sse2_cvtss2sd"
2637  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2638	(vec_merge:V2DF
2639	  (float_extend:V2DF
2640	    (vec_select:V2SF
2641	      (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2642	      (parallel [(const_int 0) (const_int 1)])))
2643	  (match_operand:V2DF 1 "register_operand" "0,0,x")
2644	  (const_int 1)))]
2645  "TARGET_SSE2"
2646  "@
2647   cvtss2sd\t{%2, %0|%0, %2}
2648   cvtss2sd\t{%2, %0|%0, %2}
2649   vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2650  [(set_attr "isa" "noavx,noavx,avx")
2651   (set_attr "type" "ssecvt")
2652   (set_attr "amdfam10_decode" "vector,double,*")
2653   (set_attr "athlon_decode" "direct,direct,*")
2654   (set_attr "bdver1_decode" "direct,direct,*")
2655   (set_attr "prefix" "orig,orig,vex")
2656   (set_attr "mode" "DF")])
2657
;; V4DF -> V4SF narrowing (float_truncate).  AVX only; emits vcvtpd2ps
;; with the {y} suffix.
2658(define_insn "avx_cvtpd2ps256"
2659  [(set (match_operand:V4SF 0 "register_operand" "=x")
2660	(float_truncate:V4SF
2661	  (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2662  "TARGET_AVX"
2663  "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2664  [(set_attr "type" "ssecvt")
2665   (set_attr "prefix" "vex")
2666   (set_attr "mode" "V4SF")])
2667
;; Expander producing a V4SF result: the V2DF -> V2SF float_truncate
;; concatenated with operand 2, which the preparation code sets to the
;; V2SF zero constant.
2668(define_expand "sse2_cvtpd2ps"
2669  [(set (match_operand:V4SF 0 "register_operand" "")
2670	(vec_concat:V4SF
2671	  (float_truncate:V2SF
2672	    (match_operand:V2DF 1 "nonimmediate_operand" ""))
2673	  (match_dup 2)))]
2674  "TARGET_SSE2"
2675  "operands[2] = CONST0_RTX (V2SFmode);")
2676
;; Matches the V4SF vec_concat of a V2DF -> V2SF float_truncate with a
;; zero V2SF (operand 2, const0_operand).  The output code picks
;; vcvtpd2ps{x} when TARGET_AVX and plain cvtpd2ps otherwise.
2677(define_insn "*sse2_cvtpd2ps"
2678  [(set (match_operand:V4SF 0 "register_operand" "=x")
2679	(vec_concat:V4SF
2680	  (float_truncate:V2SF
2681	    (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2682	  (match_operand:V2SF 2 "const0_operand" "")))]
2683  "TARGET_SSE2"
2684{
2685  if (TARGET_AVX)
2686    return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2687  else
2688    return "cvtpd2ps\t{%1, %0|%0, %1}";
2689}
2690  [(set_attr "type" "ssecvt")
2691   (set_attr "amdfam10_decode" "double")
2692   (set_attr "athlon_decode" "vector")
2693   (set_attr "bdver1_decode" "double")
2694   (set_attr "prefix_data16" "1")
2695   (set_attr "prefix" "maybe_vex")
2696   (set_attr "mode" "V4SF")])
2697
;; V4SF -> V4DF widening (float_extend).  AVX only; emits vcvtps2pd.
2698(define_insn "avx_cvtps2pd256"
2699  [(set (match_operand:V4DF 0 "register_operand" "=x")
2700	(float_extend:V4DF
2701	  (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2702  "TARGET_AVX"
2703  "vcvtps2pd\t{%1, %0|%0, %1}"
2704  [(set_attr "type" "ssecvt")
2705   (set_attr "prefix" "vex")
2706   (set_attr "mode" "V4DF")])
2707
;; Same widening as avx_cvtps2pd256, but the source is the low four
;; elements (indices 0..3) selected out of a V8SF operand.  The operand
;; is printed with the %x modifier in the output template.
(define_insn "*avx_cvtps2pd256_2"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (float_extend:V4DF
          (vec_select:V4SF
            (match_operand:V8SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)
                       (const_int 1)
                       (const_int 2)
                       (const_int 3)]))))]
  "TARGET_AVX"
  "vcvtps2pd\t{%x1, %0|%0, %x1}"
  [(set_attr "type"   "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode"   "V4DF")])
2720
;; 128-bit packed float -> packed double widening: only elements 0 and 1
;; of the V4SF source are selected and extended to a V2DF result.
(define_insn "sse2_cvtps2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2"
  "%vcvtps2pd\t{%1, %0|%0, %q1}"
  [(set_attr "type"            "ssecvt")
   (set_attr "amdfam10_decode" "direct")
   (set_attr "athlon_decode"   "double")
   (set_attr "bdver1_decode"   "double")
   (set_attr "prefix_data16"   "0")
   (set_attr "prefix"          "maybe_vex")
   (set_attr "mode"            "V2DF")])
2736
;; Widen the high two floats of a V4SF to V2DF, in two steps:
;;   1. move elements 2 and 3 of operand 1 into the low half of a fresh
;;      temporary (indices 6,7 of the concatenation {temp, operand 1});
;;   2. extend the low two floats of that temporary to doubles.
(define_expand "vec_unpacks_hi_v4sf"
  [(set (match_dup 2)
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_dup 2)
            (match_operand:V4SF 1 "nonimmediate_operand" ""))
          (parallel [(const_int 6)
                     (const_int 7)
                     (const_int 2)
                     (const_int 3)])))
   (set (match_operand:V2DF 0 "register_operand" "")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_dup 2)
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2"
{
  /* Scratch register holding the shuffled source.  */
  operands[2] = gen_reg_rtx (V4SFmode);
})
2752
;; Widen the high four floats of a V8SF to V4DF: extract elements 4..7
;; into a V4SF temporary, then extend that temporary to doubles.
(define_expand "vec_unpacks_hi_v8sf"
  [(set (match_dup 2)
        (vec_select:V4SF
          (match_operand:V8SF 1 "nonimmediate_operand" "")
          (parallel [(const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
        (float_extend:V4DF (match_dup 2)))]
  "TARGET_AVX"
{
  /* Scratch register for the extracted upper half.  */
  operands[2] = gen_reg_rtx (V4SFmode);
})
2764
;; Widen the low two floats (elements 0 and 1) of a V4SF to V2DF.
;; No preparation code is needed; the generated RTL has the same shape
;; as the sse2_cvtps2pd insn pattern.
(define_expand "vec_unpacks_lo_v4sf"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2")
2772
;; Widen the low four floats (elements 0..3) of a V8SF to V4DF.
;; No preparation code is needed; the generated RTL has the same shape
;; as the *avx_cvtps2pd256_2 insn pattern.
(define_expand "vec_unpacks_lo_v8sf"
  [(set (match_operand:V4DF 0 "register_operand" "")
        (float_extend:V4DF
          (vec_select:V4SF
            (match_operand:V8SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0)
                       (const_int 1)
                       (const_int 2)
                       (const_int 3)]))))]
  "TARGET_AVX")
2781
;; For an integer vector mode, the floating-point vector mode with half
;; as many elements, each twice as wide.  Used as the result mode of the
;; vec_unpack{s,u}_float_{hi,lo}_<mode> expanders.
(define_mode_attr sseunpackfltmode
  [(V8HI  "V4SF")
   (V4SI  "V2DF")
   (V16HI "V8SF")
   (V8SI  "V4DF")])
2784
;; Signed unpack-and-convert, high half: sign-extend the high elements
;; of operand 1 into a wider integer vector (via vec_unpacks_hi_<mode>),
;; then convert that vector to floating point with a plain FLOAT rtx.
(define_expand "vec_unpacks_float_hi_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx as_float;

  emit_insn (gen_vec_unpacks_hi_<mode> (widened, operands[1]));

  as_float = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], as_float));
  DONE;
})
2797
;; Signed unpack-and-convert, low half: sign-extend the low elements of
;; operand 1 into a wider integer vector (via vec_unpacks_lo_<mode>),
;; then convert that vector to floating point with a plain FLOAT rtx.
(define_expand "vec_unpacks_float_lo_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx as_float;

  emit_insn (gen_vec_unpacks_lo_<mode> (widened, operands[1]));

  as_float = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], as_float));
  DONE;
})
2810
;; Unsigned unpack-and-convert, high half: zero-extend the high elements
;; of operand 1 into a wider integer vector (via vec_unpacku_hi_<mode>).
;; The widened values are then converted with a signed FLOAT rtx.
(define_expand "vec_unpacku_float_hi_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx as_float;

  emit_insn (gen_vec_unpacku_hi_<mode> (widened, operands[1]));

  as_float = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], as_float));
  DONE;
})
2823
;; Unsigned unpack-and-convert, low half: zero-extend the low elements
;; of operand 1 into a wider integer vector (via vec_unpacku_lo_<mode>).
;; The widened values are then converted with a signed FLOAT rtx.
(define_expand "vec_unpacku_float_lo_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx as_float;

  emit_insn (gen_vec_unpacku_lo_<mode> (widened, operands[1]));

  as_float = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], as_float));
  DONE;
})
2836
;; Convert the high two signed ints of a V4SI to V2DF:
;;   1. shuffle elements {2,3,2,3} of operand 1 into a V4SI temporary so
;;      the wanted elements sit in the low half;
;;   2. convert the low two ints of the temporary to doubles.
(define_expand "vec_unpacks_float_hi_v4si"
  [(set (match_dup 2)
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "")
          (parallel [(const_int 2)
                     (const_int 3)
                     (const_int 2)
                     (const_int 3)])))
   (set (match_operand:V2DF 0 "register_operand" "")
        (float:V2DF
          (vec_select:V2SI
            (match_dup 2)
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2"
{
  /* Scratch register holding the shuffled source.  */
  operands[2] = gen_reg_rtx (V4SImode);
})
2850
;; Convert the low two signed ints (elements 0 and 1) of a V4SI to V2DF.
;; The pattern is emitted as-is; there is no preparation code.
(define_expand "vec_unpacks_float_lo_v4si"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "")
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2")
2858
;; Convert the high four signed ints of a V8SI to V4DF: extract elements
;; 4..7 into a V4SI temporary, then convert the temporary to doubles.
(define_expand "vec_unpacks_float_hi_v8si"
  [(set (match_dup 2)
        (vec_select:V4SI
          (match_operand:V8SI 1 "nonimmediate_operand" "")
          (parallel [(const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
        (float:V4DF (match_dup 2)))]
  "TARGET_AVX"
{
  /* Scratch register for the extracted upper half.  */
  operands[2] = gen_reg_rtx (V4SImode);
})
2870
;; Convert the low four signed ints (elements 0..3) of a V8SI to V4DF.
;; The pattern is emitted as-is; there is no preparation code.
(define_expand "vec_unpacks_float_lo_v8si"
  [(set (match_operand:V4DF 0 "register_operand" "")
        (float:V4DF
          (vec_select:V4SI
            (match_operand:V8SI 1 "nonimmediate_operand" "")
            (parallel [(const_int 0)
                       (const_int 1)
                       (const_int 2)
                       (const_int 3)]))))]
  "TARGET_AVX")
2879
;; Convert the high two UNSIGNED ints of a V4SI to V2DF.  Emitted steps:
;;   op5 = elements {2,3,2,3} of operand 1
;;   op6 = signed int -> double conversion of the low two ints of op5
;;   op7 = (op6 < 0.0)                 ; comparison result per element
;;   op8 = op7 & 2**32                 ; 2**32 only where op6 was negative
;;   op0 = op6 + op8
;; Operand 3 is the zero vector and operand 4 the 2**32 constant vector.
(define_expand "vec_unpacku_float_hi_v4si"
  [(set (match_dup 5)
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "")
          (parallel [(const_int 2)
                     (const_int 3)
                     (const_int 2)
                     (const_int 3)])))
   (set (match_dup 6)
        (float:V2DF
          (vec_select:V2SI
            (match_dup 5)
            (parallel [(const_int 0)
                       (const_int 1)]))))
   (set (match_dup 7)
        (lt:V2DF (match_dup 6) (match_dup 3)))
   (set (match_dup 8)
        (and:V2DF (match_dup 7) (match_dup 4)))
   (set (match_operand:V2DF 0 "register_operand" "")
        (plus:V2DF (match_dup 6) (match_dup 8)))]
  "TARGET_SSE2"
{
  REAL_VALUE_TYPE two32;
  rtx two32_elt;

  /* two32 = 1.0 scaled by 2**32, as a DFmode constant.  */
  real_ldexp (&two32, &dconst1, 32);
  two32_elt = const_double_from_real_value (two32, DFmode);

  /* Constant inputs: zero vector and the 2**32 vector.  */
  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
  operands[4] = force_reg (V2DFmode,
                           ix86_build_const_vector (V2DFmode, 1, two32_elt));

  /* Scratch registers, allocated in operand order.  */
  operands[5] = gen_reg_rtx (V4SImode);
  operands[6] = gen_reg_rtx (V2DFmode);
  operands[7] = gen_reg_rtx (V2DFmode);
  operands[8] = gen_reg_rtx (V2DFmode);
})
2915
;; Convert the low two UNSIGNED ints of a V4SI to V2DF.  Emitted steps:
;;   op5 = signed int -> double conversion of elements 0,1 of operand 1
;;   op6 = (op5 < 0.0)                 ; comparison result per element
;;   op7 = op6 & 2**32                 ; 2**32 only where op5 was negative
;;   op0 = op5 + op7
;; Operand 3 is the zero vector and operand 4 the 2**32 constant vector.
(define_expand "vec_unpacku_float_lo_v4si"
  [(set (match_dup 5)
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "")
            (parallel [(const_int 0)
                       (const_int 1)]))))
   (set (match_dup 6)
        (lt:V2DF (match_dup 5) (match_dup 3)))
   (set (match_dup 7)
        (and:V2DF (match_dup 6) (match_dup 4)))
   (set (match_operand:V2DF 0 "register_operand" "")
        (plus:V2DF (match_dup 5) (match_dup 7)))]
  "TARGET_SSE2"
{
  REAL_VALUE_TYPE two32;
  rtx two32_elt;

  /* two32 = 1.0 scaled by 2**32, as a DFmode constant.  */
  real_ldexp (&two32, &dconst1, 32);
  two32_elt = const_double_from_real_value (two32, DFmode);

  /* Constant inputs: zero vector and the 2**32 vector.  */
  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
  operands[4] = force_reg (V2DFmode,
                           ix86_build_const_vector (V2DFmode, 1, two32_elt));

  /* Scratch registers, allocated in operand order.  */
  operands[5] = gen_reg_rtx (V2DFmode);
  operands[6] = gen_reg_rtx (V2DFmode);
  operands[7] = gen_reg_rtx (V2DFmode);
})
2944
;; Convert the high four UNSIGNED ints of a V8SI to V4DF.  The sequence
;; is emitted explicitly: extract the high half, convert it as signed,
;; compare the result against zero, AND the comparison result with a
;; 2**32 vector, and add that correction to the signed conversion.
(define_expand "vec_unpacku_float_hi_v8si"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V8SI 1 "register_operand" "")]
  "TARGET_AVX"
{
  REAL_VALUE_TYPE two32;
  rtx two32_elt;
  rtx zero, bias, hi_half, as_signed, is_neg, fixup;

  /* two32 = 1.0 scaled by 2**32, as a DFmode constant.  */
  real_ldexp (&two32, &dconst1, 32);
  two32_elt = const_double_from_real_value (two32, DFmode);

  zero = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
  bias = force_reg (V4DFmode,
                    ix86_build_const_vector (V4DFmode, 1, two32_elt));
  hi_half = gen_reg_rtx (V4SImode);

  as_signed = gen_reg_rtx (V4DFmode);
  is_neg = gen_reg_rtx (V4DFmode);
  fixup = gen_reg_rtx (V4DFmode);

  emit_insn (gen_vec_extract_hi_v8si (hi_half, operands[1]));
  emit_insn (gen_floatv4siv4df2 (as_signed, hi_half));
  emit_insn (gen_rtx_SET (VOIDmode, is_neg,
                          gen_rtx_LT (V4DFmode, as_signed, zero)));
  emit_insn (gen_andv4df3 (fixup, is_neg, bias));
  emit_insn (gen_addv4df3 (operands[0], as_signed, fixup));
  DONE;
})
2971
;; Convert the low four UNSIGNED ints of a V8SI to V4DF.  The low half
;; is converted as signed by avx_cvtdq2pd256_2, the result is compared
;; against zero, the comparison result is ANDed with a 2**32 vector, and
;; that correction is added to the signed conversion.
(define_expand "vec_unpacku_float_lo_v8si"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V8SI 1 "nonimmediate_operand" "")]
  "TARGET_AVX"
{
  REAL_VALUE_TYPE two32;
  rtx two32_elt;
  rtx zero, bias, as_signed, is_neg, fixup;

  /* two32 = 1.0 scaled by 2**32, as a DFmode constant.  */
  real_ldexp (&two32, &dconst1, 32);
  two32_elt = const_double_from_real_value (two32, DFmode);

  zero = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
  bias = force_reg (V4DFmode,
                    ix86_build_const_vector (V4DFmode, 1, two32_elt));

  as_signed = gen_reg_rtx (V4DFmode);
  is_neg = gen_reg_rtx (V4DFmode);
  fixup = gen_reg_rtx (V4DFmode);

  emit_insn (gen_avx_cvtdq2pd256_2 (as_signed, operands[1]));
  emit_insn (gen_rtx_SET (VOIDmode, is_neg,
                          gen_rtx_LT (V4DFmode, as_signed, zero)));
  emit_insn (gen_andv4df3 (fixup, is_neg, bias));
  emit_insn (gen_addv4df3 (operands[0], as_signed, fixup));
  DONE;
})
2996
;; Pack two V4DF vectors into one V8SF: each input is truncated to a
;; V4SF temporary, and the two temporaries are concatenated with the
;; conversion of operand 1 in the low half.
(define_expand "vec_pack_trunc_v4df"
  [(set (match_dup 3)
        (float_truncate:V4SF
          (match_operand:V4DF 1 "nonimmediate_operand" "")))
   (set (match_dup 4)
        (float_truncate:V4SF
          (match_operand:V4DF 2 "nonimmediate_operand" "")))
   (set (match_operand:V8SF 0 "register_operand" "")
        (vec_concat:V8SF (match_dup 3) (match_dup 4)))]
  "TARGET_AVX"
{
  /* Scratch registers for the two truncated halves.  */
  operands[3] = gen_reg_rtx (V4SFmode);
  operands[4] = gen_reg_rtx (V4SFmode);
})
3013
;; Pack two V2DF vectors into one V4SF.  Two strategies:
;;  - SSE2, or AVX with 128-bit vectors preferred: convert each input
;;    with sse2_cvtpd2ps and merge the two low halves with movlhps;
;;  - AVX otherwise: concatenate the inputs into one V4DF and convert it
;;    with a single 256-bit vcvtpd2ps.
(define_expand "vec_pack_trunc_v2df"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  if (!TARGET_AVX || TARGET_PREFER_AVX128)
    {
      rtx cvt_lo = gen_reg_rtx (V4SFmode);
      rtx cvt_hi = gen_reg_rtx (V4SFmode);

      emit_insn (gen_sse2_cvtpd2ps (cvt_lo, operands[1]));
      emit_insn (gen_sse2_cvtpd2ps (cvt_hi, operands[2]));
      emit_insn (gen_sse_movlhps (operands[0], cvt_lo, cvt_hi));
    }
  else
    {
      rtx wide = gen_reg_rtx (V4DFmode);
      /* The first input of the concat is forced into a register.  */
      rtx first = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (wide, first, operands[2]));
      emit_insn (gen_avx_cvtpd2ps256 (operands[0], wide));
    }
  DONE;
})
3041
;; Pack two V4DF vectors into one V8SI using the truncating
;; double -> signed int conversion (fix_truncv4dfv4si2) on each input,
;; then concatenate the two V4SI results.
(define_expand "vec_pack_sfix_trunc_v4df"
  [(match_operand:V8SI 0 "register_operand" "")
   (match_operand:V4DF 1 "nonimmediate_operand" "")
   (match_operand:V4DF 2 "nonimmediate_operand" "")]
  "TARGET_AVX"
{
  rtx fix_lo = gen_reg_rtx (V4SImode);
  rtx fix_hi = gen_reg_rtx (V4SImode);

  emit_insn (gen_fix_truncv4dfv4si2 (fix_lo, operands[1]));
  emit_insn (gen_fix_truncv4dfv4si2 (fix_hi, operands[2]));
  emit_insn (gen_avx_vec_concatv8si (operands[0], fix_lo, fix_hi));
  DONE;
})
3058
;; Pack two V2DF vectors into one V4SI with truncating conversion.
;;  - SSE2, or AVX with 128-bit vectors preferred: convert each input
;;    with sse2_cvttpd2dq and interleave the low 64-bit halves of the
;;    two results (viewed as V2DI);
;;  - AVX otherwise: concatenate the inputs into one V4DF and convert it
;;    with a single fix_truncv4dfv4si2.
(define_expand "vec_pack_sfix_trunc_v2df"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  if (!TARGET_AVX || TARGET_PREFER_AVX128)
    {
      rtx fix_lo = gen_reg_rtx (V4SImode);
      rtx fix_hi = gen_reg_rtx (V4SImode);

      emit_insn (gen_sse2_cvttpd2dq (fix_lo, operands[1]));
      emit_insn (gen_sse2_cvttpd2dq (fix_hi, operands[2]));
      emit_insn
       (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
                                    gen_lowpart (V2DImode, fix_lo),
                                    gen_lowpart (V2DImode, fix_hi)));
    }
  else
    {
      rtx wide = gen_reg_rtx (V4DFmode);
      /* The first input of the concat is forced into a register.  */
      rtx first = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (wide, first, operands[2]));
      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], wide));
    }
  DONE;
})
3089
;; For a double-precision vector mode, the SImode vector mode with twice
;; as many elements; this is the result mode of
;; vec_pack_ufix_trunc_<mode>, which packs two such inputs.
(define_mode_attr ssepackfltmode
  [(V4DF "V8SI")
   (V2DF "V4SI")])
3092
;; Pack two double vectors into one vector of UNSIGNED 32-bit ints.
;; Each input is first passed through ix86_expand_adjust_ufix_to_sfix_si,
;; which returns a value to feed to the signed pack and stores a second
;; rtx through its pointer argument.  The signed pack result is finally
;; XORed with the even elements extracted from those two stored values.
;; NOTE(review): the stored values are presumably per-element correction
;; masks for inputs outside the signed range -- confirm in i386.c.
(define_expand "vec_pack_ufix_trunc_<mode>"
  [(match_operand:<ssepackfltmode> 0 "register_operand" "")
   (match_operand:VF2 1 "register_operand" "")
   (match_operand:VF2 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx adj_lo, adj_hi;           /* inputs for the signed pack */
  rtx xor_lo, xor_hi;           /* values stored by the adjust helper */
  rtx packed, fixup, result;

  adj_lo = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &xor_lo);
  adj_hi = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &xor_hi);

  packed = gen_reg_rtx (<ssepackfltmode>mode);
  emit_insn (gen_vec_pack_sfix_trunc_<mode> (packed, adj_lo, adj_hi));

  if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
    {
      fixup = gen_reg_rtx (<ssepackfltmode>mode);
      ix86_expand_vec_extract_even_odd (fixup, xor_lo, xor_hi, 0);
    }
  else
    {
      /* 256-bit result without AVX2: do the even-element extraction in
         V8SFmode and view the outcome as V8SI afterwards.  */
      rtx fixup_sf = gen_reg_rtx (V8SFmode);
      ix86_expand_vec_extract_even_odd (fixup_sf,
                                        gen_lowpart (V8SFmode, xor_lo),
                                        gen_lowpart (V8SFmode, xor_hi), 0);
      fixup = gen_lowpart (V8SImode, fixup_sf);
    }

  result = expand_simple_binop (<ssepackfltmode>mode, XOR, packed, fixup,
                                operands[0], 0, OPTAB_DIRECT);
  /* expand_simple_binop may return a register other than the target.  */
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})
3122
;; Pack two V4DF vectors into one V8SI using avx_cvtpd2dq256 (the
;; non-truncating double -> signed int conversion) on each input, then
;; concatenate the two V4SI results.
(define_expand "vec_pack_sfix_v4df"
  [(match_operand:V8SI 0 "register_operand" "")
   (match_operand:V4DF 1 "nonimmediate_operand" "")
   (match_operand:V4DF 2 "nonimmediate_operand" "")]
  "TARGET_AVX"
{
  rtx cvt_lo = gen_reg_rtx (V4SImode);
  rtx cvt_hi = gen_reg_rtx (V4SImode);

  emit_insn (gen_avx_cvtpd2dq256 (cvt_lo, operands[1]));
  emit_insn (gen_avx_cvtpd2dq256 (cvt_hi, operands[2]));
  emit_insn (gen_avx_vec_concatv8si (operands[0], cvt_lo, cvt_hi));
  DONE;
})
3139
;; Pack two V2DF vectors into one V4SI using the cvtpd2dq conversion.
;;  - SSE2, or AVX with 128-bit vectors preferred: convert each input
;;    with sse2_cvtpd2dq and interleave the low 64-bit halves of the two
;;    results (viewed as V2DI);
;;  - AVX otherwise: concatenate the inputs into one V4DF and convert it
;;    with a single avx_cvtpd2dq256.
(define_expand "vec_pack_sfix_v2df"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  if (!TARGET_AVX || TARGET_PREFER_AVX128)
    {
      rtx cvt_lo = gen_reg_rtx (V4SImode);
      rtx cvt_hi = gen_reg_rtx (V4SImode);

      emit_insn (gen_sse2_cvtpd2dq (cvt_lo, operands[1]));
      emit_insn (gen_sse2_cvtpd2dq (cvt_hi, operands[2]));
      emit_insn
       (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
                                    gen_lowpart (V2DImode, cvt_lo),
                                    gen_lowpart (V2DImode, cvt_hi)));
    }
  else
    {
      rtx wide = gen_reg_rtx (V4DFmode);
      /* The first input of the concat is forced into a register.  */
      rtx first = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (wide, first, operands[2]));
      emit_insn (gen_avx_cvtpd2dq256 (operands[0], wide));
    }
  DONE;
})
3170
3171;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3172;;
3173;; Parallel single-precision floating point element swizzling
3174;;
3175;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3176
;; Expander front end for sse_movhlps that accepts any nonimmediate
;; operands.  ix86_fixup_binary_operands is run over the operand array
;; and returns the destination the insn should write; if that is not
;; operand 0, the result is copied there afterwards.
(define_expand "sse_movhlps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (match_operand:V4SF 2 "nonimmediate_operand" ""))
          (parallel [(const_int 6) (const_int 7)
                     (const_int 2) (const_int 3)])))]
  "TARGET_SSE"
{
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_movhlps (target, operands[1], operands[2]));

  /* Copy back when the fixup substituted a different destination.  */
  if (target != operands[0])
    emit_move_insn (operands[0], target);

  DONE;
})
3199
;; Result = { op2[2], op2[3], op1[2], op1[3] } (indices 6,7,2,3 of the
;; concatenation {op1, op2}).  Alternatives:
;;   0/1  register-register (v)movhlps
;;   2/3  operand 2 in offsettable memory: (v)movlps from its high half
;;   4    destination in memory, tied to operand 1: (v)movhps store
;; Operands 1 and 2 may not both be memory.
(define_insn "sse_movhlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,m")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
            (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
          (parallel [(const_int 6) (const_int 7)
                     (const_int 2) (const_int 3)])))]
  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhlps\t{%2, %0|%0, %2}
   vmovhlps\t{%2, %1, %0|%0, %1, %2}
   movlps\t{%H2, %0|%0, %H2}
   vmovlps\t{%H2, %1, %0|%0, %1, %H2}
   %vmovhps\t{%2, %0|%0, %2}"
  [(set_attr "isa"    "noavx,avx,noavx,avx,*")
   (set_attr "type"   "ssemov")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode"   "V4SF,V4SF,V2SF,V2SF,V2SF")])
3221
;; Expander front end for sse_movlhps that accepts any nonimmediate
;; operands.  ix86_fixup_binary_operands is run over the operand array
;; and returns the destination the insn should write; if that is not
;; operand 0, the result is copied there afterwards.
(define_expand "sse_movlhps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (match_operand:V4SF 2 "nonimmediate_operand" ""))
          (parallel [(const_int 0) (const_int 1)
                     (const_int 4) (const_int 5)])))]
  "TARGET_SSE"
{
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_movlhps (target, operands[1], operands[2]));

  /* Copy back when the fixup substituted a different destination.  */
  if (target != operands[0])
    emit_move_insn (operands[0], target);

  DONE;
})
3244
;; Result = { op1[0], op1[1], op2[0], op2[1] } (indices 0,1,4,5 of the
;; concatenation {op1, op2}).  Alternatives:
;;   0/1  register-register (v)movlhps
;;   2/3  operand 2 in memory: (v)movhps load into the high half
;;   4    destination in offsettable memory, tied to operand 1:
;;        (v)movlps store into the high half of the destination (%H0)
;;
;; Alternative 3 emits vmovhps, whose last source operand must be a
;; memory reference, so operand 2 is constrained to "m" there, matching
;; the non-VEX alternative 2 and the vmovhps alternative of sse_loadhps.
;; The register-register AVX case is handled by alternative 1.
(define_insn "sse_movlhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,o")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
            (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 4)
                     (const_int 5)])))]
  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
  "@
   movlhps\t{%2, %0|%0, %2}
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}
   %vmovlps\t{%2, %H0|%H0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3266
;; The 256-bit unpck insns shuffle only within each 128-bit lane, which
;; is why the selection below interleaves elements 2,3 (low lane) and
;; 6,7 (high lane) of each operand rather than elements 4..7.
(define_insn "avx_unpckhps256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 2) (const_int 10) (const_int 3) (const_int 11)
                     (const_int 6) (const_int 14) (const_int 7) (const_int 15)])))]
  "TARGET_AVX"
  "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type"   "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode"   "V8SF")])
3283
;; Full-width interleave of the high halves of two V8SF vectors, built
;; from the lane-local unpack patterns:
;;   op3 = in-lane low interleave  (indices 0,8,1,9 / 4,12,5,13)
;;   op4 = in-lane high interleave (indices 2,10,3,11 / 6,14,7,15)
;;   op0 = high 128-bit lane of op3 followed by high 128-bit lane of op4
(define_expand "vec_interleave_highv8sf"
  [(set (match_dup 3)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8) (const_int 1) (const_int 9)
                     (const_int 4) (const_int 12) (const_int 5) (const_int 13)])))
   (set (match_dup 4)
        (vec_select:V8SF
          (vec_concat:V16SF (match_dup 1) (match_dup 2))
          (parallel [(const_int 2) (const_int 10) (const_int 3) (const_int 11)
                     (const_int 6) (const_int 14) (const_int 7) (const_int 15)])))
   (set (match_operand:V8SF 0 "register_operand" "")
        (vec_select:V8SF
          (vec_concat:V16SF (match_dup 3) (match_dup 4))
          (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)
                     (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))]
 "TARGET_AVX"
{
  /* Scratch registers for the two lane-local interleaves.  */
  operands[3] = gen_reg_rtx (V8SFmode);
  operands[4] = gen_reg_rtx (V8SFmode);
})
3317
;; Interleave the high halves of two V4SF vectors:
;; result = { op1[2], op2[2], op1[3], op2[3] }.
;; Alternative 0 is the two-operand SSE form (destination tied to
;; operand 1); alternative 1 is the three-operand VEX form.
(define_insn "vec_interleave_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0,x")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(const_int 2)
                     (const_int 6)
                     (const_int 3)
                     (const_int 7)])))]
  "TARGET_SSE"
  "@
   unpckhps\t{%2, %0|%0, %2}
   vunpckhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa"    "noavx,avx")
   (set_attr "type"   "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode"   "V4SF")])
3334
;; The 256-bit unpck insns shuffle only within each 128-bit lane, which
;; is why the selection below interleaves elements 0,1 (low lane) and
;; 4,5 (high lane) of each operand rather than elements 0..3.
(define_insn "avx_unpcklps256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8) (const_int 1) (const_int 9)
                     (const_int 4) (const_int 12) (const_int 5) (const_int 13)])))]
  "TARGET_AVX"
  "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type"   "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode"   "V8SF")])
3351
;; Full-width interleave of the low halves of two V8SF vectors, built
;; from the lane-local unpack patterns:
;;   op3 = in-lane low interleave  (indices 0,8,1,9 / 4,12,5,13)
;;   op4 = in-lane high interleave (indices 2,10,3,11 / 6,14,7,15)
;;   op0 = low 128-bit lane of op3 followed by low 128-bit lane of op4
(define_expand "vec_interleave_lowv8sf"
  [(set (match_dup 3)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8) (const_int 1) (const_int 9)
                     (const_int 4) (const_int 12) (const_int 5) (const_int 13)])))
   (set (match_dup 4)
        (vec_select:V8SF
          (vec_concat:V16SF (match_dup 1) (match_dup 2))
          (parallel [(const_int 2) (const_int 10) (const_int 3) (const_int 11)
                     (const_int 6) (const_int 14) (const_int 7) (const_int 15)])))
   (set (match_operand:V8SF 0 "register_operand" "")
        (vec_select:V8SF
          (vec_concat:V16SF (match_dup 3) (match_dup 4))
          (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
                     (const_int 8) (const_int 9) (const_int 10) (const_int 11)])))]
 "TARGET_AVX"
{
  /* Scratch registers for the two lane-local interleaves.  */
  operands[3] = gen_reg_rtx (V8SFmode);
  operands[4] = gen_reg_rtx (V8SFmode);
})
3385
;; Interleave the low halves of two V4SF vectors:
;; result = { op1[0], op2[0], op1[1], op2[1] }.
;; Alternative 0 is the two-operand SSE form (destination tied to
;; operand 1); alternative 1 is the three-operand VEX form.
(define_insn "vec_interleave_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0,x")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(const_int 0)
                     (const_int 4)
                     (const_int 1)
                     (const_int 5)])))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   vunpcklps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa"    "noavx,avx")
   (set_attr "type"   "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode"   "V4SF")])
3402
;; The movshdup/movsldup patterns are written as a vec_select over the
;; concatenation of the source with itself -- the same vec_concat shape
;; used by the shufps patterns -- so that shufps users which can use
;; these instructions are captured by them.
;;
;; 256-bit form: duplicate the odd-indexed elements,
;; result = { s1, s1, s3, s3, s5, s5, s7, s7 }.
(define_insn "avx_movshdup256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 1) (const_int 1) (const_int 3) (const_int 3)
                     (const_int 5) (const_int 5) (const_int 7) (const_int 7)])))]
  "TARGET_AVX"
  "vmovshdup\t{%1, %0|%0, %1}"
  [(set_attr "type"   "sse")
   (set_attr "prefix" "vex")
   (set_attr "mode"   "V8SF")])
3420
;; 128-bit movshdup: duplicate the odd-indexed elements of the source.
;; The source is concatenated with itself, so index 7 denotes element 3
;; of the second copy: result = { s1, s1, s3, s3 }.
(define_insn "sse3_movshdup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 1) (const_int 1)
                     (const_int 7) (const_int 7)])))]
  "TARGET_SSE3"
  "%vmovshdup\t{%1, %0|%0, %1}"
  [(set_attr "type"       "sse")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix"     "maybe_vex")
   (set_attr "mode"       "V4SF")])
3437
;; 256-bit movsldup: duplicate the even-indexed elements of the source,
;; result = { s0, s0, s2, s2, s4, s4, s6, s6 }.
(define_insn "avx_movsldup256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 0) (const_int 2) (const_int 2)
                     (const_int 4) (const_int 4) (const_int 6) (const_int 6)])))]
  "TARGET_AVX"
  "vmovsldup\t{%1, %0|%0, %1}"
  [(set_attr "type"   "sse")
   (set_attr "prefix" "vex")
   (set_attr "mode"   "V8SF")])
3453
;; 128-bit movsldup: duplicate the even-indexed elements of the source.
;; The source is concatenated with itself, so index 6 denotes element 2
;; of the second copy: result = { s0, s0, s2, s2 }.
(define_insn "sse3_movsldup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 0)
                     (const_int 6) (const_int 6)])))]
  "TARGET_SSE3"
  "%vmovsldup\t{%1, %0|%0, %1}"
  [(set_attr "type"       "sse")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix"     "maybe_vex")
   (set_attr "mode"       "V4SF")])
3470
;; Expand a 256-bit shufps given as an 8-bit immediate (operand 3) into
;; the explicit-index form matched by avx_shufps256_1.  The immediate is
;; split into four 2-bit fields; fields 0 and 1 index operand 1 and
;; fields 2 and 3 index operand 2 (offset 8 in the concatenation).  The
;; same four selections are repeated for the upper lane at offset +4.
(define_expand "avx_shufps256"
  [(match_operand:V8SF 0 "register_operand" "")
   (match_operand:V8SF 1 "register_operand" "")
   (match_operand:V8SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_AVX"
{
  int imm = INTVAL (operands[3]);
  int sel0 = (imm >> 0) & 3;
  int sel1 = (imm >> 2) & 3;
  int sel2 = (imm >> 4) & 3;
  int sel3 = (imm >> 6) & 3;

  emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
                                  /* Low 128-bit lane.  */
                                  GEN_INT (sel0),
                                  GEN_INT (sel1),
                                  GEN_INT (sel2 + 8),
                                  GEN_INT (sel3 + 8),
                                  /* High 128-bit lane.  */
                                  GEN_INT (sel0 + 4),
                                  GEN_INT (sel1 + 4),
                                  GEN_INT (sel2 + 12),
                                  GEN_INT (sel3 + 12)));
  DONE;
})
3490
;; 256-bit shufps with explicit element indices.  Each 2-bit field of
;; the immediate selects two elements, one per 128-bit lane: the insn
;; condition requires the upper-lane indices (operands 7..10) to equal
;; the lower-lane indices (operands 3..6) plus 4.  The immediate is
;; rebuilt from the lower-lane indices at output time.
(define_insn "avx_shufps256_1"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3  "const_0_to_3_operand"   "")
                     (match_operand 4  "const_0_to_3_operand"   "")
                     (match_operand 5  "const_8_to_11_operand"  "")
                     (match_operand 6  "const_8_to_11_operand"  "")
                     (match_operand 7  "const_4_to_7_operand"   "")
                     (match_operand 8  "const_4_to_7_operand"   "")
                     (match_operand 9  "const_12_to_15_operand" "")
                     (match_operand 10 "const_12_to_15_operand" "")])))]
  "TARGET_AVX
   && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
       && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
{
  /* Pack the four lower-lane selectors back into an 8-bit immediate;
     operands 5 and 6 are biased by 8 because they index operand 2.  */
  int imm = (INTVAL (operands[3])
             | (INTVAL (operands[4]) << 2)
             | ((INTVAL (operands[5]) - 8) << 4)
             | ((INTVAL (operands[6]) - 8) << 6));

  /* Operand 3 is reused to carry the immediate into the template.  */
  operands[3] = GEN_INT (imm);

  return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type"             "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix"           "vex")
   (set_attr "mode"             "V8SF")])
3525
;; Expand a 128-bit shufps given as an 8-bit immediate (operand 3) into
;; the explicit-index form matched by sse_shufps_v4sf.  Fields 0 and 1
;; of the immediate index operand 1; fields 2 and 3 index operand 2,
;; which sits at offset 4 in the concatenation.
(define_expand "sse_shufps"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand:V4SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE"
{
  int imm = INTVAL (operands[3]);
  int sel0 = (imm >> 0) & 3;
  int sel1 = (imm >> 2) & 3;
  int sel2 = (imm >> 4) & 3;
  int sel3 = (imm >> 6) & 3;

  emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
                                  GEN_INT (sel0),
                                  GEN_INT (sel1),
                                  GEN_INT (sel2 + 4),
                                  GEN_INT (sel3 + 4)));
  DONE;
})
3541
;; 128-bit shufps with explicit element indices, for every mode in
;; VI4F_128.  Operands 3 and 4 select from operand 1 (indices 0..3),
;; operands 5 and 6 from operand 2 (indices 4..7).  The immediate is
;; rebuilt from the four indices at output time.
(define_insn "sse_shufps_<mode>"
  [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
        (vec_select:VI4F_128
          (vec_concat:<ssedoublevecmode>
            (match_operand:VI4F_128 1 "register_operand" "0,x")
            (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")
                     (match_operand 6 "const_4_to_7_operand" "")])))]
  "TARGET_SSE"
{
  /* Pack the selectors into an 8-bit immediate; operands 5 and 6 are
     biased by 4 because they index operand 2.  */
  int imm = ((INTVAL (operands[3]) << 0)
             | (INTVAL (operands[4]) << 2)
             | ((INTVAL (operands[5]) - 4) << 4)
             | ((INTVAL (operands[6]) - 4) << 6));

  /* Operand 3 is reused to carry the immediate into the template.  */
  operands[3] = GEN_INT (imm);

  if (which_alternative == 0)
    return "shufps\t{%3, %2, %0|%0, %2, %3}";
  if (which_alternative == 1)
    return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";

  gcc_unreachable ();
}
  [(set_attr "isa"              "noavx,avx")
   (set_attr "type"             "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix"           "orig,vex")
   (set_attr "mode"             "V4SF")])
3576
;; Extract the high two floats (elements 2 and 3) of a V4SF into a V2SF
;; destination.  Alternatives:
;;   0  register source, memory destination: (v)movhps store
;;   1  register to register: (v)movhlps
;;   2  offsettable memory source: (v)movlps load from its high half
(define_insn "sse_storehps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 2)
                     (const_int 3)])))]
  "TARGET_SSE"
  "@
   %vmovhps\t{%1, %0|%0, %1}
   %vmovhlps\t{%1, %d0|%d0, %1}
   %vmovlps\t{%H1, %d0|%d0, %H1}"
  [(set_attr "type"   "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode"   "V2SF,V4SF,V2SF")])
3590
;; Expander front end for sse_loadhps that accepts any nonimmediate
;; operands.  ix86_fixup_binary_operands is run over the operand array
;; and returns the destination the insn should write; if that is not
;; operand 0, the result is copied there afterwards.
(define_expand "sse_loadhps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (vec_concat:V4SF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0)
                       (const_int 1)]))
          (match_operand:V2SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_loadhps (target, operands[1], operands[2]));

  /* Copy back when the fixup substituted a different destination.  */
  if (target != operands[0])
    emit_move_insn (operands[0], target);

  DONE;
})
3610
;; Replace the high half of a V4SF: result = { op1[0], op1[1], op2 }.
;; Alternatives:
;;   0/1  operand 2 in memory: (v)movhps load
;;   2/3  operand 2 in a register: (v)movlhps
;;   4    destination in offsettable memory, tied to operand 1:
;;        (v)movlps store into the high half of the destination (%H0)
(define_insn "sse_loadhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,o")
        (vec_concat:V4SF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
            (parallel [(const_int 0)
                       (const_int 1)]))
          (match_operand:V2SF 2 "nonimmediate_operand"   " m,m,x,x,x")))]
  "TARGET_SSE"
  "@
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}
   movlhps\t{%2, %0|%0, %2}
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
   %vmovlps\t{%2, %H0|%H0, %2}"
  [(set_attr "isa"    "noavx,avx,noavx,avx,*")
   (set_attr "type"   "ssemov")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode"   "V2SF,V2SF,V4SF,V4SF,V2SF")])
3629
;; Extract elements 0 and 1 (the lower pair) of V4SF operand 1 into V2SF
;; operand 0.  Alternatives, in order:
;;   0: SSE register -> memory, via movlps;
;;   1: SSE register -> SSE register, via a full-width movaps;
;;   2: memory -> SSE register, via movlps.
3630(define_insn "sse_storelps"
3631  [(set (match_operand:V2SF 0 "nonimmediate_operand"   "=m,x,x")
3632	(vec_select:V2SF
3633	  (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3634	  (parallel [(const_int 0) (const_int 1)])))]
3635  "TARGET_SSE"
3636  "@
3637   %vmovlps\t{%1, %0|%0, %1}
3638   %vmovaps\t{%1, %0|%0, %1}
3639   %vmovlps\t{%1, %d0|%d0, %1}"
3640  [(set_attr "type" "ssemov")
3641   (set_attr "prefix" "maybe_vex")
3642   (set_attr "mode" "V2SF,V4SF,V2SF")])
3643
;; Expander wrapper around sse_loadlps: the result takes its lower half
;; from V2SF operand 2 and keeps elements 2 and 3 of V4SF operand 1.
;; The operands are first passed through ix86_fixup_binary_operands, which
;; returns the destination actually used for the insn; when that differs
;; from operands[0] the result is copied back with an ordinary move.
3644(define_expand "sse_loadlps_exp"
3645  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3646	(vec_concat:V4SF
3647	  (match_operand:V2SF 2 "nonimmediate_operand" "")
3648	  (vec_select:V2SF
3649	    (match_operand:V4SF 1 "nonimmediate_operand" "")
3650	    (parallel [(const_int 2) (const_int 3)]))))]
3651  "TARGET_SSE"
3652{
3653  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3654
3655  emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3656
3657  /* Fix up the destination if needed.  */
3658  if (dst != operands[0])
3659    emit_move_insn (operands[0], dst);
3660
3661  DONE;
3662})
3663
;; Build a V4SF from V2SF operand 2 (low half) and elements 2-3 of
;; operand 1 (high half).  Alternatives, in order:
;;   0/1: both inputs in SSE registers -> shufps / vshufps with the fixed
;;        immediate 0xe4 (the only alternatives carrying an immediate, see
;;        "length_immediate");
;;   2/3: operand 2 in memory -> movlps / vmovlps;
;;   4:   destination is memory tied to operand 1 -> movlps store of
;;        operand 2.
;; Note that in alternative 0 it is operand 2, not operand 1, that is tied
;; to the destination.
3664(define_insn "sse_loadlps"
3665  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,m")
3666	(vec_concat:V4SF
3667	  (match_operand:V2SF 2 "nonimmediate_operand"   " 0,x,m,m,x")
3668	  (vec_select:V2SF
3669	    (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3670	    (parallel [(const_int 2) (const_int 3)]))))]
3671  "TARGET_SSE"
3672  "@
3673   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3674   vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3675   movlps\t{%2, %0|%0, %2}
3676   vmovlps\t{%2, %1, %0|%0, %1, %2}
3677   %vmovlps\t{%2, %0|%0, %2}"
3678  [(set_attr "isa" "noavx,avx,noavx,avx,*")
3679   (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3680   (set_attr "length_immediate" "1,1,*,*,*")
3681   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3682   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3683
;; Register-only scalar move: the vec_merge mask of 1 takes element 0 from
;; operand 2 and the remaining three elements from operand 1.
;; Alternative 0 (movss) ties operand 1 to the destination; alternative 1
;; (vmovss) is the three-operand VEX form.
3684(define_insn "sse_movss"
3685  [(set (match_operand:V4SF 0 "register_operand"   "=x,x")
3686	(vec_merge:V4SF
3687	  (match_operand:V4SF 2 "register_operand" " x,x")
3688	  (match_operand:V4SF 1 "register_operand" " 0,x")
3689	  (const_int 1)))]
3690  "TARGET_SSE"
3691  "@
3692   movss\t{%2, %0|%0, %2}
3693   vmovss\t{%2, %1, %0|%0, %1, %2}"
3694  [(set_attr "isa" "noavx,avx")
3695   (set_attr "type" "ssemov")
3696   (set_attr "prefix" "orig,vex")
3697   (set_attr "mode" "SF")])
3698
;; AVX2 register-source broadcast: replicate element 0 of V4SF register
;; operand 1 into every element of operand 0, for each mode of the VF1
;; iterator, using vbroadcastss.
3699(define_insn "avx2_vec_dup<mode>"
3700  [(set (match_operand:VF1 0 "register_operand" "=x")
3701	(vec_duplicate:VF1
3702	  (vec_select:SF
3703	    (match_operand:V4SF 1 "register_operand" "x")
3704	    (parallel [(const_int 0)]))))]
3705  "TARGET_AVX2"
3706  "vbroadcastss\t{%1, %0|%0, %1}"
3707  [(set_attr "type" "sselog1")
3708    (set_attr "prefix" "vex")
3709    (set_attr "mode" "<MODE>")])
3710
;; Replicate scalar SF operand 1 into all four elements of V4SF operand 0.
;; Alternatives, in order:
;;   0: AVX, source in an SSE register -> vshufps with immediate 0;
;;   1: AVX, source in memory          -> vbroadcastss;
;;   2: non-AVX, source tied to the destination -> in-place shufps with
;;      immediate 0.
3711(define_insn "vec_dupv4sf"
3712  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
3713	(vec_duplicate:V4SF
3714	  (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
3715  "TARGET_SSE"
3716  "@
3717   vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3718   vbroadcastss\t{%1, %0|%0, %1}
3719   shufps\t{$0, %0, %0|%0, %0, 0}"
3720  [(set_attr "isa" "avx,avx,noavx")
3721   (set_attr "type" "sselog1,ssemov,sselog1")
3722   (set_attr "length_immediate" "1,0,1")
3723   (set_attr "prefix_extra" "0,1,*")
3724   (set_attr "prefix" "vex,vex,orig")
3725   (set_attr "mode" "V4SF")])
3726
3727;; Although insertps takes register source, we prefer
3728;; unpcklps with register source since it is shorter.
;; SSE4.1 variant of V2SF construction from two SF scalars (operand 1 is
;; element 0, operand 2 is element 1).  Alternatives, in order:
;;   0/1: both scalars in SSE registers -> unpcklps / vunpcklps;
;;   2/3: operand 2 in memory -> insertps / vinsertps with immediate 0x10;
;;   4:   operand 1 in memory, operand 2 matching constraint "C" -> movss;
;;   5:   MMX registers ("y") -> punpckldq;
;;   6:   MMX destination, operand 1 in memory, operand 2 "C" -> movd.
;; NOTE(review): "C" is presumably the all-zero constant, so alternatives
;; 4 and 6 rely on the load clearing the upper element -- confirm against
;; the i386 constraint definitions.
3729(define_insn "*vec_concatv2sf_sse4_1"
3730  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,x,x,x,*y ,*y")
3731	(vec_concat:V2SF
3732	  (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3733	  (match_operand:SF 2 "vector_move_operand"  " x,x,m,m,C,*ym, C")))]
3734  "TARGET_SSE4_1"
3735  "@
3736   unpcklps\t{%2, %0|%0, %2}
3737   vunpcklps\t{%2, %1, %0|%0, %1, %2}
3738   insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3739   vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3740   %vmovss\t{%1, %0|%0, %1}
3741   punpckldq\t{%2, %0|%0, %2}
3742   movd\t{%1, %0|%0, %1}"
3743  [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3744   (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3745   (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3746   (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3747   (set_attr "length_immediate" "*,*,1,1,*,*,*")
3748   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3749   (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3750
3751;; ??? In theory we can match memory for the MMX alternative, but allowing
3752;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3753;; alternatives pretty much forces the MMX alternative to be chosen.
;; Plain-SSE variant of V2SF construction from two SF scalars.  Operand 2
;; must be a register or zero (reg_or_0_operand).  Alternatives, in order:
;;   0: both scalars in SSE registers, operand 1 tied to the destination
;;      -> unpcklps;
;;   1: operand 1 in memory, operand 2 zero -> movss load;
;;   2: MMX registers -> punpckldq;
;;   3: MMX destination, operand 1 in memory, operand 2 zero -> movd load.
3754(define_insn "*vec_concatv2sf_sse"
3755  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
3756	(vec_concat:V2SF
3757	  (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3758	  (match_operand:SF 2 "reg_or_0_operand"     " x,C,*y, C")))]
3759  "TARGET_SSE"
3760  "@
3761   unpcklps\t{%2, %0|%0, %2}
3762   movss\t{%1, %0|%0, %1}
3763   punpckldq\t{%2, %0|%0, %2}
3764   movd\t{%1, %0|%0, %1}"
3765  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3766   (set_attr "mode" "V4SF,SF,DI,DI")])
3767
;; Concatenate two V2SF halves into a V4SF: operand 1 (always a register)
;; supplies the low half and operand 2 the high half.  Alternatives:
;;   0/1: operand 2 in an SSE register -> movlhps / vmovlhps;
;;   2/3: operand 2 in memory          -> movhps / vmovhps.
3768(define_insn "*vec_concatv4sf"
3769  [(set (match_operand:V4SF 0 "register_operand"       "=x,x,x,x")
3770	(vec_concat:V4SF
3771	  (match_operand:V2SF 1 "register_operand"     " 0,x,0,x")
3772	  (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3773  "TARGET_SSE"
3774  "@
3775   movlhps\t{%2, %0|%0, %2}
3776   vmovlhps\t{%2, %1, %0|%0, %1, %2}
3777   movhps\t{%2, %0|%0, %2}
3778   vmovhps\t{%2, %1, %0|%0, %1, %2}"
3779  [(set_attr "isa" "noavx,avx,noavx,avx")
3780   (set_attr "type" "ssemov")
3781   (set_attr "prefix" "orig,vex,orig,vex")
3782   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
3783
;; Vector initialisation expander for every mode in the V_128 iterator.
;; Operand 0 is the destination register and operand 1 (no mode or
;; predicate) describes the initial values; all work is delegated to
;; ix86_expand_vector_init, called with its first argument false.
3784(define_expand "vec_init<mode>"
3785  [(match_operand:V_128 0 "register_operand" "")
3786   (match_operand 1 "" "")]
3787  "TARGET_SSE"
3788{
3789  ix86_expand_vector_init (false, operands[0], operands[1]);
3790  DONE;
3791})
3792
3793;; Avoid combining registers from different units in a single alternative,
3794;; see comment above inline_secondary_memory_needed function in i386.c
;; Set element 0 of a VI4F_128 vector: the vec_merge mask of 1 takes
;; element 0 from scalar operand 2 and all other elements from operand 1.
;; The eleven alternatives fall into three groups:
;;   0-3:  operand 1 matches "C", so this is a scalar load/move into a
;;         fresh vector (insertps, mov<suffix>, movd, movss);
;;   4-7:  operand 1 is a register vector that is merged into
;;         (movss / vmovss / pinsrd / vpinsrd);
;;   8-10: the destination is memory tied to operand 1; these emit "#" and
;;         must therefore be split (see the memory_operand define_split
;;         that follows the insertps patterns in this file).
;; The "isa" attribute gates each alternative on the instruction set it
;; needs.
3795(define_insn "vec_set<mode>_0"
3796  [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3797	  "=x,x,x ,x,x,x,x  ,x  ,m ,m   ,m")
3798	(vec_merge:VI4F_128
3799	  (vec_duplicate:VI4F_128
3800	    (match_operand:<ssescalarmode> 2 "general_operand"
3801	  " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
3802	  (match_operand:VI4F_128 1 "vector_move_operand"
3803	  " C,C,C ,C,0,x,0  ,x  ,0 ,0   ,0")
3804	  (const_int 1)))]
3805  "TARGET_SSE"
3806  "@
3807   %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3808   %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3809   %vmovd\t{%2, %0|%0, %2}
3810   movss\t{%2, %0|%0, %2}
3811   movss\t{%2, %0|%0, %2}
3812   vmovss\t{%2, %1, %0|%0, %1, %2}
3813   pinsrd\t{$0, %2, %0|%0, %2, 0}
3814   vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3815   #
3816   #
3817   #"
3818  [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
3819   (set (attr "type")
3820     (cond [(eq_attr "alternative" "0,6,7")
3821	      (const_string "sselog")
3822	    (eq_attr "alternative" "9")
3823	      (const_string "imov")
3824	    (eq_attr "alternative" "10")
3825	      (const_string "fmov")
3826	   ]
3827	   (const_string "ssemov")))
3828   (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3829   (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3830   (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3831   (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3832
3833;; A subset is vec_setv4sf.
;; Insert SF operand 2 into one element of V4SF operand 1 with
;; insertps/vinsertps.  Operand 3 is the vec_merge mask; the insn condition
;; only accepts masks whose exact_log2 is a valid element index (below
;; GET_MODE_NUNITS (V4SFmode)), i.e. masks with exactly one of the low four
;; bits set.
3834(define_insn "*vec_setv4sf_sse4_1"
3835  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3836	(vec_merge:V4SF
3837	  (vec_duplicate:V4SF
3838	    (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3839	  (match_operand:V4SF 1 "register_operand" "0,x")
3840	  (match_operand:SI 3 "const_int_operand" "")))]
3841  "TARGET_SSE4_1
3842   && ((unsigned) exact_log2 (INTVAL (operands[3]))
3843       < GET_MODE_NUNITS (V4SFmode))"
3844{
  /* Turn the one-bit merge mask into the instruction immediate: the
     element index shifted left by 4.  */
3845  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3846  switch (which_alternative)
3847    {
3848    case 0:
3849      return "insertps\t{%3, %2, %0|%0, %2, %3}";
3850    case 1:
3851      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3852    default:
3853      gcc_unreachable ();
3854    }
3855}
3856  [(set_attr "isa" "noavx,avx")
3857   (set_attr "type" "sselog")
3858   (set_attr "prefix_data16" "1,*")
3859   (set_attr "prefix_extra" "1")
3860   (set_attr "length_immediate" "1")
3861   (set_attr "prefix" "orig,vex")
3862   (set_attr "mode" "V4SF")])
3863
;; insertps/vinsertps with an explicit 8-bit immediate (operand 3),
;; modelled as UNSPEC_INSERTPS.  Operand 2 is the source vector (register
;; or memory) and operand 1 the vector being inserted into.
3864(define_insn "sse4_1_insertps"
3865  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3866	(unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3867		      (match_operand:V4SF 1 "register_operand" "0,x")
3868		      (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3869		     UNSPEC_INSERTPS))]
3870  "TARGET_SSE4_1"
3871{
  /* For a memory source, bits 7:6 of the immediate (count_s) are folded
     into the address: the operand is narrowed to SFmode at byte offset
     count_s * 4 and those bits are cleared from the immediate.
     NOTE(review): presumably because the memory form of the instruction
     reads a single scalar -- confirm against the ISA reference.  */
3872  if (MEM_P (operands[2]))
3873    {
3874      unsigned count_s = INTVAL (operands[3]) >> 6;
3875      if (count_s)
3876	operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3877      operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3878    }
3879  switch (which_alternative)
3880    {
3881    case 0:
3882      return "insertps\t{%3, %2, %0|%0, %2, %3}";
3883    case 1:
3884      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3885    default:
3886      gcc_unreachable ();
3887    }
3888}
3889  [(set_attr "isa" "noavx,avx")
3890   (set_attr "type" "sselog")
3891   (set_attr "prefix_data16" "1,*")
3892   (set_attr "prefix_extra" "1")
3893   (set_attr "length_immediate" "1")
3894   (set_attr "prefix" "orig,vex")
3895   (set_attr "mode" "V4SF")])
3896
;; After reload, setting element 0 of a vector that lives in memory (all
;; other elements unchanged, as expressed by match_dup 0) is just a scalar
;; store: rewrite it as a move of operand 1 to the first
;; <ssescalarmode>-sized slot (offset 0) of the memory operand.
3897(define_split
3898  [(set (match_operand:VI4F_128 0 "memory_operand" "")
3899	(vec_merge:VI4F_128
3900	  (vec_duplicate:VI4F_128
3901	    (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3902	  (match_dup 0)
3903	  (const_int 1)))]
3904  "TARGET_SSE && reload_completed"
3905  [(const_int 0)]
3906{
3907  emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
3908		  operands[1]);
3909  DONE;
3910})
3911
;; Element-set expander for every mode in the V iterator.  Operand 0 is
;; the vector register being modified, operand 1 the scalar value and
;; operand 2 a constant integer passed on as the element selector; all
;; work is delegated to ix86_expand_vector_set, called with its first
;; argument false.
3912(define_expand "vec_set<mode>"
3913  [(match_operand:V 0 "register_operand" "")
3914   (match_operand:<ssescalarmode> 1 "register_operand" "")
3915   (match_operand 2 "const_int_operand" "")]
3916  "TARGET_SSE"
3917{
3918  ix86_expand_vector_set (false, operands[0], operands[1],
3919			  INTVAL (operands[2]));
3920  DONE;
3921})
3922
;; Extract element 0 of a V4SF.  The insn itself only emits "#"; after
;; reload it is always split into an ordinary SF move.  Memory-to-memory
;; is excluded by the insn condition.
3923(define_insn_and_split "*vec_extractv4sf_0"
3924  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3925	(vec_select:SF
3926	  (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3927	  (parallel [(const_int 0)])))]
3928  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3929  "#"
3930  "&& reload_completed"
3931  [(const_int 0)]
3932{
  /* View the source as SFmode: a hard register keeps its register number,
     anything else is narrowed with gen_lowpart.  */
3933  rtx op1 = operands[1];
3934  if (REG_P (op1))
3935    op1 = gen_rtx_REG (SFmode, REGNO (op1));
3936  else
3937    op1 = gen_lowpart (SFmode, op1);
3938  emit_move_insn (operands[0], op1);
3939  DONE;
3940})
3941
;; Extract element operand 2 (0..3) of V4SF register operand 1.
;;   alternative 0: destination is a general register or memory -> emitted
;;                  directly as extractps/vextractps;
;;   alternatives 1/2: destination is an SSE register -> "#", split after
;;                  reload into a shuffle that moves the wanted element to
;;                  position 0 of the destination viewed as V4SF.
3942(define_insn_and_split "*sse4_1_extractps"
3943  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
3944	(vec_select:SF
3945	  (match_operand:V4SF 1 "register_operand" "x,0,x")
3946	  (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
3947  "TARGET_SSE4_1"
3948  "@
3949   %vextractps\t{%2, %1, %0|%0, %1, %2}
3950   #
3951   #"
3952  "&& reload_completed && SSE_REG_P (operands[0])"
3953  [(const_int 0)]
3954{
3955  rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
3956  switch (INTVAL (operands[2]))
3957    {
3958    case 1:
3959    case 3:
      /* shufps of the source with itself, selectors (i, i, i + 4, i + 4):
	 element i ends up in every position, including position 0.  */
3960      emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
3961				      operands[2], operands[2],
3962				      GEN_INT (INTVAL (operands[2]) + 4),
3963				      GEN_INT (INTVAL (operands[2]) + 4)));
3964      break;
3965    case 2:
      /* Interleaving the high halves of the source with itself puts
	 element 2 in position 0.  */
3966      emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
3967      break;
3968    default:
3969      /* 0 should be handled by the *vec_extractv4sf_0 pattern above.  */
3970      gcc_unreachable ();
3971    }
3972  DONE;
3973}
3974  [(set_attr "isa" "*,noavx,avx")
3975   (set_attr "type" "sselog,*,*")
3976   (set_attr "prefix_data16" "1,*,*")
3977   (set_attr "prefix_extra" "1,*,*")
3978   (set_attr "length_immediate" "1,*,*")
3979   (set_attr "prefix" "maybe_vex,*,*")
3980   (set_attr "mode" "V4SF,*,*")])
3981
;; Extract element operand 2 (0..3) from a V4SF in offsettable memory into
;; an SSE, general or x87 register.  Emits "#" and is split after reload
;; into a plain SF load from byte offset 4 * index.
3982(define_insn_and_split "*vec_extract_v4sf_mem"
3983  [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
3984       (vec_select:SF
3985	 (match_operand:V4SF 1 "memory_operand" "o,o,o")
3986	 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
3987  "TARGET_SSE"
3988  "#"
3989  "&& reload_completed"
3990  [(const_int 0)]
3991{
3992  int i = INTVAL (operands[2]);
3993
3994  emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
3995  DONE;
3996})
3997
;; Extract one half of a 256-bit vector (V_256 iterator) into a
;; <ssehalfvecmode> destination.  Operand 2 selects the half: 0 dispatches
;; to vec_extract_lo_<mode>, 1 to vec_extract_hi_<mode>.
3998(define_expand "avx_vextractf128<mode>"
3999  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
4000   (match_operand:V_256 1 "register_operand" "")
4001   (match_operand:SI 2 "const_0_to_1_operand" "")]
4002  "TARGET_AVX"
4003{
  /* Generator function chosen by the half selector below.  */
4004  rtx (*insn)(rtx, rtx);
4005
4006  switch (INTVAL (operands[2]))
4007    {
4008    case 0:
4009      insn = gen_vec_extract_lo_<mode>;
4010      break;
4011    case 1:
4012      insn = gen_vec_extract_hi_<mode>;
4013      break;
4014    default:
4015      gcc_unreachable ();
4016    }
4017
4018  emit_insn (insn (operands[0], operands[1]));
4019  DONE;
4020})
4021
;; Low half (elements 0 and 1) of a four-element 256-bit vector
;; (VI8F_256 iterator).  Emits "#" and is split after reload into a plain
;; half-width move: a register source is re-expressed in <ssehalfvecmode>
;; with the same register number, any other source is narrowed with
;; gen_lowpart.  Memory-to-memory is excluded by the insn condition.
4022(define_insn_and_split "vec_extract_lo_<mode>"
4023  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4024	(vec_select:<ssehalfvecmode>
4025	  (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
4026	  (parallel [(const_int 0) (const_int 1)])))]
4027  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4028  "#"
4029  "&& reload_completed"
4030  [(const_int 0)]
4031{
4032  rtx op1 = operands[1];
4033  if (REG_P (op1))
4034    op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4035  else
4036    op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4037  emit_move_insn (operands[0], op1);
4038  DONE;
4039})
4040
;; High half (elements 2 and 3) of a four-element 256-bit vector
;; (VI8F_256 iterator), extracted from a register into an SSE register or
;; memory with vextract<i128> and immediate 1.  The "memory" attribute
;; marks the second alternative as a store.
4041(define_insn "vec_extract_hi_<mode>"
4042  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4043	(vec_select:<ssehalfvecmode>
4044	  (match_operand:VI8F_256 1 "register_operand" "x,x")
4045	  (parallel [(const_int 2) (const_int 3)])))]
4046  "TARGET_AVX"
4047  "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4048  [(set_attr "type" "sselog")
4049   (set_attr "prefix_extra" "1")
4050   (set_attr "length_immediate" "1")
4051   (set_attr "memory" "none,store")
4052   (set_attr "prefix" "vex")
4053   (set_attr "mode" "<sseinsnmode>")])
4054
;; Low half (elements 0-3) of an eight-element 256-bit vector
;; (VI4F_256 iterator).  Emits "#" and is split after reload into a plain
;; half-width move: a register source is re-expressed in <ssehalfvecmode>
;; with the same register number, any other source is narrowed with
;; gen_lowpart.  Memory-to-memory is excluded by the insn condition.
4055(define_insn_and_split "vec_extract_lo_<mode>"
4056  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4057	(vec_select:<ssehalfvecmode>
4058	  (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
4059	  (parallel [(const_int 0) (const_int 1)
4060		     (const_int 2) (const_int 3)])))]
4061  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4062  "#"
4063  "&& reload_completed"
4064  [(const_int 0)]
4065{
4066  rtx op1 = operands[1];
4067  if (REG_P (op1))
4068    op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4069  else
4070    op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4071  emit_move_insn (operands[0], op1);
4072  DONE;
4073})
4074
;; High half (elements 4-7) of an eight-element 256-bit vector
;; (VI4F_256 iterator), extracted from a register into an SSE register or
;; memory with vextract<i128> and immediate 1.  The "memory" attribute
;; marks the second alternative as a store.
4075(define_insn "vec_extract_hi_<mode>"
4076  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4077	(vec_select:<ssehalfvecmode>
4078	  (match_operand:VI4F_256 1 "register_operand" "x,x")
4079	  (parallel [(const_int 4) (const_int 5)
4080		     (const_int 6) (const_int 7)])))]
4081  "TARGET_AVX"
4082  "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4083  [(set_attr "type" "sselog")
4084   (set_attr "prefix_extra" "1")
4085   (set_attr "length_immediate" "1")
4086   (set_attr "memory" "none,store")
4087   (set_attr "prefix" "vex")
4088   (set_attr "mode" "<sseinsnmode>")])
4089
;; Low half (elements 0-7) of a V16HI as a V8HI.  Emits "#" and is split
;; after reload into a plain V8HI move: a register source is re-expressed
;; in V8HImode with the same register number, any other source is narrowed
;; with gen_lowpart.  Memory-to-memory is excluded by the insn condition.
4090(define_insn_and_split "vec_extract_lo_v16hi"
4091  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4092	(vec_select:V8HI
4093	  (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4094	  (parallel [(const_int 0) (const_int 1)
4095		     (const_int 2) (const_int 3)
4096		     (const_int 4) (const_int 5)
4097		     (const_int 6) (const_int 7)])))]
4098  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4099  "#"
4100  "&& reload_completed"
4101  [(const_int 0)]
4102{
4103  rtx op1 = operands[1];
4104  if (REG_P (op1))
4105    op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4106  else
4107    op1 = gen_lowpart (V8HImode, op1);
4108  emit_move_insn (operands[0], op1);
4109  DONE;
4110})
4111
;; High half (elements 8-15) of a V16HI register as a V8HI, written to an
;; SSE register or memory with a 128-bit vextract and immediate 1.
;; NOTE(review): the "%~" in the template is expanded by the operand
;; printer outside this file; presumably it selects the "i" or "f" form of
;; the mnemonic depending on the target -- confirm against i386.c.
4112(define_insn "vec_extract_hi_v16hi"
4113  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4114	(vec_select:V8HI
4115	  (match_operand:V16HI 1 "register_operand" "x,x")
4116	  (parallel [(const_int 8) (const_int 9)
4117		     (const_int 10) (const_int 11)
4118		     (const_int 12) (const_int 13)
4119		     (const_int 14) (const_int 15)])))]
4120  "TARGET_AVX"
4121  "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4122  [(set_attr "type" "sselog")
4123   (set_attr "prefix_extra" "1")
4124   (set_attr "length_immediate" "1")
4125   (set_attr "memory" "none,store")
4126   (set_attr "prefix" "vex")
4127   (set_attr "mode" "OI")])
4128
;; Low half (elements 0-15) of a V32QI as a V16QI.  Emits "#" and is split
;; after reload into a plain V16QI move: a register source is re-expressed
;; in V16QImode with the same register number, any other source is
;; narrowed with gen_lowpart.  Memory-to-memory is excluded by the insn
;; condition.
4129(define_insn_and_split "vec_extract_lo_v32qi"
4130  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4131	(vec_select:V16QI
4132	  (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4133	  (parallel [(const_int 0) (const_int 1)
4134		     (const_int 2) (const_int 3)
4135		     (const_int 4) (const_int 5)
4136		     (const_int 6) (const_int 7)
4137		     (const_int 8) (const_int 9)
4138		     (const_int 10) (const_int 11)
4139		     (const_int 12) (const_int 13)
4140		     (const_int 14) (const_int 15)])))]
4141  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4142  "#"
4143  "&& reload_completed"
4144  [(const_int 0)]
4145{
4146  rtx op1 = operands[1];
4147  if (REG_P (op1))
4148    op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4149  else
4150    op1 = gen_lowpart (V16QImode, op1);
4151  emit_move_insn (operands[0], op1);
4152  DONE;
4153})
4154
;; High half (elements 16-31) of a V32QI register as a V16QI, written to
;; an SSE register or memory with a 128-bit vextract and immediate 1.
;; NOTE(review): the "%~" in the template is expanded by the operand
;; printer outside this file; presumably it selects the "i" or "f" form of
;; the mnemonic depending on the target -- confirm against i386.c.
4155(define_insn "vec_extract_hi_v32qi"
4156  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4157	(vec_select:V16QI
4158	  (match_operand:V32QI 1 "register_operand" "x,x")
4159	  (parallel [(const_int 16) (const_int 17)
4160		     (const_int 18) (const_int 19)
4161		     (const_int 20) (const_int 21)
4162		     (const_int 22) (const_int 23)
4163		     (const_int 24) (const_int 25)
4164		     (const_int 26) (const_int 27)
4165		     (const_int 28) (const_int 29)
4166		     (const_int 30) (const_int 31)])))]
4167  "TARGET_AVX"
4168  "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4169  [(set_attr "type" "sselog")
4170   (set_attr "prefix_extra" "1")
4171   (set_attr "length_immediate" "1")
4172   (set_attr "memory" "none,store")
4173   (set_attr "prefix" "vex")
4174   (set_attr "mode" "OI")])
4175
4176;; Modes handled by vec_extract patterns.
;; Each row pairs a 256-bit mode, enabled only under TARGET_AVX, with its
;; 128-bit counterpart, which carries no extra condition.
4177(define_mode_iterator VEC_EXTRACT_MODE
4178  [(V32QI "TARGET_AVX") V16QI
4179   (V16HI "TARGET_AVX") V8HI
4180   (V8SI "TARGET_AVX") V4SI
4181   (V4DI "TARGET_AVX") V2DI
4182   (V8SF "TARGET_AVX") V4SF
4183   (V4DF "TARGET_AVX") V2DF])
4184
;; Element-extract expander for every mode in VEC_EXTRACT_MODE.
;; Operand 0 is the scalar destination register, operand 1 the source
;; vector register and operand 2 a constant integer passed on as the
;; element selector; all work is delegated to ix86_expand_vector_extract,
;; called with its first argument false.
4185(define_expand "vec_extract<mode>"
4186  [(match_operand:<ssescalarmode> 0 "register_operand" "")
4187   (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
4188   (match_operand 2 "const_int_operand" "")]
4189  "TARGET_SSE"
4190{
4191  ix86_expand_vector_extract (false, operands[0], operands[1],
4192			      INTVAL (operands[2]));
4193  DONE;
4194})
4195
4196;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4197;;
4198;; Parallel double-precision floating point element swizzling
4199;;
4200;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4201
4202;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhpd on 256-bit operands.  With operand 1 as elements 0-3 and
;; operand 2 as elements 4-7 of the concatenation, the result is
;; { op1[1], op2[1], op1[3], op2[3] }: the odd element of each 128-bit
;; lane, taken per lane.
4203(define_insn "avx_unpckhpd256"
4204  [(set (match_operand:V4DF 0 "register_operand" "=x")
4205	(vec_select:V4DF
4206	  (vec_concat:V8DF
4207	    (match_operand:V4DF 1 "register_operand" "x")
4208	    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4209	  (parallel [(const_int 1) (const_int 5)
4210		     (const_int 3) (const_int 7)])))]
4211  "TARGET_AVX"
4212  "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4213  [(set_attr "type" "sselog")
4214   (set_attr "prefix" "vex")
4215   (set_attr "mode" "V4DF")])
4216
;; Full-width high interleave of two V4DF vectors, built from three sets
;; because the 256-bit unpack insns only work within 128-bit lanes:
;;   tmp3 = { op1[0], op2[0], op1[2], op2[2] }   (selectors 0,4,2,6)
;;   tmp4 = { op1[1], op2[1], op1[3], op2[3] }   (selectors 1,5,3,7)
;;   op0  = { tmp3[2], tmp3[3], tmp4[2], tmp4[3] }
;;        = { op1[2], op2[2], op1[3], op2[3] }
;; Operands 3 and 4 are fresh pseudo registers created in the preparation
;; code.
4217(define_expand "vec_interleave_highv4df"
4218  [(set (match_dup 3)
4219	(vec_select:V4DF
4220	  (vec_concat:V8DF
4221	    (match_operand:V4DF 1 "register_operand" "x")
4222	    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4223	  (parallel [(const_int 0) (const_int 4)
4224		     (const_int 2) (const_int 6)])))
4225   (set (match_dup 4)
4226	(vec_select:V4DF
4227	  (vec_concat:V8DF
4228	    (match_dup 1)
4229	    (match_dup 2))
4230	  (parallel [(const_int 1) (const_int 5)
4231		     (const_int 3) (const_int 7)])))
4232   (set (match_operand:V4DF 0 "register_operand" "")
4233	(vec_select:V4DF
4234	  (vec_concat:V8DF
4235	    (match_dup 3)
4236	    (match_dup 4))
4237	  (parallel [(const_int 2) (const_int 3)
4238		     (const_int 6) (const_int 7)])))]
4239 "TARGET_AVX"
4240{
4241  operands[3] = gen_reg_rtx (V4DFmode);
4242  operands[4] = gen_reg_rtx (V4DFmode);
4243})
4244
4245
;; High interleave of two V2DF vectors: result is { op1[1], op2[1] }.
;; If ix86_vec_interleave_v2df_operator_ok rejects the operand combination
;; (called with 1 as its second argument), operand 2 is forced into a
;; register before the pattern is emitted.
4246(define_expand "vec_interleave_highv2df"
4247  [(set (match_operand:V2DF 0 "register_operand" "")
4248	(vec_select:V2DF
4249	  (vec_concat:V4DF
4250	    (match_operand:V2DF 1 "nonimmediate_operand" "")
4251	    (match_operand:V2DF 2 "nonimmediate_operand" ""))
4252	  (parallel [(const_int 1)
4253		     (const_int 3)])))]
4254  "TARGET_SSE2"
4255{
4256  if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4257    operands[2] = force_reg (V2DFmode, operands[2]);
4258})
4259
;; Matcher for the V2DF high interleave { op1[1], op2[1] }.
;; Alternatives, in order:
;;   0/1: both inputs in SSE registers -> unpckhpd / vunpckhpd;
;;   2:   operand 1 in offsettable memory and operand 2 identical to it
;;        -> movddup from %H1 (SSE3);
;;   3/4: operand 1 in offsettable memory -> movlpd / vmovlpd from %H1
;;        into the register holding operand 2;
;;   5:   memory destination tied to operand 2 -> movhpd store of
;;        operand 1.
;; NOTE(review): %H1 is presumably the upper 8 bytes of the memory
;; operand -- the modifier is defined outside this file.
4260(define_insn "*vec_interleave_highv2df"
4261  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,x,x,m")
4262	(vec_select:V2DF
4263	  (vec_concat:V4DF
4264	    (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
4265	    (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
4266	  (parallel [(const_int 1)
4267		     (const_int 3)])))]
4268  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4269  "@
4270   unpckhpd\t{%2, %0|%0, %2}
4271   vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4272   %vmovddup\t{%H1, %0|%0, %H1}
4273   movlpd\t{%H1, %0|%0, %H1}
4274   vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4275   %vmovhpd\t{%1, %0|%0, %1}"
4276  [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4277  (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4278   (set_attr "prefix_data16" "*,*,*,1,*,1")
4279   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4280   (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
4281
4282;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit movddup expressed as an unpack-low of operand 1 with itself:
;; the result is { op1[0], op1[0], op1[2], op1[2] }.  There is no
;; preparation code; the RTL is emitted as written.
4283(define_expand "avx_movddup256"
4284  [(set (match_operand:V4DF 0 "register_operand" "")
4285	(vec_select:V4DF
4286	  (vec_concat:V8DF
4287	    (match_operand:V4DF 1 "nonimmediate_operand" "")
4288	    (match_dup 1))
4289	  (parallel [(const_int 0) (const_int 4)
4290		     (const_int 2) (const_int 6)])))]
4291  "TARGET_AVX")
4292
;; Named expander for the 256-bit unpack-low:
;; result is { op1[0], op2[0], op1[2], op2[2] }.  Operand 1 must be a
;; register here; there is no preparation code.
4293(define_expand "avx_unpcklpd256"
4294  [(set (match_operand:V4DF 0 "register_operand" "")
4295	(vec_select:V4DF
4296	  (vec_concat:V8DF
4297	    (match_operand:V4DF 1 "register_operand" "")
4298	    (match_operand:V4DF 2 "nonimmediate_operand" ""))
4299	  (parallel [(const_int 0) (const_int 4)
4300		     (const_int 2) (const_int 6)])))]
4301  "TARGET_AVX")
4302
;; Matcher for the 256-bit unpack-low
;; { op1[0], op2[0], op1[2], op2[2] }.  Alternatives:
;;   0: operand 1 in a register -> vunpcklpd;
;;   1: operand 1 in memory and operand 2 identical to it (constraint "1")
;;      -> vmovddup straight from memory.
4303(define_insn "*avx_unpcklpd256"
4304  [(set (match_operand:V4DF 0 "register_operand"         "=x,x")
4305	(vec_select:V4DF
4306	  (vec_concat:V8DF
4307	    (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
4308	    (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
4309	  (parallel [(const_int 0) (const_int 4)
4310		     (const_int 2) (const_int 6)])))]
4311  "TARGET_AVX"
4312  "@
4313   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4314   vmovddup\t{%1, %0|%0, %1}"
4315  [(set_attr "type" "sselog")
4316   (set_attr "prefix" "vex")
4317   (set_attr "mode" "V4DF")])
4318
;; Full-width low interleave of two V4DF vectors, built from three sets
;; because the 256-bit unpack insns only work within 128-bit lanes:
;;   tmp3 = { op1[0], op2[0], op1[2], op2[2] }   (selectors 0,4,2,6)
;;   tmp4 = { op1[1], op2[1], op1[3], op2[3] }   (selectors 1,5,3,7)
;;   op0  = { tmp3[0], tmp3[1], tmp4[0], tmp4[1] }
;;        = { op1[0], op2[0], op1[1], op2[1] }
;; Operands 3 and 4 are fresh pseudo registers created in the preparation
;; code.
4319(define_expand "vec_interleave_lowv4df"
4320  [(set (match_dup 3)
4321	(vec_select:V4DF
4322	  (vec_concat:V8DF
4323	    (match_operand:V4DF 1 "register_operand" "x")
4324	    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4325	  (parallel [(const_int 0) (const_int 4)
4326		     (const_int 2) (const_int 6)])))
4327   (set (match_dup 4)
4328	(vec_select:V4DF
4329	  (vec_concat:V8DF
4330	    (match_dup 1)
4331	    (match_dup 2))
4332	  (parallel [(const_int 1) (const_int 5)
4333		     (const_int 3) (const_int 7)])))
4334   (set (match_operand:V4DF 0 "register_operand" "")
4335	(vec_select:V4DF
4336	  (vec_concat:V8DF
4337	    (match_dup 3)
4338	    (match_dup 4))
4339	  (parallel [(const_int 0) (const_int 1)
4340		     (const_int 4) (const_int 5)])))]
4341 "TARGET_AVX"
4342{
4343  operands[3] = gen_reg_rtx (V4DFmode);
4344  operands[4] = gen_reg_rtx (V4DFmode);
4345})
4346
;; Low interleave of two V2DF vectors: result is { op1[0], op2[0] }.
;; If ix86_vec_interleave_v2df_operator_ok rejects the operand combination
;; (called with 0 as its second argument), operand 1 is forced into a
;; register before the pattern is emitted.
4347(define_expand "vec_interleave_lowv2df"
4348  [(set (match_operand:V2DF 0 "register_operand" "")
4349	(vec_select:V2DF
4350	  (vec_concat:V4DF
4351	    (match_operand:V2DF 1 "nonimmediate_operand" "")
4352	    (match_operand:V2DF 2 "nonimmediate_operand" ""))
4353	  (parallel [(const_int 0)
4354		     (const_int 2)])))]
4355  "TARGET_SSE2"
4356{
4357  if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4358    operands[1] = force_reg (V2DFmode, operands[1]);
4359})
4360
;; Matcher for the V2DF low interleave { op1[0], op2[0] }.
;; Alternatives, in order:
;;   0/1: both inputs in SSE registers -> unpcklpd / vunpcklpd;
;;   2:   operand 1 in memory and operand 2 identical to it -> movddup
;;        (SSE3);
;;   3/4: operand 2 in memory -> movhpd / vmovhpd load on top of
;;        operand 1;
;;   5:   offsettable memory destination tied to operand 1 -> movlpd store
;;        of operand 2 to %H0.
;; NOTE(review): %H0 is presumably the upper 8 bytes of the memory
;; operand -- the modifier is defined outside this file.
4361(define_insn "*vec_interleave_lowv2df"
4362  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,x,x,o")
4363	(vec_select:V2DF
4364	  (vec_concat:V4DF
4365	    (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4366	    (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4367	  (parallel [(const_int 0)
4368		     (const_int 2)])))]
4369  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4370  "@
4371   unpcklpd\t{%2, %0|%0, %2}
4372   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4373   %vmovddup\t{%1, %0|%0, %1}
4374   movhpd\t{%2, %0|%0, %2}
4375   vmovhpd\t{%2, %1, %0|%0, %1, %2}
4376   %vmovlpd\t{%2, %H0|%H0, %2}"
4377  [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4378   (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4379   (set_attr "prefix_data16" "*,*,*,1,*,1")
4380   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4381   (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
4382
;; After reload, storing { op1[0], op1[0] } (a register's low element
;; duplicated) to a V2DF in memory is rewritten as two DFmode stores of
;; that low element, at byte offsets 0 and 8 of the destination.
4383(define_split
4384  [(set (match_operand:V2DF 0 "memory_operand" "")
4385	(vec_select:V2DF
4386	  (vec_concat:V4DF
4387	    (match_operand:V2DF 1 "register_operand" "")
4388	    (match_dup 1))
4389	  (parallel [(const_int 0)
4390		     (const_int 2)])))]
4391  "TARGET_SSE3 && reload_completed"
4392  [(const_int 0)]
4393{
  /* The low DF element lives in the same hard register as the vector.  */
4394  rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4395  emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4396  emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
4397  DONE;
4398})
4399
;; A V2DF built by selecting the same element i (operand 2, 0 or 1) from
;; both copies of a memory vector -- selectors (i, i + 2), enforced by the
;; split condition -- is a broadcast: rewrite it as a vec_duplicate of the
;; DFmode value at byte offset 8 * i of that memory operand.
4400(define_split
4401  [(set (match_operand:V2DF 0 "register_operand" "")
4402	(vec_select:V2DF
4403	  (vec_concat:V4DF
4404	    (match_operand:V2DF 1 "memory_operand" "")
4405	    (match_dup 1))
4406	  (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4407		     (match_operand:SI 3 "const_int_operand" "")])))]
4408  "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4409  [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4410{
4411  operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
4412})
4413
;; Expander taking the shuffle control as one integer (operand 3).  Each
;; of its low four bits is decoded into an explicit element selector for
;; avx_shufpd256_1:
;;   bit 0 -> 0 or 1,  bit 1 -> 4 or 5,  bit 2 -> 2 or 3,  bit 3 -> 6 or 7.
;; Higher bits of operand 3 are ignored.
4414(define_expand "avx_shufpd256"
4415  [(match_operand:V4DF 0 "register_operand" "")
4416   (match_operand:V4DF 1 "register_operand" "")
4417   (match_operand:V4DF 2 "nonimmediate_operand" "")
4418   (match_operand:SI 3 "const_int_operand" "")]
4419  "TARGET_AVX"
4420{
4421  int mask = INTVAL (operands[3]);
4422  emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4423				   GEN_INT (mask & 1),
4424				   GEN_INT (mask & 2 ? 5 : 4),
4425				   GEN_INT (mask & 4 ? 3 : 2),
4426				   GEN_INT (mask & 8 ? 7 : 6)));
4427  DONE;
4428})
4429
;; vshufpd on 256-bit operands with the shuffle spelled out as four element
;; selectors into the concatenation of operand 1 (elements 0-3) and
;; operand 2 (elements 4-7).  The predicates restrict each selector to the
;; two values the instruction can encode for that result position.
4430(define_insn "avx_shufpd256_1"
4431  [(set (match_operand:V4DF 0 "register_operand" "=x")
4432	(vec_select:V4DF
4433	  (vec_concat:V8DF
4434	    (match_operand:V4DF 1 "register_operand" "x")
4435	    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4436	  (parallel [(match_operand 3 "const_0_to_1_operand" "")
4437		     (match_operand 4 "const_4_to_5_operand" "")
4438		     (match_operand 5 "const_2_to_3_operand" "")
4439		     (match_operand 6 "const_6_to_7_operand" "")])))]
4440  "TARGET_AVX"
4441{
  /* Re-encode the four selectors as the 4-bit immediate, one bit per
     result element; operand 3 is reused to hold it for the template.  */
4442  int mask;
4443  mask = INTVAL (operands[3]);
4444  mask |= (INTVAL (operands[4]) - 4) << 1;
4445  mask |= (INTVAL (operands[5]) - 2) << 2;
4446  mask |= (INTVAL (operands[6]) - 6) << 3;
4447  operands[3] = GEN_INT (mask);
4448
4449  return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4450}
4451  [(set_attr "type" "sselog")
4452   (set_attr "length_immediate" "1")
4453   (set_attr "prefix" "vex")
4454   (set_attr "mode" "V4DF")])
4455
;; Expander taking the shuffle control as one integer (operand 3).  Its
;; low two bits are decoded into explicit element selectors for
;; sse2_shufpd_v2df: bit 0 -> 0 or 1, bit 1 -> 2 or 3.  Higher bits of
;; operand 3 are ignored.
4456(define_expand "sse2_shufpd"
4457  [(match_operand:V2DF 0 "register_operand" "")
4458   (match_operand:V2DF 1 "register_operand" "")
4459   (match_operand:V2DF 2 "nonimmediate_operand" "")
4460   (match_operand:SI 3 "const_int_operand" "")]
4461  "TARGET_SSE2"
4462{
4463  int mask = INTVAL (operands[3]);
4464  emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4465				GEN_INT (mask & 1),
4466				GEN_INT (mask & 2 ? 3 : 2)));
4467  DONE;
4468})
4469
4470;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX2 vpunpckhqdq on 256-bit operands: result is
;; { op1[1], op2[1], op1[3], op2[3] }.
4471(define_insn "avx2_interleave_highv4di"
4472  [(set (match_operand:V4DI 0 "register_operand" "=x")
4473	(vec_select:V4DI
4474	  (vec_concat:V8DI
4475	    (match_operand:V4DI 1 "register_operand" "x")
4476	    (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4477	  (parallel [(const_int 1)
4478		     (const_int 5)
4479		     (const_int 3)
4480		     (const_int 7)])))]
4481  "TARGET_AVX2"
4482  "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4483  [(set_attr "type" "sselog")
4484   (set_attr "prefix" "vex")
4485   (set_attr "mode" "OI")])
4486
;; High interleave of two V2DI vectors: result is { op1[1], op2[1] }.
;; Alternative 0 (punpckhqdq) ties operand 1 to the destination;
;; alternative 1 (vpunpckhqdq) is the three-operand VEX form.
4487(define_insn "vec_interleave_highv2di"
4488  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4489	(vec_select:V2DI
4490	  (vec_concat:V4DI
4491	    (match_operand:V2DI 1 "register_operand" "0,x")
4492	    (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4493	  (parallel [(const_int 1)
4494		     (const_int 3)])))]
4495  "TARGET_SSE2"
4496  "@
4497   punpckhqdq\t{%2, %0|%0, %2}
4498   vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4499  [(set_attr "isa" "noavx,avx")
4500   (set_attr "type" "sselog")
4501   (set_attr "prefix_data16" "1,*")
4502   (set_attr "prefix" "orig,vex")
4503   (set_attr "mode" "TI")])
4504
;; AVX2 vpunpcklqdq on 256-bit operands: result is
;; { op1[0], op2[0], op1[2], op2[2] }.
4505(define_insn "avx2_interleave_lowv4di"
4506  [(set (match_operand:V4DI 0 "register_operand" "=x")
4507	(vec_select:V4DI
4508	  (vec_concat:V8DI
4509	    (match_operand:V4DI 1 "register_operand" "x")
4510	    (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4511	  (parallel [(const_int 0)
4512		     (const_int 4)
4513		     (const_int 2)
4514		     (const_int 6)])))]
4515  "TARGET_AVX2"
4516  "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4517  [(set_attr "type" "sselog")
4518   (set_attr "prefix" "vex")
4519   (set_attr "mode" "OI")])
4520
;; Interleave of two V2DI vectors selecting elements 0 and 2 of their
;; V4DI concatenation, i.e. element 0 of operand 1 followed by element 0
;; of operand 2.  Alternative 0 is the two-operand punpcklqdq with
;; operand 1 tied to the destination (isa noavx); alternative 1 is the
;; three-operand vpunpcklqdq (isa avx).
4521(define_insn "vec_interleave_lowv2di"
4522  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4523	(vec_select:V2DI
4524	  (vec_concat:V4DI
4525	    (match_operand:V2DI 1 "register_operand" "0,x")
4526	    (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4527	  (parallel [(const_int 0)
4528		     (const_int 2)])))]
4529  "TARGET_SSE2"
4530  "@
4531   punpcklqdq\t{%2, %0|%0, %2}
4532   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4533  [(set_attr "isa" "noavx,avx")
4534   (set_attr "type" "sselog")
4535   (set_attr "prefix_data16" "1,*")
4536   (set_attr "prefix" "orig,vex")
4537   (set_attr "mode" "TI")])
4538
;; Two-element shuffle over the VI8F_128 modes.  Operand 3 (0..1) selects
;; result element 0 from the concatenation and operand 4 (2..3) selects
;; result element 1.  The output code folds both selectors into the
;; immediate: bit 0 is operand 3, bit 1 is operand 4 minus 2.  It then
;; overwrites operands[3] with that immediate so the templates can print
;; it as %3.  Alternative 0 prints shufpd with operand 1 tied to the
;; destination; alternative 1 prints the three-operand vshufpd.
4539(define_insn "sse2_shufpd_<mode>"
4540  [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4541	(vec_select:VI8F_128
4542	  (vec_concat:<ssedoublevecmode>
4543	    (match_operand:VI8F_128 1 "register_operand" "0,x")
4544	    (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4545	  (parallel [(match_operand 3 "const_0_to_1_operand" "")
4546		     (match_operand 4 "const_2_to_3_operand" "")])))]
4547  "TARGET_SSE2"
4548{
4549  int mask;
4550  mask = INTVAL (operands[3]);
4551  mask |= (INTVAL (operands[4]) - 2) << 1;
4552  operands[3] = GEN_INT (mask);
4553
4554  switch (which_alternative)
4555    {
4556    case 0:
4557      return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4558    case 1:
4559      return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4560    default:
4561      gcc_unreachable ();
4562    }
4563}
4564  [(set_attr "isa" "noavx,avx")
4565   (set_attr "type" "sselog")
4566   (set_attr "length_immediate" "1")
4567   (set_attr "prefix" "orig,vex")
4568   (set_attr "mode" "V2DF")])
4569
;; Extract element 1 of a V2DF (operand 1) into a DF destination.
;; Alternative 0 stores to memory with %vmovhpd; alternatives 1 and 2 are
;; the register forms (unpckhpd with the source tied to the destination,
;; or vunpckhpd with the source printed twice via %d1).  Alternatives 3-5
;; take the source from "o" memory and output "#", so they rely on being
;; split.  The condition rejects memory-to-memory.
4570;; Avoid combining registers from different units in a single alternative,
4571;; see comment above inline_secondary_memory_needed function in i386.c
4572(define_insn "sse2_storehpd"
4573  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x,x,*f,r")
4574	(vec_select:DF
4575	  (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4576	  (parallel [(const_int 1)])))]
4577  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4578  "@
4579   %vmovhpd\t{%1, %0|%0, %1}
4580   unpckhpd\t%0, %0
4581   vunpckhpd\t{%d1, %0|%0, %d1}
4582   #
4583   #
4584   #"
4585  [(set_attr "isa" "*,noavx,avx,*,*,*")
4586   (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4587   (set (attr "prefix_data16")
4588     (if_then_else
4589       (and (eq_attr "alternative" "0")
4590	    (not (match_test "TARGET_AVX")))
4591       (const_string "1")
4592       (const_string "*")))
4593   (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4594   (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
4595
;; After reload, extracting element 1 of a V2DF that lives in memory into
;; a register becomes a plain DF move: operand 1 is re-addressed as a
;; DFmode reference at byte offset 8 of the original memory operand.
4596(define_split
4597  [(set (match_operand:DF 0 "register_operand" "")
4598	(vec_select:DF
4599	  (match_operand:V2DF 1 "memory_operand" "")
4600	  (parallel [(const_int 1)])))]
4601  "TARGET_SSE2 && reload_completed"
4602  [(set (match_dup 0) (match_dup 1))]
4603  "operands[1] = adjust_address (operands[1], DFmode, 8);")
4604
;; Element-1 extract from a V2DF for targets with SSE but without SSE2
;; (condition "!TARGET_SSE2 && TARGET_SSE").  Uses the single-precision
;; move forms: movhps for register-to-memory, movhlps for
;; register-to-register, and movlps from the %H1 form of a memory source.
;; The condition rejects memory-to-memory.
4605(define_insn "*vec_extractv2df_1_sse"
4606  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4607	(vec_select:DF
4608	  (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4609	  (parallel [(const_int 1)])))]
4610  "!TARGET_SSE2 && TARGET_SSE
4611   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4612  "@
4613   movhps\t{%1, %0|%0, %1}
4614   movhlps\t{%1, %0|%0, %1}
4615   movlps\t{%H1, %0|%0, %H1}"
4616  [(set_attr "type" "ssemov")
4617   (set_attr "mode" "V2SF,V4SF,V2SF")])
4618
;; Extract element 0 of a V2DF (operand 1) into a DF destination.  Only
;; alternative 0 (register to memory) emits an instruction, %vmovlpd.
;; The remaining alternatives output "#", so they rely on being split.
;; The condition rejects memory-to-memory.
4619;; Avoid combining registers from different units in a single alternative,
4620;; see comment above inline_secondary_memory_needed function in i386.c
4621(define_insn "sse2_storelpd"
4622  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x,*f,r")
4623	(vec_select:DF
4624	  (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4625	  (parallel [(const_int 0)])))]
4626  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4627  "@
4628   %vmovlpd\t{%1, %0|%0, %1}
4629   #
4630   #
4631   #
4632   #"
4633  [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4634   (set_attr "prefix_data16" "1,*,*,*,*")
4635   (set_attr "prefix" "maybe_vex")
4636   (set_attr "mode" "V1DF,DF,DF,DF,DF")])
4637
;; After reload, extracting element 0 of a V2DF into a register becomes a
;; plain DF move.  A register source is re-expressed as a DFmode register
;; with the same register number; any other source goes through
;; gen_lowpart.  The move is emitted directly, so the replacement pattern
;; is just a (const_int 0) placeholder.
4638(define_split
4639  [(set (match_operand:DF 0 "register_operand" "")
4640	(vec_select:DF
4641	  (match_operand:V2DF 1 "nonimmediate_operand" "")
4642	  (parallel [(const_int 0)])))]
4643  "TARGET_SSE2 && reload_completed"
4644  [(const_int 0)]
4645{
4646  rtx op1 = operands[1];
4647  if (REG_P (op1))
4648    op1 = gen_rtx_REG (DFmode, REGNO (op1));
4649  else
4650    op1 = gen_lowpart (DFmode, op1);
4651  emit_move_insn (operands[0], op1);
4652  DONE;
4653})
4654
;; Element-0 extract from a V2DF for targets with SSE but without SSE2
;; (condition "!TARGET_SSE2 && TARGET_SSE").  Register-to-memory and
;; memory-to-register use movlps; register-to-register uses movaps.
;; The condition rejects memory-to-memory.
4655(define_insn "*vec_extractv2df_0_sse"
4656  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4657	(vec_select:DF
4658	  (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4659	  (parallel [(const_int 0)])))]
4660  "!TARGET_SSE2 && TARGET_SSE
4661   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4662  "@
4663   movlps\t{%1, %0|%0, %1}
4664   movaps\t{%1, %0|%0, %1}
4665   movlps\t{%1, %0|%0, %1}"
4666  [(set_attr "type" "ssemov")
4667   (set_attr "mode" "V2SF,V4SF,V2SF")])
4668
;; Expander wrapping sse2_loadhpd.  It runs the operands through
;; ix86_fixup_binary_operands, emits sse2_loadhpd into the destination
;; that call returns, and copies the result into operands[0] when the
;; returned destination differs from it.
4669(define_expand "sse2_loadhpd_exp"
4670  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4671	(vec_concat:V2DF
4672	  (vec_select:DF
4673	    (match_operand:V2DF 1 "nonimmediate_operand" "")
4674	    (parallel [(const_int 0)]))
4675	  (match_operand:DF 2 "nonimmediate_operand" "")))]
4676  "TARGET_SSE2"
4677{
4678  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4679
4680  emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4681
4682  /* Fix up the destination if needed.  */
4683  if (dst != operands[0])
4684    emit_move_insn (operands[0], dst);
4685
4686  DONE;
4687})
4688
;; Build a V2DF whose element 0 is element 0 of operand 1 and whose
;; element 1 is the DF operand 2.  Alternatives 0-1 take operand 2 from
;; memory (movhpd / vmovhpd); alternatives 2-3 take it from an SSE
;; register (unpcklpd / vunpcklpd).  Alternatives 4-6 have an "o" memory
;; destination tied to operand 1 and output "#", so they rely on being
;; split.  The condition rejects both sources being in memory.
4689;; Avoid combining registers from different units in a single alternative,
4690;; see comment above inline_secondary_memory_needed function in i386.c
4691(define_insn "sse2_loadhpd"
4692  [(set (match_operand:V2DF 0 "nonimmediate_operand"
4693	  "=x,x,x,x,o,o ,o")
4694	(vec_concat:V2DF
4695	  (vec_select:DF
4696	    (match_operand:V2DF 1 "nonimmediate_operand"
4697	  " 0,x,0,x,0,0 ,0")
4698	    (parallel [(const_int 0)]))
4699	  (match_operand:DF 2 "nonimmediate_operand"
4700	  " m,m,x,x,x,*f,r")))]
4701  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4702  "@
4703   movhpd\t{%2, %0|%0, %2}
4704   vmovhpd\t{%2, %1, %0|%0, %1, %2}
4705   unpcklpd\t{%2, %0|%0, %2}
4706   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4707   #
4708   #
4709   #"
4710  [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4711   (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
4712   (set_attr "prefix_data16" "1,*,*,*,*,*,*")
4713   (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
4714   (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
4715
;; After reload, replacing element 1 of a V2DF in memory (element 0 comes
;; from that same memory via match_dup 0) becomes a plain DF store of the
;; register operand 1 at byte offset 8 of the destination.
4716(define_split
4717  [(set (match_operand:V2DF 0 "memory_operand" "")
4718	(vec_concat:V2DF
4719	  (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4720	  (match_operand:DF 1 "register_operand" "")))]
4721  "TARGET_SSE2 && reload_completed"
4722  [(set (match_dup 0) (match_dup 1))]
4723  "operands[0] = adjust_address (operands[0], DFmode, 8);")
4724
;; Expander wrapping sse2_loadlpd.  It runs the operands through
;; ix86_fixup_binary_operands, emits sse2_loadlpd into the destination
;; that call returns, and copies the result into operands[0] when the
;; returned destination differs from it.
4725(define_expand "sse2_loadlpd_exp"
4726  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4727	(vec_concat:V2DF
4728	  (match_operand:DF 2 "nonimmediate_operand" "")
4729	  (vec_select:DF
4730	    (match_operand:V2DF 1 "nonimmediate_operand" "")
4731	    (parallel [(const_int 1)]))))]
4732  "TARGET_SSE2"
4733{
4734  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4735
4736  emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4737
4738  /* Fix up the destination if needed.  */
4739  if (dst != operands[0])
4740    emit_move_insn (operands[0], dst);
4741
4742  DONE;
4743})
4744
;; Build a V2DF whose element 0 is the DF operand 2 and whose element 1
;; is element 1 of operand 1.  Alternative 0 pairs a memory operand 2
;; with a "C"-constrained operand 1 and emits a plain %vmovsd load
;; (NOTE(review): "C" is presumably the zero vector constant -- confirm
;; against constraints.md).  Alternatives 1-4 are the movlpd/movsd forms
;; and their VEX variants; alternative 5 uses shufpd with immediate 2;
;; alternatives 6-7 load element 1 from the %H1 form of a memory
;; operand 1.  Alternatives 8-10 have a memory destination tied to
;; operand 1 and output "#", so they rely on being split.  The condition
;; rejects both sources being in memory.
4745;; Avoid combining registers from different units in a single alternative,
4746;; see comment above inline_secondary_memory_needed function in i386.c
4747(define_insn "sse2_loadlpd"
4748  [(set (match_operand:V2DF 0 "nonimmediate_operand"
4749	  "=x,x,x,x,x,x,x,x,m,m ,m")
4750	(vec_concat:V2DF
4751	  (match_operand:DF 2 "nonimmediate_operand"
4752	  " m,m,m,x,x,0,0,x,x,*f,r")
4753	  (vec_select:DF
4754	    (match_operand:V2DF 1 "vector_move_operand"
4755	  " C,0,x,0,x,x,o,o,0,0 ,0")
4756	    (parallel [(const_int 1)]))))]
4757  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4758  "@
4759   %vmovsd\t{%2, %0|%0, %2}
4760   movlpd\t{%2, %0|%0, %2}
4761   vmovlpd\t{%2, %1, %0|%0, %1, %2}
4762   movsd\t{%2, %0|%0, %2}
4763   vmovsd\t{%2, %1, %0|%0, %1, %2}
4764   shufpd\t{$2, %1, %0|%0, %1, 2}
4765   movhpd\t{%H1, %0|%0, %H1}
4766   vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4767   #
4768   #
4769   #"
4770  [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
4771   (set (attr "type")
4772     (cond [(eq_attr "alternative" "5")
4773	      (const_string "sselog")
4774	    (eq_attr "alternative" "9")
4775	      (const_string "fmov")
4776	    (eq_attr "alternative" "10")
4777	      (const_string "imov")
4778	   ]
4779	   (const_string "ssemov")))
4780   (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4781   (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4782   (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4783   (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
4784
;; After reload, replacing element 0 of a V2DF in memory (element 1 comes
;; from that same memory via match_dup 0) becomes a plain DF store of the
;; register operand 1 at byte offset 0 of the destination.
4785(define_split
4786  [(set (match_operand:V2DF 0 "memory_operand" "")
4787	(vec_concat:V2DF
4788	  (match_operand:DF 1 "register_operand" "")
4789	  (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4790  "TARGET_SSE2 && reload_completed"
4791  [(set (match_dup 0) (match_dup 1))]
4792  "operands[0] = adjust_address (operands[0], DFmode, 0);")
4793
;; vec_merge of operand 2 and operand 1 with mask (const_int 1): under
;; RTL vec_merge semantics element 0 comes from operand 2 and element 1
;; from operand 1.  The nine alternatives cover register/memory
;; combinations: movsd/vmovsd and movlpd/vmovlpd when operand 2 supplies
;; the new low element, a %vmovlpd store for a memory destination,
;; shufpd with immediate 2, and the movhps forms that move the high half
;; via %H1 or %H0.
4794(define_insn "sse2_movsd"
4795  [(set (match_operand:V2DF 0 "nonimmediate_operand"   "=x,x,x,x,m,x,x,x,o")
4796	(vec_merge:V2DF
4797	  (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4798	  (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4799	  (const_int 1)))]
4800  "TARGET_SSE2"
4801  "@
4802   movsd\t{%2, %0|%0, %2}
4803   vmovsd\t{%2, %1, %0|%0, %1, %2}
4804   movlpd\t{%2, %0|%0, %2}
4805   vmovlpd\t{%2, %1, %0|%0, %1, %2}
4806   %vmovlpd\t{%2, %0|%0, %2}
4807   shufpd\t{$2, %1, %0|%0, %1, 2}
4808   movhps\t{%H1, %0|%0, %H1}
4809   vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4810   %vmovhps\t{%1, %H0|%H0, %1}"
4811  [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
4812   (set (attr "type")
4813     (if_then_else
4814       (eq_attr "alternative" "5")
4815       (const_string "sselog")
4816       (const_string "ssemov")))
4817   (set (attr "prefix_data16")
4818     (if_then_else
4819       (and (eq_attr "alternative" "2,4")
4820	    (not (match_test "TARGET_AVX")))
4821       (const_string "1")
4822       (const_string "*")))
4823   (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4824   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4825   (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
4826
;; Duplicate a DF value into both elements of a V2DF.  Alternative 0
;; (isa noavx) needs the input tied to the destination register and
;; emits unpcklpd %0, %0.  Alternative 1 (isa sse3) takes a register or
;; memory input and emits %vmovddup.
4827(define_insn "vec_dupv2df"
4828  [(set (match_operand:V2DF 0 "register_operand"     "=x,x")
4829	(vec_duplicate:V2DF
4830	  (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
4831  "TARGET_SSE2"
4832  "@
4833   unpcklpd\t%0, %0
4834   %vmovddup\t{%1, %0|%0, %1}"
4835  [(set_attr "isa" "noavx,sse3")
4836   (set_attr "type" "sselog1")
4837   (set_attr "prefix" "orig,maybe_vex")
4838   (set_attr "mode" "V2DF")])
4839
;; Concatenate two DF values into a V2DF (operand 1 is element 0,
;; operand 2 is element 1).  The insn condition is only TARGET_SSE; the
;; per-alternative "isa" attribute restricts which forms are available.
;; Alternative 2 requires operand 2 to match operand 1 (constraint "1")
;; and emits %vmovddup.  Alternative 5 pairs a memory operand 1 with a
;; "C"-constrained operand 2 and emits %vmovsd.  The last two
;; alternatives (isa noavx) use movlhps / movhps.
4840(define_insn "*vec_concatv2df"
4841  [(set (match_operand:V2DF 0 "register_operand"     "=x,x,x,x,x,x,x,x")
4842	(vec_concat:V2DF
4843	  (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
4844	  (match_operand:DF 2 "vector_move_operand"  " x,x,1,m,m,C,x,m")))]
4845  "TARGET_SSE"
4846  "@
4847   unpcklpd\t{%2, %0|%0, %2}
4848   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4849   %vmovddup\t{%1, %0|%0, %1}
4850   movhpd\t{%2, %0|%0, %2}
4851   vmovhpd\t{%2, %1, %0|%0, %1, %2}
4852   %vmovsd\t{%1, %0|%0, %1}
4853   movlhps\t{%2, %0|%0, %2}
4854   movhps\t{%2, %0|%0, %2}"
4855  [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
4856   (set (attr "type")
4857     (if_then_else
4858       (eq_attr "alternative" "0,1,2")
4859       (const_string "sselog")
4860       (const_string "ssemov")))
4861   (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
4862   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
4863   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
4864
4865;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4866;;
4867;; Parallel integral arithmetic
4868;;
4869;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4870
;; Integer vector negation expressed as (minus 0 x).  The preparation
;; statement fills operand 2 (the match_dup) with a register holding the
;; all-zero constant of the vector mode.
4871(define_expand "neg<mode>2"
4872  [(set (match_operand:VI_AVX2 0 "register_operand" "")
4873	(minus:VI_AVX2
4874	  (match_dup 2)
4875	  (match_operand:VI_AVX2 1 "nonimmediate_operand" "")))]
4876  "TARGET_SSE2"
4877  "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
4878
;; Named expander for integer vector add/subtract over the plusminus
;; code iterator.  It only canonicalizes the operands through
;; ix86_fixup_binary_operands_no_copy and then emits the pattern as
;; written.
4879(define_expand "<plusminus_insn><mode>3"
4880  [(set (match_operand:VI_AVX2 0 "register_operand" "")
4881	(plusminus:VI_AVX2
4882	  (match_operand:VI_AVX2 1 "nonimmediate_operand" "")
4883	  (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
4884  "TARGET_SSE2"
4885  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
4886
;; Matching insn for integer vector add/subtract.  Alternative 0 is the
;; two-operand legacy form with operand 1 tied to the destination (the
;; <comm> attribute supplies the constraint prefix for operand 1);
;; alternative 1 is the three-operand VEX form.  The "mode" attribute
;; follows the vector width through <sseinsnmode>.
4887(define_insn "*<plusminus_insn><mode>3"
4888  [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
4889	(plusminus:VI_AVX2
4890	  (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4891	  (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4892  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4893  "@
4894   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4895   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4896  [(set_attr "isa" "noavx,avx")
4897   (set_attr "type" "sseiadd")
4898   (set_attr "prefix_data16" "1,*")
4899   (set_attr "prefix" "orig,vex")
4900   (set_attr "mode" "<sseinsnmode>")])
4901
;; Named expander for the saturating add/subtract codes (sat_plusminus)
;; on the VI12_AVX2 modes.  It only canonicalizes the operands through
;; ix86_fixup_binary_operands_no_copy.
4902(define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
4903  [(set (match_operand:VI12_AVX2 0 "register_operand" "")
4904	(sat_plusminus:VI12_AVX2
4905	  (match_operand:VI12_AVX2 1 "nonimmediate_operand" "")
4906	  (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))]
4907  "TARGET_SSE2"
4908  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
4909
;; Matching insn for saturating add/subtract on the VI12_AVX2 modes.
;; Alternative 0 is the two-operand legacy form with operand 1 tied to
;; the destination; alternative 1 is the three-operand VEX form.
;; The "mode" attribute uses <sseinsnmode> rather than a fixed "TI" so
;; that it tracks the vector width of each iterated mode, the same way
;; the non-saturating *<plusminus_insn><mode>3 pattern does.
4910(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
4911  [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
4912	(sat_plusminus:VI12_AVX2
4913	  (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4914	  (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4915  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4916  "@
4917   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4918   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4919  [(set_attr "isa" "noavx,avx")
4920   (set_attr "type" "sseiadd")
4921   (set_attr "prefix_data16" "1,*")
4922   (set_attr "prefix" "orig,vex")
4923   (set_attr "mode" "<sseinsnmode>")])
4924
;; Multiply for the byte-element VI1_AVX2 modes.  The insn matches only
;; while new pseudos can still be created, outputs "#", and is always
;; split ("&& 1").  The split interleaves each operand with itself (high
;; and low halves), multiplies the results in <sseunpackmode>, and then
;; gathers the even bytes of the two products into operand 0: through
;; ix86_expand_vec_extract_even_odd for V16QImode, otherwise through a
;; 32-element constant permutation.  A REG_EQUAL note carrying the
;; original MULT is attached to the last emitted insn.
4925(define_insn_and_split "mul<mode>3"
4926  [(set (match_operand:VI1_AVX2 0 "register_operand" "")
4927	(mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "")
4928		       (match_operand:VI1_AVX2 2 "register_operand" "")))]
4929  "TARGET_SSE2
4930   && can_create_pseudo_p ()"
4931  "#"
4932  "&& 1"
4933  [(const_int 0)]
4934{
4935  rtx t[6];
4936  int i;
4937  enum machine_mode mulmode = <sseunpackmode>mode;
4938
4939  for (i = 0; i < 6; ++i)
4940    t[i] = gen_reg_rtx (<MODE>mode);
4941
4942  /* Unpack data such that we've got a source byte in each low byte of
4943     each word.  We don't care what goes into the high byte of each word.
4944     Rather than trying to get zero in there, most convenient is to let
4945     it be a copy of the low byte.  */
4946  emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[0], operands[1],
4947						   operands[1]));
4948  emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[1], operands[2],
4949						   operands[2]));
4950  emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[2], operands[1],
4951						  operands[1]));
4952  emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[3], operands[2],
4953						  operands[2]));
4954
4955  /* Multiply words.  The end-of-line annotations here give a picture of what
4956     the output of that instruction looks like.  Dot means don't care; the
4957     letters are the bytes of the result with A being the most significant.  */
4958  emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[4]),
4959			  gen_rtx_MULT (mulmode,	/* .A.B.C.D.E.F.G.H */
4960					gen_lowpart (mulmode, t[0]),
4961					gen_lowpart (mulmode, t[1]))));
4962  emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[5]),
4963			  gen_rtx_MULT (mulmode,	/* .I.J.K.L.M.N.O.P */
4964					gen_lowpart (mulmode, t[2]),
4965					gen_lowpart (mulmode, t[3]))));
4966
4967  /* Extract the even bytes and merge them back together.  */
4968  if (<MODE>mode == V16QImode)
4969    ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
4970  else
4971    {
4972      /* Since avx2_interleave_{low,high}v32qi used above aren't cross-lane,
4973	 this can't be normal even extraction, but one where additionally
4974	 the second and third quarter are swapped.  That is even one insn
4975	 shorter than even extraction.  */
4976      rtvec v = rtvec_alloc (32);
4977      for (i = 0; i < 32; ++i)
4978	RTVEC_ELT (v, i)
4979	  = GEN_INT (i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0));
4980      t[0] = operands[0];
4981      t[1] = t[5];
4982      t[2] = t[4];
4983      t[3] = gen_rtx_CONST_VECTOR (<MODE>mode, v);
4984      ix86_expand_vec_perm_const (t);
4985    }
4986
4987  set_unique_reg_note (get_last_insn (), REG_EQUAL,
4988		       gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
4989  DONE;
4990})
4991
;; Named multiply expander for the VI2_AVX2 (halfword element) modes.
;; It only canonicalizes the operands through
;; ix86_fixup_binary_operands_no_copy.
4992(define_expand "mul<mode>3"
4993  [(set (match_operand:VI2_AVX2 0 "register_operand" "")
4994	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")
4995		       (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))]
4996  "TARGET_SSE2"
4997  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
4998
;; Matching insn for the VI2_AVX2 multiply: pmullw with operand 1 tied
;; to the destination (isa noavx) or three-operand vpmullw (isa avx).
;; Operand 1 is marked commutative with "%".
4999(define_insn "*mul<mode>3"
5000  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5001	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
5002		       (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5003  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5004  "@
5005   pmullw\t{%2, %0|%0, %2}
5006   vpmullw\t{%2, %1, %0|%0, %1, %2}"
5007  [(set_attr "isa" "noavx,avx")
5008   (set_attr "type" "sseimul")
5009   (set_attr "prefix_data16" "1,*")
5010   (set_attr "prefix" "orig,vex")
5011   (set_attr "mode" "<sseinsnmode>")])
5012
;; Named high-part multiply expander for the VI2_AVX2 modes: both inputs
;; are extended (any_extend covers the signed and unsigned variants) to
;; <ssedoublemode>, multiplied, shifted right by 16, and truncated back.
;; The preparation statement only canonicalizes the operands.
5013(define_expand "<s>mul<mode>3_highpart"
5014  [(set (match_operand:VI2_AVX2 0 "register_operand" "")
5015	(truncate:VI2_AVX2
5016	  (lshiftrt:<ssedoublemode>
5017	    (mult:<ssedoublemode>
5018	      (any_extend:<ssedoublemode>
5019		(match_operand:VI2_AVX2 1 "nonimmediate_operand" ""))
5020	      (any_extend:<ssedoublemode>
5021		(match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))
5022	    (const_int 16))))]
5023  "TARGET_SSE2"
5024  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
5025
;; Matching insn for the high-part multiply: pmulh<u>w with operand 1
;; tied to the destination (isa noavx) or three-operand vpmulh<u>w
;; (isa avx).  <u> is filled in from the any_extend iterator.
5026(define_insn "*<s>mul<mode>3_highpart"
5027  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5028	(truncate:VI2_AVX2
5029	  (lshiftrt:<ssedoublemode>
5030	    (mult:<ssedoublemode>
5031	      (any_extend:<ssedoublemode>
5032		(match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
5033	      (any_extend:<ssedoublemode>
5034		(match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
5035	    (const_int 16))))]
5036  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5037  "@
5038   pmulh<u>w\t{%2, %0|%0, %2}
5039   vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5040  [(set_attr "isa" "noavx,avx")
5041   (set_attr "type" "sseimul")
5042   (set_attr "prefix_data16" "1,*")
5043   (set_attr "prefix" "orig,vex")
5044   (set_attr "mode" "<sseinsnmode>")])
5045
;; AVX2 unsigned widening multiply expander: elements 0, 2, 4, 6 of each
;; V8SI input are zero-extended to V4DI and multiplied.  The preparation
;; statement only canonicalizes the operands.
5046(define_expand "avx2_umulv4siv4di3"
5047  [(set (match_operand:V4DI 0 "register_operand" "")
5048	(mult:V4DI
5049	  (zero_extend:V4DI
5050	    (vec_select:V4SI
5051	      (match_operand:V8SI 1 "nonimmediate_operand" "")
5052	      (parallel [(const_int 0) (const_int 2)
5053			 (const_int 4) (const_int 6)])))
5054	  (zero_extend:V4DI
5055	    (vec_select:V4SI
5056	      (match_operand:V8SI 2 "nonimmediate_operand" "")
5057	      (parallel [(const_int 0) (const_int 2)
5058			 (const_int 4) (const_int 6)])))))]
5059  "TARGET_AVX2"
5060  "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
5061
;; Matching insn for the AVX2 unsigned widening multiply: elements
;; 0, 2, 4, 6 of each V8SI input are zero-extended to V4DI and
;; multiplied, emitted as three-operand vpmuludq.  Operand 1 is marked
;; commutative with "%".
;; The pattern is named avx2 to agree with its TARGET_AVX2 condition, its
;; expander avx2_umulv4siv4di3 and the signed *avx2_mulv4siv4di3; the
;; leading "*" means no generator function is derived from the name.
5062(define_insn "*avx2_umulv4siv4di3"
5063  [(set (match_operand:V4DI 0 "register_operand" "=x")
5064	(mult:V4DI
5065	  (zero_extend:V4DI
5066	    (vec_select:V4SI
5067	      (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5068	      (parallel [(const_int 0) (const_int 2)
5069			 (const_int 4) (const_int 6)])))
5070	  (zero_extend:V4DI
5071	    (vec_select:V4SI
5072	      (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5073	      (parallel [(const_int 0) (const_int 2)
5074			 (const_int 4) (const_int 6)])))))]
5075  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5076  "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5077  [(set_attr "type" "sseimul")
5078   (set_attr "prefix" "vex")
5079   (set_attr "mode" "OI")])
5080
;; SSE2 unsigned widening multiply expander: elements 0 and 2 of each
;; V4SI input are zero-extended to V2DI and multiplied.  The preparation
;; statement only canonicalizes the operands.
5081(define_expand "sse2_umulv2siv2di3"
5082  [(set (match_operand:V2DI 0 "register_operand" "")
5083	(mult:V2DI
5084	  (zero_extend:V2DI
5085	    (vec_select:V2SI
5086	      (match_operand:V4SI 1 "nonimmediate_operand" "")
5087	      (parallel [(const_int 0) (const_int 2)])))
5088	  (zero_extend:V2DI
5089	    (vec_select:V2SI
5090	      (match_operand:V4SI 2 "nonimmediate_operand" "")
5091	      (parallel [(const_int 0) (const_int 2)])))))]
5092  "TARGET_SSE2"
5093  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
5094
;; Matching insn for the SSE2 unsigned widening multiply: pmuludq with
;; operand 1 tied to the destination (isa noavx) or three-operand
;; vpmuludq (isa avx).  Operand 1 is marked commutative with "%".
5095(define_insn "*sse2_umulv2siv2di3"
5096  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5097	(mult:V2DI
5098	  (zero_extend:V2DI
5099	    (vec_select:V2SI
5100	      (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5101	      (parallel [(const_int 0) (const_int 2)])))
5102	  (zero_extend:V2DI
5103	    (vec_select:V2SI
5104	      (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5105	      (parallel [(const_int 0) (const_int 2)])))))]
5106  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5107  "@
5108   pmuludq\t{%2, %0|%0, %2}
5109   vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5110  [(set_attr "isa" "noavx,avx")
5111   (set_attr "type" "sseimul")
5112   (set_attr "prefix_data16" "1,*")
5113   (set_attr "prefix" "orig,vex")
5114   (set_attr "mode" "TI")])
5115
;; AVX2 signed widening multiply expander: elements 0, 2, 4, 6 of each
;; V8SI input are sign-extended to V4DI and multiplied.  The preparation
;; statement only canonicalizes the operands.
5116(define_expand "avx2_mulv4siv4di3"
5117  [(set (match_operand:V4DI 0 "register_operand" "")
5118	(mult:V4DI
5119	  (sign_extend:V4DI
5120	    (vec_select:V4SI
5121	      (match_operand:V8SI 1 "nonimmediate_operand" "")
5122	      (parallel [(const_int 0) (const_int 2)
5123			 (const_int 4) (const_int 6)])))
5124	  (sign_extend:V4DI
5125	    (vec_select:V4SI
5126	      (match_operand:V8SI 2 "nonimmediate_operand" "")
5127	      (parallel [(const_int 0) (const_int 2)
5128			 (const_int 4) (const_int 6)])))))]
5129  "TARGET_AVX2"
5130  "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
5131
;; Matching insn for the AVX2 signed widening multiply: elements
;; 0, 2, 4, 6 of each V8SI input are sign-extended to V4DI and
;; multiplied, emitted as three-operand vpmuldq.
;; Operand 1 carries the "%" commutative modifier, as in the unsigned
;; AVX2 pattern and the SSE2/SSE4.1 siblings.  The insn condition uses
;; ix86_binary_operator_ok with MULT and operand 1 has a
;; nonimmediate_operand predicate, so the allocator should be allowed to
;; swap the two inputs instead of reloading operand 1 into a register.
5132(define_insn "*avx2_mulv4siv4di3"
5133  [(set (match_operand:V4DI 0 "register_operand" "=x")
5134	(mult:V4DI
5135	  (sign_extend:V4DI
5136	    (vec_select:V4SI
5137	      (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5138	      (parallel [(const_int 0) (const_int 2)
5139			 (const_int 4) (const_int 6)])))
5140	  (sign_extend:V4DI
5141	    (vec_select:V4SI
5142	      (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5143	      (parallel [(const_int 0) (const_int 2)
5144			 (const_int 4) (const_int 6)])))))]
5145  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5146  "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5147  [(set_attr "isa" "avx")
5148   (set_attr "type" "sseimul")
5149   (set_attr "prefix_extra" "1")
5150   (set_attr "prefix" "vex")
5151   (set_attr "mode" "OI")])
5152
;; SSE4.1 signed widening multiply expander: elements 0 and 2 of each
;; V4SI input are sign-extended to V2DI and multiplied.  The preparation
;; statement only canonicalizes the operands.
5153(define_expand "sse4_1_mulv2siv2di3"
5154  [(set (match_operand:V2DI 0 "register_operand" "")
5155	(mult:V2DI
5156	  (sign_extend:V2DI
5157	    (vec_select:V2SI
5158	      (match_operand:V4SI 1 "nonimmediate_operand" "")
5159	      (parallel [(const_int 0) (const_int 2)])))
5160	  (sign_extend:V2DI
5161	    (vec_select:V2SI
5162	      (match_operand:V4SI 2 "nonimmediate_operand" "")
5163	      (parallel [(const_int 0) (const_int 2)])))))]
5164  "TARGET_SSE4_1"
5165  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
5166
;; Matching insn for the SSE4.1 signed widening multiply: pmuldq with
;; operand 1 tied to the destination (isa noavx) or three-operand
;; vpmuldq (isa avx).  Operand 1 is marked commutative with "%".
5167(define_insn "*sse4_1_mulv2siv2di3"
5168  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5169	(mult:V2DI
5170	  (sign_extend:V2DI
5171	    (vec_select:V2SI
5172	      (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5173	      (parallel [(const_int 0) (const_int 2)])))
5174	  (sign_extend:V2DI
5175	    (vec_select:V2SI
5176	      (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5177	      (parallel [(const_int 0) (const_int 2)])))))]
5178  "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5179  "@
5180   pmuldq\t{%2, %0|%0, %2}
5181   vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5182  [(set_attr "isa" "noavx,avx")
5183   (set_attr "type" "sseimul")
5184   (set_attr "prefix_data16" "1,*")
5185   (set_attr "prefix_extra" "1")
5186   (set_attr "prefix" "orig,vex")
5187   (set_attr "mode" "TI")])
5188
;; AVX2 multiply-and-add expander on V16HI inputs producing V8SI.  The
;; even-indexed halfwords (0, 2, ..., 14) of both inputs are sign-extended
;; and multiplied, the odd-indexed halfwords (1, 3, ..., 15) likewise, and
;; the two V8SI products are added.  The preparation statement only
;; canonicalizes the operands.
5189(define_expand "avx2_pmaddwd"
5190  [(set (match_operand:V8SI 0 "register_operand" "")
5191	(plus:V8SI
5192	  (mult:V8SI
5193	    (sign_extend:V8SI
5194	      (vec_select:V8HI
5195		(match_operand:V16HI 1 "nonimmediate_operand" "")
5196		(parallel [(const_int 0)
5197			   (const_int 2)
5198			   (const_int 4)
5199			   (const_int 6)
5200			   (const_int 8)
5201			   (const_int 10)
5202			   (const_int 12)
5203			   (const_int 14)])))
5204	    (sign_extend:V8SI
5205	      (vec_select:V8HI
5206		(match_operand:V16HI 2 "nonimmediate_operand" "")
5207		(parallel [(const_int 0)
5208			   (const_int 2)
5209			   (const_int 4)
5210			   (const_int 6)
5211			   (const_int 8)
5212			   (const_int 10)
5213			   (const_int 12)
5214			   (const_int 14)]))))
5215	  (mult:V8SI
5216	    (sign_extend:V8SI
5217	      (vec_select:V8HI (match_dup 1)
5218		(parallel [(const_int 1)
5219			   (const_int 3)
5220			   (const_int 5)
5221			   (const_int 7)
5222			   (const_int 9)
5223			   (const_int 11)
5224			   (const_int 13)
5225			   (const_int 15)])))
5226	    (sign_extend:V8SI
5227	      (vec_select:V8HI (match_dup 2)
5228		(parallel [(const_int 1)
5229			   (const_int 3)
5230			   (const_int 5)
5231			   (const_int 7)
5232			   (const_int 9)
5233			   (const_int 11)
5234			   (const_int 13)
5235			   (const_int 15)]))))))]
5236  "TARGET_AVX2"
5237  "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
5238
;; SSE2 multiply-and-add expander on V8HI inputs producing V4SI.  The
;; even-indexed halfwords (0, 2, 4, 6) of both inputs are sign-extended
;; and multiplied, the odd-indexed halfwords (1, 3, 5, 7) likewise, and
;; the two V4SI products are added.  The preparation statement only
;; canonicalizes the operands.
5239(define_expand "sse2_pmaddwd"
5240  [(set (match_operand:V4SI 0 "register_operand" "")
5241	(plus:V4SI
5242	  (mult:V4SI
5243	    (sign_extend:V4SI
5244	      (vec_select:V4HI
5245		(match_operand:V8HI 1 "nonimmediate_operand" "")
5246		(parallel [(const_int 0)
5247			   (const_int 2)
5248			   (const_int 4)
5249			   (const_int 6)])))
5250	    (sign_extend:V4SI
5251	      (vec_select:V4HI
5252		(match_operand:V8HI 2 "nonimmediate_operand" "")
5253		(parallel [(const_int 0)
5254			   (const_int 2)
5255			   (const_int 4)
5256			   (const_int 6)]))))
5257	  (mult:V4SI
5258	    (sign_extend:V4SI
5259	      (vec_select:V4HI (match_dup 1)
5260		(parallel [(const_int 1)
5261			   (const_int 3)
5262			   (const_int 5)
5263			   (const_int 7)])))
5264	    (sign_extend:V4SI
5265	      (vec_select:V4HI (match_dup 2)
5266		(parallel [(const_int 1)
5267			   (const_int 3)
5268			   (const_int 5)
5269			   (const_int 7)]))))))]
5270  "TARGET_SSE2"
5271  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
5272
;; Matching insn for the AVX2 multiply-and-add: same RTL shape as the
;; avx2_pmaddwd expander (even-element products plus odd-element
;; products of sign-extended halfwords), emitted as three-operand
;; vpmaddwd.  Operand 1 is marked commutative with "%"; operand 2 may be
;; in memory.
5273(define_insn "*avx2_pmaddwd"
5274  [(set (match_operand:V8SI 0 "register_operand" "=x")
5275	(plus:V8SI
5276	  (mult:V8SI
5277	    (sign_extend:V8SI
5278	      (vec_select:V8HI
5279		(match_operand:V16HI 1 "nonimmediate_operand" "%x")
5280		(parallel [(const_int 0)
5281			   (const_int 2)
5282			   (const_int 4)
5283			   (const_int 6)
5284			   (const_int 8)
5285			   (const_int 10)
5286			   (const_int 12)
5287			   (const_int 14)])))
5288	    (sign_extend:V8SI
5289	      (vec_select:V8HI
5290		(match_operand:V16HI 2 "nonimmediate_operand" "xm")
5291		(parallel [(const_int 0)
5292			   (const_int 2)
5293			   (const_int 4)
5294			   (const_int 6)
5295			   (const_int 8)
5296			   (const_int 10)
5297			   (const_int 12)
5298			   (const_int 14)]))))
5299	  (mult:V8SI
5300	    (sign_extend:V8SI
5301	      (vec_select:V8HI (match_dup 1)
5302		(parallel [(const_int 1)
5303			   (const_int 3)
5304			   (const_int 5)
5305			   (const_int 7)
5306			   (const_int 9)
5307			   (const_int 11)
5308			   (const_int 13)
5309			   (const_int 15)])))
5310	    (sign_extend:V8SI
5311	      (vec_select:V8HI (match_dup 2)
5312		(parallel [(const_int 1)
5313			   (const_int 3)
5314			   (const_int 5)
5315			   (const_int 7)
5316			   (const_int 9)
5317			   (const_int 11)
5318			   (const_int 13)
5319			   (const_int 15)]))))))]
5320  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5321  "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5322  [(set_attr "type" "sseiadd")
5323   (set_attr "prefix" "vex")
5324   (set_attr "mode" "OI")])
5325
;; Matching insn for the SSE2 multiply-and-add: same RTL shape as the
;; sse2_pmaddwd expander, emitted as pmaddwd with operand 1 tied to the
;; destination (isa noavx) or three-operand vpmaddwd (isa avx).
;; Operand 1 is marked commutative with "%".
5326(define_insn "*sse2_pmaddwd"
5327  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5328	(plus:V4SI
5329	  (mult:V4SI
5330	    (sign_extend:V4SI
5331	      (vec_select:V4HI
5332		(match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5333		(parallel [(const_int 0)
5334			   (const_int 2)
5335			   (const_int 4)
5336			   (const_int 6)])))
5337	    (sign_extend:V4SI
5338	      (vec_select:V4HI
5339		(match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5340		(parallel [(const_int 0)
5341			   (const_int 2)
5342			   (const_int 4)
5343			   (const_int 6)]))))
5344	  (mult:V4SI
5345	    (sign_extend:V4SI
5346	      (vec_select:V4HI (match_dup 1)
5347		(parallel [(const_int 1)
5348			   (const_int 3)
5349			   (const_int 5)
5350			   (const_int 7)])))
5351	    (sign_extend:V4SI
5352	      (vec_select:V4HI (match_dup 2)
5353		(parallel [(const_int 1)
5354			   (const_int 3)
5355			   (const_int 5)
5356			   (const_int 7)]))))))]
5357  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5358  "@
5359   pmaddwd\t{%2, %0|%0, %2}
5360   vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5361  [(set_attr "isa" "noavx,avx")
5362   (set_attr "type" "sseiadd")
5363   (set_attr "atom_unit" "simul")
5364   (set_attr "prefix_data16" "1,*")
5365   (set_attr "prefix" "orig,vex")
5366   (set_attr "mode" "TI")])
5367
;; Named multiply expander for the VI4_AVX2 (32-bit element) modes.
;; Both inputs are required to be registers.  The operands are
;; canonicalized through ix86_fixup_binary_operands_no_copy only when
;; TARGET_SSE4_1 or TARGET_AVX is set; otherwise the pattern is emitted
;; with the operands untouched.
5368(define_expand "mul<mode>3"
5369  [(set (match_operand:VI4_AVX2 0 "register_operand" "")
5370	(mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "")
5371		       (match_operand:VI4_AVX2 2 "register_operand" "")))]
5372  "TARGET_SSE2"
5373{
5374  if (TARGET_SSE4_1 || TARGET_AVX)
5375    ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
5376})
5377
;; Matching insn for the VI4_AVX2 multiply when SSE4.1 is available:
;; pmulld with operand 1 tied to the destination (isa noavx) or
;; three-operand vpmulld (isa avx).  Operand 1 is marked commutative
;; with "%".
5378(define_insn "*<sse4_1_avx2>_mul<mode>3"
5379  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
5380	(mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
5381		       (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5382  "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5383  "@
5384   pmulld\t{%2, %0|%0, %2}
5385   vpmulld\t{%2, %1, %0|%0, %1, %2}"
5386  [(set_attr "isa" "noavx,avx")
5387   (set_attr "type" "sseimul")
5388   (set_attr "prefix_extra" "1")
5389   (set_attr "prefix" "orig,vex")
5390   (set_attr "mode" "<sseinsnmode>")])
5391
;; V4SI multiply for targets with SSE2 but neither SSE4.1 nor AVX.  The
;; pattern has no assembler output of its own ("#"); it matches only
;; while new pseudos can be created and is unconditionally split
;; ("&& 1") into the sequence built below:
;;   1. sse2_umulv2siv2di3 on the original inputs,
;;   2. both inputs shifted right by 32 bits as V1TI values,
;;   3. sse2_umulv2siv2di3 on the shifted inputs,
;;   4. two pshufd shuffles selecting elements 0 and 2 of each product,
;;   5. an interleave-low merging the two shuffled vectors into operand 0.
;; A REG_EQUAL note carrying the original MULT is attached to the last
;; insn emitted.
5392(define_insn_and_split "*sse2_mulv4si3"
5393  [(set (match_operand:V4SI 0 "register_operand" "")
5394	(mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5395		   (match_operand:V4SI 2 "register_operand" "")))]
5396  "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5397   && can_create_pseudo_p ()"
5398  "#"
5399  "&& 1"
5400  [(const_int 0)]
5401{
5402  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5403  rtx op0, op1, op2;
5404
5405  op0 = operands[0];
5406  op1 = operands[1];
5407  op2 = operands[2];
5408  t1 = gen_reg_rtx (V4SImode);
5409  t2 = gen_reg_rtx (V4SImode);
5410  t3 = gen_reg_rtx (V4SImode);
5411  t4 = gen_reg_rtx (V4SImode);
5412  t5 = gen_reg_rtx (V4SImode);
5413  t6 = gen_reg_rtx (V4SImode);
5414  thirtytwo = GEN_INT (32);
5415
5416  /* Multiply elements 2 and 0.  */
  /* The V2DI product is written through a V2DI view of the V4SI
     temporary t1.  */
5417  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5418				     op1, op2));
5419
5420  /* Shift both input vectors down one element, so that elements 3
5421     and 1 are now in the slots for elements 2 and 0.  For K8, at
5422     least, this is faster than using a shuffle.  */
5423  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5424				 gen_lowpart (V1TImode, op1),
5425				 thirtytwo));
5426  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5427				 gen_lowpart (V1TImode, op2),
5428				 thirtytwo));
5429  /* Multiply elements 3 and 1.  */
5430  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5431				     t2, t3));
5432
5433  /* Move the results in element 2 down to element 1; we don't care
5434     what goes in elements 2 and 3.  */
5435  emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5436				const0_rtx, const0_rtx));
5437  emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5438				const0_rtx, const0_rtx));
5439
5440  /* Merge the parts back together.  */
5441  emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
5442
  /* Record on the final insn that the whole sequence equals the
     original multiplication.  */
5443  set_unique_reg_note (get_last_insn (), REG_EQUAL,
5444		       gen_rtx_MULT (V4SImode, operands[1], operands[2]));
5445  DONE;
5446})
5447
;; Element-wise 64-bit multiply for the VI8_AVX2 modes, synthesized from
;; 32-bit multiplies.  The pattern has no assembler output ("#"); it
;; matches while pseudos can be created and is always split ("&& 1").
;;
;; Two strategies are used:
;;   - TARGET_XOP with V2DImode: swap the 32-bit halves of operand 1 with
;;     pshufd, multiply by operand 2 as V4SI, add adjacent products with
;;     xop_phadddq, shift the sums left by 32, then let xop_pmacsdql
;;     multiply the original operands and add the shifted sums.
;;   - otherwise: low*low + ((op1 * (op2 >> 32)) << 32)
;;                        + ((op2 * (op1 >> 32)) << 32),
;;     using the <sse2_avx2>_umulv<ssescalarnum>si<mode>3 generator for
;;     each of the three products.
;; A REG_EQUAL note carrying the original MULT is attached to the last
;; insn emitted.
5448(define_insn_and_split "mul<mode>3"
5449  [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5450	(mult:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
5451		       (match_operand:VI8_AVX2 2 "register_operand" "")))]
5452  "TARGET_SSE2
5453   && can_create_pseudo_p ()"
5454  "#"
5455  "&& 1"
5456  [(const_int 0)]
5457{
5458  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5459  rtx op0, op1, op2;
5460
5461  op0 = operands[0];
5462  op1 = operands[1];
5463  op2 = operands[2];
5464
5465  if (TARGET_XOP && <MODE>mode == V2DImode)
5466    {
5467      /* op1: A,B,C,D, op2: E,F,G,H */
      /* NOTE(review): the letters appear to name the 32-bit halves with
	 the high half of each 64-bit element first (A high, B low);
	 that reading makes the final formula below consistent --
	 confirm.  */
5468      op1 = gen_lowpart (V4SImode, op1);
5469      op2 = gen_lowpart (V4SImode, op2);
5470
5471      t1 = gen_reg_rtx (V4SImode);
5472      t2 = gen_reg_rtx (V4SImode);
5473      t3 = gen_reg_rtx (V2DImode);
5474      t4 = gen_reg_rtx (V2DImode);
5475
5476      /* t1: B,A,D,C */
5477      emit_insn (gen_sse2_pshufd_1 (t1, op1,
5478				    GEN_INT (1),
5479				    GEN_INT (0),
5480				    GEN_INT (3),
5481				    GEN_INT (2)));
5482
5483      /* t2: (B*E),(A*F),(D*G),(C*H) */
5484      emit_insn (gen_mulv4si3 (t2, t1, op2));
5485
5486      /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5487      emit_insn (gen_xop_phadddq (t3, t2));
5488
5489      /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5490      emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5491
5492      /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5493      emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5494    }
5495  else
5496    {
5497      t1 = gen_reg_rtx (<MODE>mode);
5498      t2 = gen_reg_rtx (<MODE>mode);
5499      t3 = gen_reg_rtx (<MODE>mode);
5500      t4 = gen_reg_rtx (<MODE>mode);
5501      t5 = gen_reg_rtx (<MODE>mode);
5502      t6 = gen_reg_rtx (<MODE>mode);
5503      thirtytwo = GEN_INT (32);
5504
5505      /* Multiply low parts.  */
5506      emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5507		  (t1, gen_lowpart (<ssepackmode>mode, op1),
5508		   gen_lowpart (<ssepackmode>mode, op2)));
5509
5510      /* Shift input vectors right 32 bits so we can multiply high parts.  */
5511      emit_insn (gen_lshr<mode>3 (t2, op1, thirtytwo));
5512      emit_insn (gen_lshr<mode>3 (t3, op2, thirtytwo));
5513
5514      /* Multiply high parts by low parts.  */
      /* t4 = op1 * (op2 >> 32), t5 = op2 * (op1 >> 32).  */
5515      emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5516		  (t4, gen_lowpart (<ssepackmode>mode, op1),
5517		   gen_lowpart (<ssepackmode>mode, t3)));
5518      emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5519		  (t5, gen_lowpart (<ssepackmode>mode, op2),
5520		   gen_lowpart (<ssepackmode>mode, t2)));
5521
5522      /* Shift them back.  */
5523      emit_insn (gen_ashl<mode>3 (t4, t4, thirtytwo));
5524      emit_insn (gen_ashl<mode>3 (t5, t5, thirtytwo));
5525
5526      /* Add the three parts together.  */
5527      emit_insn (gen_add<mode>3 (t6, t1, t4));
5528      emit_insn (gen_add<mode>3 (op0, t6, t5));
5529    }
5530
  /* Record on the final insn that the whole sequence equals the
     original multiplication.  */
5531  set_unique_reg_note (get_last_insn (), REG_EQUAL,
5532		       gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
5533  DONE;
5534})
5535
;; Widening multiply, "hi" variant, for the VI2_AVX2 modes (signed or
;; unsigned via the any_extend iterator).  Operand 0 has the unpacked
;; mode <sseunpackmode>.  The expander computes the ordinary product
;; (mul<mode>3) and the high-part product (<s>mul<mode>3_highpart) of the
;; two inputs and interleaves the high halves of those two vectors into
;; operand 0, which is accessed through a <MODE>mode view.
5536(define_expand "vec_widen_<s>mult_hi_<mode>"
5537  [(match_operand:<sseunpackmode> 0 "register_operand" "")
5538   (any_extend:<sseunpackmode>
5539     (match_operand:VI2_AVX2 1 "register_operand" ""))
5540   (match_operand:VI2_AVX2 2 "register_operand" "")]
5541  "TARGET_SSE2"
5542{
5543  rtx op1, op2, t1, t2, dest;
5544
5545  op1 = operands[1];
5546  op2 = operands[2];
5547  t1 = gen_reg_rtx (<MODE>mode);
5548  t2 = gen_reg_rtx (<MODE>mode);
5549  dest = gen_lowpart (<MODE>mode, operands[0]);
5550
  /* t1 = ordinary product, t2 = high-part product.  */
5551  emit_insn (gen_mul<mode>3 (t1, op1, op2));
5552  emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5553  emit_insn (gen_vec_interleave_high<mode> (dest, t1, t2));
5554  DONE;
5555})
5556
;; Widening multiply, "lo" variant, for the VI2_AVX2 modes (signed or
;; unsigned via the any_extend iterator).  Operand 0 has the unpacked
;; mode <sseunpackmode>.  The expander computes the ordinary product
;; (mul<mode>3) and the high-part product (<s>mul<mode>3_highpart) of the
;; two inputs and interleaves the low halves of those two vectors into
;; operand 0, which is accessed through a <MODE>mode view.
5557(define_expand "vec_widen_<s>mult_lo_<mode>"
5558  [(match_operand:<sseunpackmode> 0 "register_operand" "")
5559   (any_extend:<sseunpackmode>
5560     (match_operand:VI2_AVX2 1 "register_operand" ""))
5561   (match_operand:VI2_AVX2 2 "register_operand" "")]
5562  "TARGET_SSE2"
5563{
5564  rtx op1, op2, t1, t2, dest;
5565
5566  op1 = operands[1];
5567  op2 = operands[2];
5568  t1 = gen_reg_rtx (<MODE>mode);
5569  t2 = gen_reg_rtx (<MODE>mode);
5570  dest = gen_lowpart (<MODE>mode, operands[0]);
5571
  /* t1 = ordinary product, t2 = high-part product.  */
5572  emit_insn (gen_mul<mode>3 (t1, op1, op2));
5573  emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5574  emit_insn (gen_vec_interleave_low<mode> (dest, t1, t2));
5575  DONE;
5576})
5577
;; Widening V8SI -> V4DI multiply, "hi" variant, for AVX2 (signed or
;; unsigned via the any_extend iterator).  Each input is first permuted
;; as a V4DI value with element order 0,2,1,3, then shuffled with
;; avx2_pshufdv3 using the 2-bit selectors 2,2,3,3, and the two shuffled
;; vectors are finally fed to avx2_<u>mulv4siv4di3.
5578(define_expand "vec_widen_<s>mult_hi_v8si"
5579  [(match_operand:V4DI 0 "register_operand" "")
5580   (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5581   (match_operand:V8SI 2 "nonimmediate_operand" "")]
5582  "TARGET_AVX2"
5583{
5584  rtx t1, t2, t3, t4;
5585
5586  t1 = gen_reg_rtx (V4DImode);
5587  t2 = gen_reg_rtx (V4DImode);
5588  t3 = gen_reg_rtx (V8SImode);
5589  t4 = gen_reg_rtx (V8SImode);
  /* Reorder the 64-bit elements of both inputs as 0,2,1,3.  */
5590  emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5591				  const0_rtx, const2_rtx,
5592				  const1_rtx, GEN_INT (3)));
5593  emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
5594				  const0_rtx, const2_rtx,
5595				  const1_rtx, GEN_INT (3)));
  /* The pshufd immediate packs the four 2-bit selectors 2,2,3,3.  */
5596  emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5597				GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5598  emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5599				GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5600  emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
5601  DONE;
5602})
5603
;; Widening V8SI -> V4DI multiply, "lo" variant, for AVX2 (signed or
;; unsigned via the any_extend iterator).  Each input is first permuted
;; as a V4DI value with element order 0,2,1,3, then shuffled with
;; avx2_pshufdv3 using the 2-bit selectors 0,0,1,1, and the two shuffled
;; vectors are finally fed to avx2_<u>mulv4siv4di3.
5604(define_expand "vec_widen_<s>mult_lo_v8si"
5605  [(match_operand:V4DI 0 "register_operand" "")
5606   (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5607   (match_operand:V8SI 2 "nonimmediate_operand" "")]
5608  "TARGET_AVX2"
5609{
5610  rtx t1, t2, t3, t4;
5611
5612  t1 = gen_reg_rtx (V4DImode);
5613  t2 = gen_reg_rtx (V4DImode);
5614  t3 = gen_reg_rtx (V8SImode);
5615  t4 = gen_reg_rtx (V8SImode);
  /* Reorder the 64-bit elements of both inputs as 0,2,1,3.  */
5616  emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5617				  const0_rtx, const2_rtx,
5618				  const1_rtx, GEN_INT (3)));
5619  emit_insn (gen_avx2_permv4di_1 (t2,  gen_lowpart (V4DImode, operands[2]),
5620				  const0_rtx, const2_rtx,
5621				  const1_rtx, GEN_INT (3)));
  /* The pshufd immediate packs the four 2-bit selectors 0,0,1,1.  */
5622  emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5623				GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5624  emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5625				GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5626  emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
5627  DONE;
5628})
5629
;; Signed widening V4SI -> V2DI multiply, "hi" variant; needs SSE4.1.
;;   - With XOP: both inputs are shuffled with pshufd selectors 0,2,1,3
;;     and passed to xop_pmacsdqh together with a zeroed V2DI
;;     accumulator.
;;   - Otherwise: each input is interleaved (high halves) with itself and
;;     the results are multiplied with sse4_1_mulv2siv2di3.
5630(define_expand "vec_widen_smult_hi_v4si"
5631  [(match_operand:V2DI 0 "register_operand" "")
5632   (match_operand:V4SI 1 "register_operand" "")
5633   (match_operand:V4SI 2 "register_operand" "")]
5634  "TARGET_SSE4_1"
5635{
5636  rtx op1, op2, t1, t2;
5637
5638  op1 = operands[1];
5639  op2 = operands[2];
5640  t1 = gen_reg_rtx (V4SImode);
5641  t2 = gen_reg_rtx (V4SImode);
5642
5643  if (TARGET_XOP)
5644    {
5645      rtx t3 = gen_reg_rtx (V2DImode);
5646
5647      emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5648				    GEN_INT (1), GEN_INT (3)));
5649      emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5650				    GEN_INT (1), GEN_INT (3)));
      /* t3 is the (zero) addend of the multiply-accumulate.  */
5651      emit_move_insn (t3, CONST0_RTX (V2DImode));
5652
5653      emit_insn (gen_xop_pmacsdqh (operands[0], t1, t2, t3));
5654      DONE;
5655    }
5656
5657  emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5658  emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5659  emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
5660  DONE;
5661})
5662
;; Signed widening V4SI -> V2DI multiply, "lo" variant; needs SSE4.1.
;;   - With XOP: both inputs are shuffled with pshufd selectors 0,2,1,3
;;     and passed to xop_pmacsdql together with a zeroed V2DI
;;     accumulator.
;;   - Otherwise: each input is interleaved (low halves) with itself and
;;     the results are multiplied with sse4_1_mulv2siv2di3.
5663(define_expand "vec_widen_smult_lo_v4si"
5664  [(match_operand:V2DI 0 "register_operand" "")
5665   (match_operand:V4SI 1 "register_operand" "")
5666   (match_operand:V4SI 2 "register_operand" "")]
5667  "TARGET_SSE4_1"
5668{
5669  rtx op1, op2, t1, t2;
5670
5671  op1 = operands[1];
5672  op2 = operands[2];
5673  t1 = gen_reg_rtx (V4SImode);
5674  t2 = gen_reg_rtx (V4SImode);
5675
5676  if (TARGET_XOP)
5677    {
5678      rtx t3 = gen_reg_rtx (V2DImode);
5679
5680      emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5681				    GEN_INT (1), GEN_INT (3)));
5682      emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5683				    GEN_INT (1), GEN_INT (3)));
      /* t3 is the (zero) addend of the multiply-accumulate.  */
5684      emit_move_insn (t3, CONST0_RTX (V2DImode));
5685
5686      emit_insn (gen_xop_pmacsdql (operands[0], t1, t2, t3));
5687      DONE;
5688    }
5689
5690  emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5691  emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5692  emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
5693  DONE;
5694})
5695
;; Unsigned widening V4SI -> V2DI multiply, "hi" variant; needs only
;; SSE2.  Each input is interleaved (high halves) with itself and the two
;; results are multiplied with sse2_umulv2siv2di3.
5696(define_expand "vec_widen_umult_hi_v4si"
5697  [(match_operand:V2DI 0 "register_operand" "")
5698   (match_operand:V4SI 1 "register_operand" "")
5699   (match_operand:V4SI 2 "register_operand" "")]
5700  "TARGET_SSE2"
5701{
5702  rtx op1, op2, t1, t2;
5703
5704  op1 = operands[1];
5705  op2 = operands[2];
5706  t1 = gen_reg_rtx (V4SImode);
5707  t2 = gen_reg_rtx (V4SImode);
5708
5709  emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5710  emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5711  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
5712  DONE;
5713})
5714
;; Unsigned widening V4SI -> V2DI multiply, "lo" variant; needs only
;; SSE2.  Each input is interleaved (low halves) with itself and the two
;; results are multiplied with sse2_umulv2siv2di3.
5715(define_expand "vec_widen_umult_lo_v4si"
5716  [(match_operand:V2DI 0 "register_operand" "")
5717   (match_operand:V4SI 1 "register_operand" "")
5718   (match_operand:V4SI 2 "register_operand" "")]
5719  "TARGET_SSE2"
5720{
5721  rtx op1, op2, t1, t2;
5722
5723  op1 = operands[1];
5724  op2 = operands[2];
5725  t1 = gen_reg_rtx (V4SImode);
5726  t2 = gen_reg_rtx (V4SImode);
5727
5728  emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5729  emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5730  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
5731  DONE;
5732})
5733
;; Signed dot product for the VI2_AVX2 modes.  Operands 1 and 2 are the
;; narrow inputs; operand 3 is an accumulator and operand 0 the result,
;; both in <sseunpackmode>.  The expander emits <sse2_avx2>_pmaddwd into
;; a fresh temporary and then sets operand 0 to operand 3 plus that
;; temporary.
5734(define_expand "sdot_prod<mode>"
5735  [(match_operand:<sseunpackmode> 0 "register_operand" "")
5736   (match_operand:VI2_AVX2 1 "register_operand" "")
5737   (match_operand:VI2_AVX2 2 "register_operand" "")
5738   (match_operand:<sseunpackmode> 3 "register_operand" "")]
5739  "TARGET_SSE2"
5740{
5741  rtx t = gen_reg_rtx (<sseunpackmode>mode);
5742  emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
5743  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5744			  gen_rtx_PLUS (<sseunpackmode>mode,
5745					operands[3], t)));
5746  DONE;
5747})
5748
;; Code attribute mapping the extension code to an ISA name prefix:
;; zero_extend -> "sse2", sign_extend -> "sse4_1".  It is used in
;; <s>dot_prodv4si to pick the gen_<sse2_sse4_1>_<u>mulv2siv2di3
;; generator.
5749(define_code_attr sse2_sse4_1
5750   [(zero_extend "sse2") (sign_extend "sse4_1")])
5751
;; Dot product of two V4SI vectors accumulated into V2DI (signed or
;; unsigned via the any_extend iterator).  The unsigned form needs SSE2,
;; the signed form SSE4.1.  Sequence:
;;   t1 = widening multiply of operands 1 and 2, plus operand 3;
;;   t2, t3 = operands 1 and 2 shifted right by 32 bits as V1TI values;
;;   t4 = widening multiply of t2 and t3;
;;   operand 0 = t1 + t4.
5752(define_expand "<s>dot_prodv4si"
5753  [(match_operand:V2DI 0 "register_operand" "")
5754   (any_extend:V2DI (match_operand:V4SI 1 "register_operand" ""))
5755   (match_operand:V4SI 2 "register_operand" "")
5756   (match_operand:V2DI 3 "register_operand" "")]
5757  "<CODE> == ZERO_EXTEND ? TARGET_SSE2 : TARGET_SSE4_1"
5758{
5759  rtx t1, t2, t3, t4;
5760
5761  t1 = gen_reg_rtx (V2DImode);
5762  emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t1, operands[1], operands[2]));
5763  emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5764
5765  t2 = gen_reg_rtx (V4SImode);
5766  t3 = gen_reg_rtx (V4SImode);
5767  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5768				 gen_lowpart (V1TImode, operands[1]),
5769				 GEN_INT (32)));
5770  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5771				 gen_lowpart (V1TImode, operands[2]),
5772				 GEN_INT (32)));
5773
5774  t4 = gen_reg_rtx (V2DImode);
5775  emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t4, t2, t3));
5776
5777  emit_insn (gen_addv2di3 (operands[0], t1, t4));
5778  DONE;
5779})
5780
;; Dot product of two V8SI vectors accumulated into V4DI (signed or
;; unsigned via the any_extend iterator); needs AVX2.  Sequence:
;;   t1 = avx2_<u>mulv4siv4di3 of operands 1 and 2, plus operand 3;
;;   t2, t3 = operands 1 and 2 shifted right by 32 bits as V2TI values;
;;   t4 = avx2_<u>mulv4siv4di3 of t2 and t3;
;;   operand 0 = t1 + t4.
5781(define_expand "<s>dot_prodv8si"
5782  [(match_operand:V4DI 0 "register_operand" "")
5783   (any_extend:V4DI (match_operand:V8SI 1 "register_operand" ""))
5784   (match_operand:V8SI 2 "register_operand" "")
5785   (match_operand:V4DI 3 "register_operand" "")]
5786  "TARGET_AVX2"
5787{
5788  rtx t1, t2, t3, t4;
5789
5790  t1 = gen_reg_rtx (V4DImode);
5791  emit_insn (gen_avx2_<u>mulv4siv4di3 (t1, operands[1], operands[2]));
5792  emit_insn (gen_addv4di3 (t1, t1, operands[3]));
5793
5794  t2 = gen_reg_rtx (V8SImode);
5795  t3 = gen_reg_rtx (V8SImode);
5796  emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t2),
5797				 gen_lowpart (V2TImode, operands[1]),
5798				 GEN_INT (32)));
5799  emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t3),
5800				 gen_lowpart (V2TImode, operands[2]),
5801				 GEN_INT (32)));
5802
5803  t4 = gen_reg_rtx (V4DImode);
5804  emit_insn (gen_avx2_<u>mulv4siv4di3 (t4, t2, t3));
5805
5806  emit_insn (gen_addv4di3 (operands[0], t1, t4));
5807  DONE;
5808})
5809
;; Arithmetic right shift of every element of a VI24_AVX2 vector by the
;; SImode count in operand 2, emitted as psra<suffix> (two-operand,
;; operand 1 tied to the destination) or vpsra<suffix> (three-operand
;; VEX form).  The "length_immediate" attribute is 1 when operand 2 is a
;; const_int and 0 otherwise.
5810(define_insn "ashr<mode>3"
5811  [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
5812	(ashiftrt:VI24_AVX2
5813	  (match_operand:VI24_AVX2 1 "register_operand" "0,x")
5814	  (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5815  "TARGET_SSE2"
5816  "@
5817   psra<ssemodesuffix>\t{%2, %0|%0, %2}
5818   vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5819  [(set_attr "isa" "noavx,avx")
5820   (set_attr "type" "sseishft")
5821   (set (attr "length_immediate")
5822     (if_then_else (match_operand 2 "const_int_operand" "")
5823       (const_string "1")
5824       (const_string "0")))
5825   (set_attr "prefix_data16" "1,*")
5826   (set_attr "prefix" "orig,vex")
5827   (set_attr "mode" "<sseinsnmode>")])
5828
;; Logical shifts (the any_lshift code iterator) of every element of a
;; VI248_AVX2 vector by the SImode count in operand 2, emitted as
;; p<vshift><suffix> (two-operand, operand 1 tied to the destination) or
;; vp<vshift><suffix> (three-operand VEX form).  The "length_immediate"
;; attribute is 1 when operand 2 is a const_int and 0 otherwise.
5829(define_insn "<shift_insn><mode>3"
5830  [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5831	(any_lshift:VI248_AVX2
5832	  (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5833	  (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5834  "TARGET_SSE2"
5835  "@
5836   p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
5837   vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5838  [(set_attr "isa" "noavx,avx")
5839   (set_attr "type" "sseishft")
5840   (set (attr "length_immediate")
5841     (if_then_else (match_operand 2 "const_int_operand" "")
5842       (const_string "1")
5843       (const_string "0")))
5844   (set_attr "prefix_data16" "1,*")
5845   (set_attr "prefix" "orig,vex")
5846   (set_attr "mode" "<sseinsnmode>")])
5847
;; Whole-vector left shift for the VI_128 modes.  The preparation code
;; replaces operands 0 and 1 with their V1TImode views, so the emitted
;; insn is a V1TI ashift by the constant in operand 2 (which must
;; satisfy const_0_to_255_mul_8_operand).
5848(define_expand "vec_shl_<mode>"
5849  [(set (match_operand:VI_128 0 "register_operand" "")
5850	(ashift:V1TI
5851	 (match_operand:VI_128 1 "register_operand" "")
5852	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5853  "TARGET_SSE2"
5854{
5855  operands[0] = gen_lowpart (V1TImode, operands[0]);
5856  operands[1] = gen_lowpart (V1TImode, operands[1]);
5857})
5858
;; Left shift of a VIMAX_AVX2 value by a constant, emitted as pslldq
;; (two-operand, operand 1 tied to the destination) or vpslldq
;; (three-operand VEX form).  Operand 2 is divided by 8 before the
;; template is printed (presumably a bit count converted to the byte
;; count the instruction takes -- confirm against the predicate).
5859(define_insn "<sse2_avx2>_ashl<mode>3"
5860  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5861	(ashift:VIMAX_AVX2
5862	 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5863	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5864  "TARGET_SSE2"
5865{
  /* Rewrite the shift count as operand 2 divided by 8.  */
5866  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5867
5868  switch (which_alternative)
5869    {
5870    case 0:
5871      return "pslldq\t{%2, %0|%0, %2}";
5872    case 1:
5873      return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5874    default:
5875      gcc_unreachable ();
5876    }
5877}
5878  [(set_attr "isa" "noavx,avx")
5879   (set_attr "type" "sseishft")
5880   (set_attr "length_immediate" "1")
5881   (set_attr "prefix_data16" "1,*")
5882   (set_attr "prefix" "orig,vex")
5883   (set_attr "mode" "<sseinsnmode>")])
5884
;; Whole-vector right shift for the VI_128 modes.  The preparation code
;; replaces operands 0 and 1 with their V1TImode views, so the emitted
;; insn is a V1TI lshiftrt by the constant in operand 2 (which must
;; satisfy const_0_to_255_mul_8_operand).
5885(define_expand "vec_shr_<mode>"
5886  [(set (match_operand:VI_128 0 "register_operand" "")
5887	(lshiftrt:V1TI
5888	 (match_operand:VI_128 1 "register_operand" "")
5889	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5890  "TARGET_SSE2"
5891{
5892  operands[0] = gen_lowpart (V1TImode, operands[0]);
5893  operands[1] = gen_lowpart (V1TImode, operands[1]);
5894})
5895
;; Logical right shift of a VIMAX_AVX2 value by a constant, emitted as
;; psrldq (two-operand, operand 1 tied to the destination) or vpsrldq
;; (three-operand VEX form).  Operand 2 is divided by 8 before the
;; template is printed (presumably a bit count converted to the byte
;; count the instruction takes -- confirm against the predicate).
5896(define_insn "<sse2_avx2>_lshr<mode>3"
5897  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5898	(lshiftrt:VIMAX_AVX2
5899	 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5900	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5901  "TARGET_SSE2"
5902{
  /* Rewrite the shift count as operand 2 divided by 8.  */
5903  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5904
5905  switch (which_alternative)
5906    {
5907    case 0:
5908      return "psrldq\t{%2, %0|%0, %2}";
5909    case 1:
5910      return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5911    default:
5912      gcc_unreachable ();
5913    }
5914}
5915  [(set_attr "isa" "noavx,avx")
5916   (set_attr "type" "sseishft")
5917   (set_attr "length_immediate" "1")
5918   (set_attr "atom_unit" "sishuf")
5919   (set_attr "prefix_data16" "1,*")
5920   (set_attr "prefix" "orig,vex")
5921   (set_attr "mode" "<sseinsnmode>")])
5922
5923
;; Named expander for integer max/min (the maxmin code iterator) in the
;; VI124_256 modes; needs AVX2.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy and the maxmin pattern itself is
;; then emitted.
5924(define_expand "<code><mode>3"
5925  [(set (match_operand:VI124_256 0 "register_operand" "")
5926	(maxmin:VI124_256
5927	  (match_operand:VI124_256 1 "nonimmediate_operand" "")
5928	  (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
5929  "TARGET_AVX2"
5930  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
5931
;; AVX2 integer max/min for the VI124_256 modes, emitted as the
;; three-operand VEX instruction vp<maxmin_int><suffix>.  The "%" on
;; operand 1 declares operands 1 and 2 commutative; operand 2 may be in
;; memory.
5932(define_insn "*avx2_<code><mode>3"
5933  [(set (match_operand:VI124_256 0 "register_operand" "=x")
5934	(maxmin:VI124_256
5935	  (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
5936	  (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
5937  "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5938  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5939  [(set_attr "type" "sseiadd")
5940   (set_attr "prefix_extra" "1")
5941   (set_attr "prefix" "vex")
5942   (set_attr "mode" "OI")])
5943
;; Integer max/min (signed and unsigned, via the maxmin iterator) for the
;; VI8_AVX2 modes; needs SSE4.2.  There is no direct instruction pattern
;; here: the operation is expressed as a vector conditional and handed to
;; ix86_expand_int_vcond, which must succeed.
;;
;; The xops array follows the vcond operand layout used in this file:
;;   xops[0] = destination,
;;   xops[1] = value chosen when the comparison holds,
;;   xops[2] = value chosen otherwise,
;;   xops[3] = comparison rtx, xops[4]/xops[5] = its two operands.
;; The comparison is always "operand 1 > operand 2" (GTU for the unsigned
;; codes); for the min codes the two selected values are swapped.
5944(define_expand "<code><mode>3"
5945  [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5946	(maxmin:VI8_AVX2
5947	  (match_operand:VI8_AVX2 1 "register_operand" "")
5948	  (match_operand:VI8_AVX2 2 "register_operand" "")))]
5949  "TARGET_SSE4_2"
5950{
5951  enum rtx_code code;
5952  rtx xops[6];
5953  bool ok;
5954
5955  xops[0] = operands[0];
5956
5957  if (<CODE> == SMAX || <CODE> == UMAX)
5958    {
5959      xops[1] = operands[1];
5960      xops[2] = operands[2];
5961    }
5962  else
5963    {
5964      xops[1] = operands[2];
5965      xops[2] = operands[1];
5966    }
5967
5968  code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
5969
5970  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
5971  xops[4] = operands[1];
5972  xops[5] = operands[2];
5973
5974  ok = ix86_expand_int_vcond (xops);
5975  gcc_assert (ok);
5976  DONE;
5977})
5978
;; Signed max/min (the smaxmin iterator) for the VI124_128 modes; needs
;; SSE2.
;;   - With SSE4.1, or for V8HImode on any SSE2 target, the operands are
;;     passed through ix86_fixup_binary_operands_no_copy and the smaxmin
;;     pattern itself is emitted.
;;   - Otherwise both inputs are forced into registers and the operation
;;     is expanded as a vector conditional on "operand 1 > operand 2" via
;;     ix86_expand_int_vcond; for SMIN the two selected values are
;;     swapped.
5979(define_expand "<code><mode>3"
5980  [(set (match_operand:VI124_128 0 "register_operand" "")
5981	(smaxmin:VI124_128
5982	  (match_operand:VI124_128 1 "nonimmediate_operand" "")
5983	  (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
5984  "TARGET_SSE2"
5985{
5986  if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
5987    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
5988  else
5989    {
5990      rtx xops[6];
5991      bool ok;
5992
5993      xops[0] = operands[0];
5994      operands[1] = force_reg (<MODE>mode, operands[1]);
5995      operands[2] = force_reg (<MODE>mode, operands[2]);
5996
      /* xops[1] is selected when the comparison holds, xops[2]
	 otherwise.  */
5997      if (<CODE> == SMAX)
5998	{
5999	  xops[1] = operands[1];
6000	  xops[2] = operands[2];
6001	}
6002      else
6003	{
6004	  xops[1] = operands[2];
6005	  xops[2] = operands[1];
6006	}
6007
6008      xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6009      xops[4] = operands[1];
6010      xops[5] = operands[2];
6011
6012      ok = ix86_expand_int_vcond (xops);
6013      gcc_assert (ok);
6014      DONE;
6015    }
6016})
6017
;; SSE4.1 signed max/min for the VI14_128 modes, emitted as
;; p<maxmin_int><suffix> (two-operand, operand 1 tied to the destination)
;; or vp<maxmin_int><suffix> (three-operand VEX form).  The "%" on
;; operand 1 declares operands 1 and 2 commutative.
6018(define_insn "*sse4_1_<code><mode>3"
6019  [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
6020	(smaxmin:VI14_128
6021	  (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
6022	  (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
6023  "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6024  "@
6025   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6026   vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6027  [(set_attr "isa" "noavx,avx")
6028   (set_attr "type" "sseiadd")
6029   (set_attr "prefix_extra" "1,*")
6030   (set_attr "prefix" "orig,vex")
6031   (set_attr "mode" "TI")])
6032
;; SSE2 signed max/min for V8HI, emitted as p<maxmin_int>w (two-operand,
;; operand 1 tied to the destination) or vp<maxmin_int>w (three-operand
;; VEX form).  The "%" on operand 1 declares operands 1 and 2
;; commutative.
6033(define_insn "*<code>v8hi3"
6034  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6035	(smaxmin:V8HI
6036	  (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
6037	  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
6038  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6039  "@
6040   p<maxmin_int>w\t{%2, %0|%0, %2}
6041   vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
6042  [(set_attr "isa" "noavx,avx")
6043   (set_attr "type" "sseiadd")
6044   (set_attr "prefix_data16" "1,*")
6045   (set_attr "prefix_extra" "*,1")
6046   (set_attr "prefix" "orig,vex")
6047   (set_attr "mode" "TI")])
6048
;; Unsigned max/min (the umaxmin iterator) for the VI124_128 modes; needs
;; SSE2.  Three cases:
;;   - With SSE4.1, or for V16QImode on any SSE2 target, the operands are
;;     passed through ix86_fixup_binary_operands_no_copy and the umaxmin
;;     pattern itself is emitted.
;;   - UMAX on V8HImode without SSE4.1 is computed as
;;     sse2_ussubv8hi3 (op1, op2) + op2 (by its name an unsigned
;;     saturating subtract).
;;   - Everything else forces both inputs into registers and expands a
;;     vector conditional on "operand 1 >u operand 2" via
;;     ix86_expand_int_vcond; for UMIN the two selected values are
;;     swapped.
6049(define_expand "<code><mode>3"
6050  [(set (match_operand:VI124_128 0 "register_operand" "")
6051	(umaxmin:VI124_128
6052	  (match_operand:VI124_128 1 "nonimmediate_operand" "")
6053	  (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6054  "TARGET_SSE2"
6055{
6056  if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
6057    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
6058  else if (<CODE> == UMAX && <MODE>mode == V8HImode)
6059    {
6060      rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6061      operands[1] = force_reg (<MODE>mode, operands[1]);
      /* The subtraction result normally goes straight into op0, but op2
	 is read again by the addition, so use a fresh temporary when op0
	 and op2 are the same rtx.  */
6062      if (rtx_equal_p (op3, op2))
6063	op3 = gen_reg_rtx (V8HImode);
6064      emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6065      emit_insn (gen_addv8hi3 (op0, op3, op2));
6066      DONE;
6067    }
6068  else
6069    {
6070      rtx xops[6];
6071      bool ok;
6072
6073      operands[1] = force_reg (<MODE>mode, operands[1]);
6074      operands[2] = force_reg (<MODE>mode, operands[2]);
6075
6076      xops[0] = operands[0];
6077
      /* xops[1] is selected when the comparison holds, xops[2]
	 otherwise.  */
6078      if (<CODE> == UMAX)
6079	{
6080	  xops[1] = operands[1];
6081	  xops[2] = operands[2];
6082	}
6083      else
6084	{
6085	  xops[1] = operands[2];
6086	  xops[2] = operands[1];
6087	}
6088
6089      xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6090      xops[4] = operands[1];
6091      xops[5] = operands[2];
6092
6093      ok = ix86_expand_int_vcond (xops);
6094      gcc_assert (ok);
6095      DONE;
6096    }
6097})
6098
;; SSE4.1 unsigned max/min for the VI24_128 modes, emitted as
;; p<maxmin_int><suffix> (two-operand, operand 1 tied to the destination)
;; or vp<maxmin_int><suffix> (three-operand VEX form).  The "%" on
;; operand 1 declares operands 1 and 2 commutative.
6099(define_insn "*sse4_1_<code><mode>3"
6100  [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
6101	(umaxmin:VI24_128
6102	  (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
6103	  (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
6104  "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6105  "@
6106   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6107   vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6108  [(set_attr "isa" "noavx,avx")
6109   (set_attr "type" "sseiadd")
6110   (set_attr "prefix_extra" "1,*")
6111   (set_attr "prefix" "orig,vex")
6112   (set_attr "mode" "TI")])
6113
;; SSE2 unsigned max/min for V16QI, emitted as p<maxmin_int>b
;; (two-operand, operand 1 tied to the destination) or vp<maxmin_int>b
;; (three-operand VEX form).  The "%" on operand 1 declares operands 1
;; and 2 commutative.
6114(define_insn "*<code>v16qi3"
6115  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6116	(umaxmin:V16QI
6117	  (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
6118	  (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
6119  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6120  "@
6121   p<maxmin_int>b\t{%2, %0|%0, %2}
6122   vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
6123  [(set_attr "isa" "noavx,avx")
6124   (set_attr "type" "sseiadd")
6125   (set_attr "prefix_data16" "1,*")
6126   (set_attr "prefix_extra" "*,1")
6127   (set_attr "prefix" "orig,vex")
6128   (set_attr "mode" "TI")])
6129
6130;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6131;;
6132;; Parallel integral comparisons
6133;;
6134;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6135
;; Expander for element-wise equality comparison in the VI_256 modes;
;; needs AVX2.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy and the eq pattern itself is then
;; emitted.
6136(define_expand "avx2_eq<mode>3"
6137  [(set (match_operand:VI_256 0 "register_operand" "")
6138	(eq:VI_256
6139	  (match_operand:VI_256 1 "nonimmediate_operand" "")
6140	  (match_operand:VI_256 2 "nonimmediate_operand" "")))]
6141  "TARGET_AVX2"
6142  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
6143
;; AVX2 element-wise equality comparison for the VI_256 modes, emitted as
;; the three-operand VEX instruction vpcmpeq<suffix>.  The "%" on
;; operand 1 declares operands 1 and 2 commutative; operand 2 may be in
;; memory.
6144(define_insn "*avx2_eq<mode>3"
6145  [(set (match_operand:VI_256 0 "register_operand" "=x")
6146	(eq:VI_256
6147	  (match_operand:VI_256 1 "nonimmediate_operand" "%x")
6148	  (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6149  "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6150  "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6151  [(set_attr "type" "ssecmp")
6152   (set_attr "prefix_extra" "1")
6153   (set_attr "prefix" "vex")
6154   (set_attr "mode" "OI")])
6155
;; SSE4.1 element-wise equality comparison for V2DI, emitted as pcmpeqq
;; (two-operand, operand 1 tied to the destination) or vpcmpeqq
;; (three-operand VEX form).  The "%" on operand 1 declares operands 1
;; and 2 commutative.
6156(define_insn "*sse4_1_eqv2di3"
6157  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6158	(eq:V2DI
6159	  (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
6160	  (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6161  "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6162  "@
6163   pcmpeqq\t{%2, %0|%0, %2}
6164   vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
6165  [(set_attr "isa" "noavx,avx")
6166   (set_attr "type" "ssecmp")
6167   (set_attr "prefix_extra" "1")
6168   (set_attr "prefix" "orig,vex")
6169   (set_attr "mode" "TI")])
6170
;; SSE2 element-wise equality comparison for the VI124_128 modes, emitted
;; as pcmpeq<suffix> (two-operand, operand 1 tied to the destination) or
;; vpcmpeq<suffix> (three-operand VEX form).  The pattern is disabled
;; when TARGET_XOP is set.  The "%" on operand 1 declares operands 1 and
;; 2 commutative.
6171(define_insn "*sse2_eq<mode>3"
6172  [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6173	(eq:VI124_128
6174	  (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
6175	  (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6176  "TARGET_SSE2 && !TARGET_XOP
6177   && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6178  "@
6179   pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
6180   vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6181  [(set_attr "isa" "noavx,avx")
6182   (set_attr "type" "ssecmp")
6183   (set_attr "prefix_data16" "1,*")
6184   (set_attr "prefix" "orig,vex")
6185   (set_attr "mode" "TI")])
6186
;; Expander for element-wise equality comparison in the VI124_128 modes;
;; needs SSE2 and is disabled when TARGET_XOP is set.  The operands are
;; passed through ix86_fixup_binary_operands_no_copy and the eq pattern
;; itself is then emitted.
6187(define_expand "sse2_eq<mode>3"
6188  [(set (match_operand:VI124_128 0 "register_operand" "")
6189	(eq:VI124_128
6190	  (match_operand:VI124_128 1 "nonimmediate_operand" "")
6191	  (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6192  "TARGET_SSE2 && !TARGET_XOP "
6193  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
6194
;; Expander for element-wise equality comparison on V2DI; needs SSE4.1.
;; The operands are passed through ix86_fixup_binary_operands_no_copy and
;; the eq pattern itself is then emitted.
6195(define_expand "sse4_1_eqv2di3"
6196  [(set (match_operand:V2DI 0 "register_operand" "")
6197	(eq:V2DI
6198	  (match_operand:V2DI 1 "nonimmediate_operand" "")
6199	  (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6200  "TARGET_SSE4_1"
6201  "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
6202
;; SSE4.2 element-wise signed greater-than comparison for V2DI, emitted
;; as pcmpgtq (two-operand, operand 1 tied to the destination) or
;; vpcmpgtq (three-operand VEX form).  Operand 1 must be a register and
;; carries no "%" marker: the operand order is significant.
6203(define_insn "sse4_2_gtv2di3"
6204  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6205	(gt:V2DI
6206	  (match_operand:V2DI 1 "register_operand" "0,x")
6207	  (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6208  "TARGET_SSE4_2"
6209  "@
6210   pcmpgtq\t{%2, %0|%0, %2}
6211   vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
6212  [(set_attr "isa" "noavx,avx")
6213   (set_attr "type" "ssecmp")
6214   (set_attr "prefix_extra" "1")
6215   (set_attr "prefix" "orig,vex")
6216   (set_attr "mode" "TI")])
6217
;; AVX2 element-wise signed greater-than comparison for the VI_256 modes,
;; emitted as the three-operand VEX instruction vpcmpgt<suffix>.
;; Operand 1 must be a register; operand 2 may be in memory.
6218(define_insn "avx2_gt<mode>3"
6219  [(set (match_operand:VI_256 0 "register_operand" "=x")
6220	(gt:VI_256
6221	  (match_operand:VI_256 1 "register_operand" "x")
6222	  (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6223  "TARGET_AVX2"
6224  "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6225  [(set_attr "type" "ssecmp")
6226   (set_attr "prefix_extra" "1")
6227   (set_attr "prefix" "vex")
6228   (set_attr "mode" "OI")])
6229
;; SSE2 element-wise signed greater-than comparison for the VI124_128
;; modes, emitted as pcmpgt<suffix> (two-operand, operand 1 tied to the
;; destination) or vpcmpgt<suffix> (three-operand VEX form).  The pattern
;; is disabled when TARGET_XOP is set.  Operand 1 must be a register.
6230(define_insn "sse2_gt<mode>3"
6231  [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6232	(gt:VI124_128
6233	  (match_operand:VI124_128 1 "register_operand" "0,x")
6234	  (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6235  "TARGET_SSE2 && !TARGET_XOP"
6236  "@
6237   pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
6238   vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6239  [(set_attr "isa" "noavx,avx")
6240   (set_attr "type" "ssecmp")
6241   (set_attr "prefix_data16" "1,*")
6242   (set_attr "prefix" "orig,vex")
6243   (set_attr "mode" "TI")])
6244
;; Vector conditional select for 256-bit modes: operand 0 receives
;; operand 1 or operand 2 depending on the comparison (operator 3)
;; applied to the VI_256 operands 4 and 5.  The data mode (V_256) and the
;; comparison mode (VI_256) are iterated independently, but the expander
;; is enabled only when both have the same number of elements.  Needs
;; AVX2.  All work is delegated to ix86_expand_int_vcond, which must
;; succeed.
6245(define_expand "vcond<V_256:mode><VI_256:mode>"
6246  [(set (match_operand:V_256 0 "register_operand" "")
6247	(if_then_else:V_256
6248	  (match_operator 3 ""
6249	    [(match_operand:VI_256 4 "nonimmediate_operand" "")
6250	     (match_operand:VI_256 5 "general_operand" "")])
6251	  (match_operand:V_256 1 "" "")
6252	  (match_operand:V_256 2 "" "")))]
6253  "TARGET_AVX2
6254   && (GET_MODE_NUNITS (<V_256:MODE>mode)
6255       == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6256{
6257  bool ok = ix86_expand_int_vcond (operands);
6258  gcc_assert (ok);
6259  DONE;
6260})
6261
;; Vector conditional select for 128-bit modes: operand 0 receives
;; operand 1 or operand 2 depending on the comparison (operator 3)
;; applied to the VI124_128 operands 4 and 5.  The data mode (V_128) and
;; the comparison mode (VI124_128) are iterated independently, but the
;; expander is enabled only when both have the same number of elements.
;; Needs SSE2.  All work is delegated to ix86_expand_int_vcond, which
;; must succeed.
6262(define_expand "vcond<V_128:mode><VI124_128:mode>"
6263  [(set (match_operand:V_128 0 "register_operand" "")
6264	(if_then_else:V_128
6265	  (match_operator 3 ""
6266	    [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6267	     (match_operand:VI124_128 5 "general_operand" "")])
6268	  (match_operand:V_128 1 "" "")
6269	  (match_operand:V_128 2 "" "")))]
6270  "TARGET_SSE2
6271   && (GET_MODE_NUNITS (<V_128:MODE>mode)
6272       == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6273{
6274  bool ok = ix86_expand_int_vcond (operands);
6275  gcc_assert (ok);
6276  DONE;
6277})
6278
;; Vector conditional select with a V2DI comparison: operand 0 (a
;; VI8F_128 mode) receives operand 1 or operand 2 depending on the
;; comparison (operator 3) applied to the V2DI operands 4 and 5.  Needs
;; SSE4.2.  All work is delegated to ix86_expand_int_vcond, which must
;; succeed.
6279(define_expand "vcond<VI8F_128:mode>v2di"
6280  [(set (match_operand:VI8F_128 0 "register_operand" "")
6281	(if_then_else:VI8F_128
6282	  (match_operator 3 ""
6283	    [(match_operand:V2DI 4 "nonimmediate_operand" "")
6284	     (match_operand:V2DI 5 "general_operand" "")])
6285	  (match_operand:VI8F_128 1 "" "")
6286	  (match_operand:VI8F_128 2 "" "")))]
6287  "TARGET_SSE4_2"
6288{
6289  bool ok = ix86_expand_int_vcond (operands);
6290  gcc_assert (ok);
6291  DONE;
6292})
6293
;; "vcondu" counterpart of the 256-bit vector conditional select (GCC's
;; standard name for the variant whose comparison is unsigned).
;; Operand 0 receives operand 1 or operand 2 depending on the comparison
;; (operator 3) applied to the VI_256 operands 4 and 5.  Enabled under
;; AVX2 when the data and comparison modes have the same number of
;; elements.  All work is delegated to ix86_expand_int_vcond, which must
;; succeed.
6294(define_expand "vcondu<V_256:mode><VI_256:mode>"
6295  [(set (match_operand:V_256 0 "register_operand" "")
6296	(if_then_else:V_256
6297	  (match_operator 3 ""
6298	    [(match_operand:VI_256 4 "nonimmediate_operand" "")
6299	     (match_operand:VI_256 5 "nonimmediate_operand" "")])
6300	  (match_operand:V_256 1 "general_operand" "")
6301	  (match_operand:V_256 2 "general_operand" "")))]
6302  "TARGET_AVX2
6303   && (GET_MODE_NUNITS (<V_256:MODE>mode)
6304       == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6305{
6306  bool ok = ix86_expand_int_vcond (operands);
6307  gcc_assert (ok);
6308  DONE;
6309})
6310
;; "vcondu" counterpart of the 128-bit vcond expander (vcondu is GCC's
;; standard pattern name for the unsigned-comparison form):
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; Requires TARGET_SSE2 and equal element counts in the data (V_128) and
;; comparison (VI124_128) modes.  Expansion is shared with vcond through
;; ix86_expand_int_vcond, asserted to succeed.
6311(define_expand "vcondu<V_128:mode><VI124_128:mode>"
6312  [(set (match_operand:V_128 0 "register_operand" "")
6313	(if_then_else:V_128
6314	  (match_operator 3 ""
6315	    [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6316	     (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6317	  (match_operand:V_128 1 "general_operand" "")
6318	  (match_operand:V_128 2 "general_operand" "")))]
6319  "TARGET_SSE2
6320   && (GET_MODE_NUNITS (<V_128:MODE>mode)
6321       == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6322{
6323  bool ok = ix86_expand_int_vcond (operands);
6324  gcc_assert (ok);
6325  DONE;
6326})
6327
;; "vcondu" form whose comparison is done in V2DImode and whose selected
;; data is in a VI8F_128 mode:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; Gated on TARGET_SSE4_2.  Expansion is done by ix86_expand_int_vcond,
;; asserted to succeed.
6328(define_expand "vcondu<VI8F_128:mode>v2di"
6329  [(set (match_operand:VI8F_128 0 "register_operand" "")
6330	(if_then_else:VI8F_128
6331	  (match_operator 3 ""
6332	    [(match_operand:V2DI 4 "nonimmediate_operand" "")
6333	     (match_operand:V2DI 5 "nonimmediate_operand" "")])
6334	  (match_operand:VI8F_128 1 "general_operand" "")
6335	  (match_operand:VI8F_128 2 "general_operand" "")))]
6336  "TARGET_SSE4_2"
6337{
6338  bool ok = ix86_expand_int_vcond (operands);
6339  gcc_assert (ok);
6340  DONE;
6341})
6342
;; Modes accepted by the variable-selector vec_perm expander below.
;; The six 128-bit modes are listed unconditionally; every 256-bit mode
;; is additionally gated on TARGET_AVX2.
6343(define_mode_iterator VEC_PERM_AVX2
6344  [V16QI V8HI V4SI V2DI V4SF V2DF
6345   (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
6346   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
6347   (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
6348
;; Permutation with a run-time selector.  Operands 1 and 2 are the two
;; input vectors, operand 3 is the selector held in a register of the
;; matching integer vector mode (<sseintvecmode>), operand 0 the result.
;; Enabled when any of SSSE3, AVX or XOP is available (on top of the
;; per-mode conditions in VEC_PERM_AVX2).  The expansion itself lives in
;; ix86_expand_vec_perm, whose result is not checked here.
6349(define_expand "vec_perm<mode>"
6350  [(match_operand:VEC_PERM_AVX2 0 "register_operand" "")
6351   (match_operand:VEC_PERM_AVX2 1 "register_operand" "")
6352   (match_operand:VEC_PERM_AVX2 2 "register_operand" "")
6353   (match_operand:<sseintvecmode> 3 "register_operand" "")]
6354  "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
6355{
6356  ix86_expand_vec_perm (operands);
6357  DONE;
6358})
6359
;; Modes accepted by the constant-selector vec_perm_const expander below.
;; Every entry carries its own ISA condition, which is why the expander
;; itself has an empty condition string.
6360(define_mode_iterator VEC_PERM_CONST
6361  [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
6362   (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
6363   (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
6364   (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
6365   (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
6366   (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")])
6367
;; Permutation with a compile-time selector.  Operands 1 and 2 are the
;; input vectors; operand 3 is the selector in <sseintvecmode> and has no
;; predicate.  Availability is decided per mode by VEC_PERM_CONST.
;; Unlike vec_perm, this expander may FAIL: it does so whenever
;; ix86_expand_vec_perm_const returns false.
6368(define_expand "vec_perm_const<mode>"
6369  [(match_operand:VEC_PERM_CONST 0 "register_operand" "")
6370   (match_operand:VEC_PERM_CONST 1 "register_operand" "")
6371   (match_operand:VEC_PERM_CONST 2 "register_operand" "")
6372   (match_operand:<sseintvecmode> 3 "" "")]
6373  ""
6374{
6375  if (ix86_expand_vec_perm_const (operands))
6376    DONE;
6377  else
6378    FAIL;
6379})
6380
6381;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6382;;
6383;; Parallel bitwise logical operations
6384;;
6385;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6386
;; Bitwise complement of an integer vector, expressed as an XOR with an
;; all-ones vector.  The preparation code builds a CONST_VECTOR whose
;; GET_MODE_NUNITS elements are all constm1_rtx, forces it into a register
;; and installs it as operand 2, which the template references through
;; match_dup.  There is no DONE, so the XOR template itself is emitted.
6387(define_expand "one_cmpl<mode>2"
6388  [(set (match_operand:VI 0 "register_operand" "")
6389	(xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
6390		(match_dup 2)))]
6391  "TARGET_SSE"
6392{
6393  int i, n = GET_MODE_NUNITS (<MODE>mode);
6394  rtvec v = rtvec_alloc (n);
6395
6396  for (i = 0; i < n; ++i)
6397    RTVEC_ELT (v, i) = constm1_rtx;
6398
6399  operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
6400})
6401
;; Named and-not expander:  operand 0 = ~operand 1 & operand 2.
;; Operand 1 (the complemented input) must be a register; operand 2 may
;; be a register or memory.  There are no preparation statements, so the
;; template is emitted unchanged.  Iterates over VI_AVX2 and requires
;; TARGET_SSE2.
6402(define_expand "<sse2_avx2>_andnot<mode>3"
6403  [(set (match_operand:VI_AVX2 0 "register_operand" "")
6404	(and:VI_AVX2
6405	  (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand" ""))
6406	  (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
6407  "TARGET_SSE2")
6408
;; Integer vector and-not:  operand 0 = ~operand 1 & operand 2.
;;
;; Alternatives:
;;   0 (isa noavx) - two-operand legacy encoding, operand 1 tied to the
;;                   destination ("0").
;;   1 (isa avx)   - three-operand form, mnemonic prefixed with "v".
;;
;; The mnemonic is chosen from the insn's "mode" attribute, computed at
;; the bottom of the pattern:
;;   - V8SF when AVX2 is not enabled and the vector is wider than 16 bytes,
;;   - V4SF when SSE2 is not enabled,
;;   - <sseinsnmode> otherwise.
;; MODE_OI/MODE_TI emit "pandn"; MODE_V8SF/MODE_V4SF emit "andnps".
;; In the switch, MODE_OI deliberately falls through into MODE_TI and
;; MODE_V8SF into MODE_V4SF, so the wider case runs both assertions.
;;
;; The output string is formatted into a function-local static buffer and
;; that buffer is returned.  The longest result this pattern can produce,
;; "vandnps\t{%2, %1, %0|%0, %1, %2}", is 31 characters, so it fills the
;; 32-byte buffer exactly; a longer mnemonic would be silently truncated
;; by snprintf.
;;
;; prefix_data16 is "1" only for alternative 0 in TI mode.
6409(define_insn "*andnot<mode>3"
6410  [(set (match_operand:VI 0 "register_operand" "=x,x")
6411	(and:VI
6412	  (not:VI (match_operand:VI 1 "register_operand" "0,x"))
6413	  (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6414  "TARGET_SSE"
6415{
6416  static char buf[32];
6417  const char *ops;
6418  const char *tmp;
6419
6420  switch (get_attr_mode (insn))
6421    {
6422    case MODE_OI:
6423      gcc_assert (TARGET_AVX2);
6424    case MODE_TI:
6425      gcc_assert (TARGET_SSE2);
6426
6427      tmp = "pandn";
6428      break;
6429
6430   case MODE_V8SF:
6431      gcc_assert (TARGET_AVX);
6432   case MODE_V4SF:
6433      gcc_assert (TARGET_SSE);
6434
6435      tmp = "andnps";
6436      break;
6437
6438   default:
6439      gcc_unreachable ();
6440   }
6441
6442  switch (which_alternative)
6443    {
6444    case 0:
6445      ops = "%s\t{%%2, %%0|%%0, %%2}";
6446      break;
6447    case 1:
6448      ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6449      break;
6450    default:
6451      gcc_unreachable ();
6452    }
6453
6454  snprintf (buf, sizeof (buf), ops, tmp);
6455  return buf;
6456}
6457  [(set_attr "isa" "noavx,avx")
6458   (set_attr "type" "sselog")
6459   (set (attr "prefix_data16")
6460     (if_then_else
6461       (and (eq_attr "alternative" "0")
6462	    (eq_attr "mode" "TI"))
6463       (const_string "1")
6464       (const_string "*")))
6465   (set_attr "prefix" "orig,vex")
6466   (set (attr "mode")
6467     (cond [(and (not (match_test "TARGET_AVX2"))
6468		 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6469	      (const_string "V8SF")
6470	    (not (match_test "TARGET_SSE2"))
6471	      (const_string "V4SF")
6472	   ]
6473	   (const_string "<sseinsnmode>")))])
6474
;; Expander for the bitwise logic operations generated by the any_logic
;; code iterator, on integer vectors (VI).  Both inputs may be memory at
;; this point; the single preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
6475(define_expand "<code><mode>3"
6476  [(set (match_operand:VI 0 "register_operand" "")
6477	(any_logic:VI
6478	  (match_operand:VI 1 "nonimmediate_operand" "")
6479	  (match_operand:VI 2 "nonimmediate_operand" "")))]
6480  "TARGET_SSE"
6481  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
6482
;; Integer vector bitwise logic (any_logic) instruction.
;;
;; The "%" on operand 1 marks the operation as commutative.  The insn
;; condition additionally requires ix86_binary_operator_ok.
;;
;; Alternatives:
;;   0 (isa noavx) - two-operand legacy encoding, operand 1 tied to the
;;                   destination.
;;   1 (isa avx)   - three-operand form, mnemonic prefixed with "v".
;;
;; The mnemonic follows the "mode" attribute, which is computed exactly as
;; in *andnot<mode>3: V8SF without AVX2 for vectors wider than 16 bytes,
;; V4SF without SSE2, <sseinsnmode> otherwise.  MODE_OI/MODE_TI emit
;; "p<logic>", MODE_V8SF/MODE_V4SF emit "<logic>ps".  MODE_OI deliberately
;; falls through into MODE_TI and MODE_V8SF into MODE_V4SF, so the wider
;; case runs both assertions.
;;
;; The result is formatted into a function-local static 32-byte buffer
;; and that buffer is returned.
6483(define_insn "*<code><mode>3"
6484  [(set (match_operand:VI 0 "register_operand" "=x,x")
6485	(any_logic:VI
6486	  (match_operand:VI 1 "nonimmediate_operand" "%0,x")
6487	  (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6488  "TARGET_SSE
6489   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6490{
6491  static char buf[32];
6492  const char *ops;
6493  const char *tmp;
6494
6495  switch (get_attr_mode (insn))
6496    {
6497    case MODE_OI:
6498      gcc_assert (TARGET_AVX2);
6499    case MODE_TI:
6500      gcc_assert (TARGET_SSE2);
6501
6502      tmp = "p<logic>";
6503      break;
6504
6505   case MODE_V8SF:
6506      gcc_assert (TARGET_AVX);
6507   case MODE_V4SF:
6508      gcc_assert (TARGET_SSE);
6509
6510      tmp = "<logic>ps";
6511      break;
6512
6513   default:
6514      gcc_unreachable ();
6515   }
6516
6517  switch (which_alternative)
6518    {
6519    case 0:
6520      ops = "%s\t{%%2, %%0|%%0, %%2}";
6521      break;
6522    case 1:
6523      ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6524      break;
6525    default:
6526      gcc_unreachable ();
6527    }
6528
6529  snprintf (buf, sizeof (buf), ops, tmp);
6530  return buf;
6531}
6532  [(set_attr "isa" "noavx,avx")
6533   (set_attr "type" "sselog")
6534   (set (attr "prefix_data16")
6535     (if_then_else
6536       (and (eq_attr "alternative" "0")
6537	    (eq_attr "mode" "TI"))
6538       (const_string "1")
6539       (const_string "*")))
6540   (set_attr "prefix" "orig,vex")
6541   (set (attr "mode")
6542     (cond [(and (not (match_test "TARGET_AVX2"))
6543		 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6544	      (const_string "V8SF")
6545	    (not (match_test "TARGET_SSE2"))
6546	      (const_string "V4SF")
6547	   ]
6548	   (const_string "<sseinsnmode>")))])
6549
;; And-not on a TFmode value held in an SSE register:
;;   operand 0 = ~operand 1 & operand 2
;; Always emitted as the integer instruction pandn (legacy two-operand
;; form, alternative 0) or vpandn (three-operand form, alternative 1),
;; with insn mode TI.  Requires TARGET_SSE2.
6550(define_insn "*andnottf3"
6551  [(set (match_operand:TF 0 "register_operand" "=x,x")
6552	(and:TF
6553	  (not:TF (match_operand:TF 1 "register_operand" "0,x"))
6554	  (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6555  "TARGET_SSE2"
6556  "@
6557   pandn\t{%2, %0|%0, %2}
6558   vpandn\t{%2, %1, %0|%0, %1, %2}"
6559  [(set_attr "isa" "noavx,avx")
6560   (set_attr "type" "sselog")
6561   (set_attr "prefix_data16" "1,*")
6562   (set_attr "prefix" "orig,vex")
6563   (set_attr "mode" "TI")])
6564
;; Expander for the any_logic bitwise operations on TFmode values.
;; Mirrors the vector <code><mode>3 expander: the operands are passed
;; through ix86_fixup_binary_operands_no_copy and the template is then
;; emitted.  Requires TARGET_SSE2.
6565(define_expand "<code>tf3"
6566  [(set (match_operand:TF 0 "register_operand" "")
6567	(any_logic:TF
6568	  (match_operand:TF 1 "nonimmediate_operand" "")
6569	  (match_operand:TF 2 "nonimmediate_operand" "")))]
6570  "TARGET_SSE2"
6571  "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
6572
;; any_logic bitwise operation on TFmode values in SSE registers.
;; Operand 1 is marked commutative ("%"); the condition also requires
;; ix86_binary_operator_ok.  Emits p<logic> (legacy two-operand form,
;; alternative 0) or vp<logic> (three-operand form, alternative 1), with
;; insn mode TI.
6573(define_insn "*<code>tf3"
6574  [(set (match_operand:TF 0 "register_operand" "=x,x")
6575	(any_logic:TF
6576	  (match_operand:TF 1 "nonimmediate_operand" "%0,x")
6577	  (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6578  "TARGET_SSE2
6579   && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6580  "@
6581   p<logic>\t{%2, %0|%0, %2}
6582   vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6583  [(set_attr "isa" "noavx,avx")
6584   (set_attr "type" "sselog")
6585   (set_attr "prefix_data16" "1,*")
6586   (set_attr "prefix" "orig,vex")
6587   (set_attr "mode" "TI")])
6588
6589;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6590;;
6591;; Parallel integral element swizzling
6592;;
6593;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6594
;; Truncating pack of two vectors into one vector of <ssepackmode>.
;; Both inputs are re-viewed in <ssepackmode> with gen_lowpart and the
;; result is produced by ix86_expand_vec_extract_even_odd with a final
;; argument of 0.
;; NOTE(review): the 0 presumably selects the even-indexed narrow
;; elements, i.e. the low half of each wide element -- confirm against
;; ix86_expand_vec_extract_even_odd.
6595(define_expand "vec_pack_trunc_<mode>"
6596  [(match_operand:<ssepackmode> 0 "register_operand" "")
6597   (match_operand:VI248_AVX2 1 "register_operand" "")
6598   (match_operand:VI248_AVX2 2 "register_operand" "")]
6599  "TARGET_SSE2"
6600{
6601  rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6602  rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6603  ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
6604  DONE;
6605})
6606
;; packsswb / vpacksswb: the result is the concatenation of the
;; signed-saturating truncations (ss_truncate) of operand 1 and operand 2,
;; each narrowed from <sseunpackmode> to <ssehalfvecmode>.
;; Alternative 0 is the legacy two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand "v" form.
6607(define_insn "<sse2_avx2>_packsswb"
6608  [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6609	(vec_concat:VI1_AVX2
6610	  (ss_truncate:<ssehalfvecmode>
6611	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6612	  (ss_truncate:<ssehalfvecmode>
6613	    (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6614  "TARGET_SSE2"
6615  "@
6616   packsswb\t{%2, %0|%0, %2}
6617   vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6618  [(set_attr "isa" "noavx,avx")
6619   (set_attr "type" "sselog")
6620   (set_attr "prefix_data16" "1,*")
6621   (set_attr "prefix" "orig,vex")
6622   (set_attr "mode" "<sseinsnmode>")])
6623
;; packssdw / vpackssdw: the result is the concatenation of the
;; signed-saturating truncations (ss_truncate) of operand 1 and operand 2,
;; each narrowed from <sseunpackmode> to <ssehalfvecmode>.  Same shape as
;; the packsswb pattern, but the result iterates over VI2_AVX2.
;; Alternative 0 is the legacy two-operand form; alternative 1 is the
;; three-operand "v" form.
6624(define_insn "<sse2_avx2>_packssdw"
6625  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6626	(vec_concat:VI2_AVX2
6627	  (ss_truncate:<ssehalfvecmode>
6628	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6629	  (ss_truncate:<ssehalfvecmode>
6630	    (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6631  "TARGET_SSE2"
6632  "@
6633   packssdw\t{%2, %0|%0, %2}
6634   vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6635  [(set_attr "isa" "noavx,avx")
6636   (set_attr "type" "sselog")
6637   (set_attr "prefix_data16" "1,*")
6638   (set_attr "prefix" "orig,vex")
6639   (set_attr "mode" "<sseinsnmode>")])
6640
;; packuswb / vpackuswb: the result is the concatenation of the
;; unsigned-saturating truncations (us_truncate) of operand 1 and
;; operand 2, each narrowed from <sseunpackmode> to <ssehalfvecmode>.
;; Alternative 0 is the legacy two-operand form; alternative 1 is the
;; three-operand "v" form.
6641(define_insn "<sse2_avx2>_packuswb"
6642  [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6643	(vec_concat:VI1_AVX2
6644	  (us_truncate:<ssehalfvecmode>
6645	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6646	  (us_truncate:<ssehalfvecmode>
6647	    (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6648  "TARGET_SSE2"
6649  "@
6650   packuswb\t{%2, %0|%0, %2}
6651   vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6652  [(set_attr "isa" "noavx,avx")
6653   (set_attr "type" "sselog")
6654   (set_attr "prefix_data16" "1,*")
6655   (set_attr "prefix" "orig,vex")
6656   (set_attr "mode" "<sseinsnmode>")])
6657
;; vpunpckhbw on 256-bit vectors.  Indices select from the 64-element
;; concatenation of operand 1 (0..31) and operand 2 (32..63).  The
;; interleave is done per 128-bit half: bytes 8..15 of each input are
;; interleaved first, then bytes 24..31.  The result is therefore NOT the
;; interleave of the top 16 bytes of each full 256-bit input.
6658(define_insn "avx2_interleave_highv32qi"
6659  [(set (match_operand:V32QI 0 "register_operand" "=x")
6660	(vec_select:V32QI
6661	  (vec_concat:V64QI
6662	    (match_operand:V32QI 1 "register_operand" "x")
6663	    (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6664	  (parallel [(const_int 8)  (const_int 40)
6665		     (const_int 9)  (const_int 41)
6666		     (const_int 10) (const_int 42)
6667		     (const_int 11) (const_int 43)
6668		     (const_int 12) (const_int 44)
6669		     (const_int 13) (const_int 45)
6670		     (const_int 14) (const_int 46)
6671		     (const_int 15) (const_int 47)
6672		     (const_int 24) (const_int 56)
6673		     (const_int 25) (const_int 57)
6674		     (const_int 26) (const_int 58)
6675		     (const_int 27) (const_int 59)
6676		     (const_int 28) (const_int 60)
6677		     (const_int 29) (const_int 61)
6678		     (const_int 30) (const_int 62)
6679		     (const_int 31) (const_int 63)])))]
6680  "TARGET_AVX2"
6681  "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6682  [(set_attr "type" "sselog")
6683   (set_attr "prefix" "vex")
6684   (set_attr "mode" "OI")])
6685
;; punpckhbw / vpunpckhbw on 128-bit vectors: interleaves bytes 8..15 of
;; operand 1 with bytes 8..15 of operand 2 (indices 24..31 of the
;; concatenation), operand 1's byte first in each pair.
;; Alternative 0 is the legacy two-operand form; alternative 1 is the
;; three-operand "v" form.
6686(define_insn "vec_interleave_highv16qi"
6687  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6688	(vec_select:V16QI
6689	  (vec_concat:V32QI
6690	    (match_operand:V16QI 1 "register_operand" "0,x")
6691	    (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6692	  (parallel [(const_int 8)  (const_int 24)
6693		     (const_int 9)  (const_int 25)
6694		     (const_int 10) (const_int 26)
6695		     (const_int 11) (const_int 27)
6696		     (const_int 12) (const_int 28)
6697		     (const_int 13) (const_int 29)
6698		     (const_int 14) (const_int 30)
6699		     (const_int 15) (const_int 31)])))]
6700  "TARGET_SSE2"
6701  "@
6702   punpckhbw\t{%2, %0|%0, %2}
6703   vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6704  [(set_attr "isa" "noavx,avx")
6705   (set_attr "type" "sselog")
6706   (set_attr "prefix_data16" "1,*")
6707   (set_attr "prefix" "orig,vex")
6708   (set_attr "mode" "TI")])
6709
;; vpunpcklbw on 256-bit vectors.  Indices select from the 64-element
;; concatenation of operand 1 (0..31) and operand 2 (32..63).  The
;; interleave is done per 128-bit half: bytes 0..7 of each input are
;; interleaved first, then bytes 16..23.
6710(define_insn "avx2_interleave_lowv32qi"
6711  [(set (match_operand:V32QI 0 "register_operand" "=x")
6712	(vec_select:V32QI
6713	  (vec_concat:V64QI
6714	    (match_operand:V32QI 1 "register_operand" "x")
6715	    (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6716	  (parallel [(const_int 0) (const_int 32)
6717		     (const_int 1) (const_int 33)
6718		     (const_int 2) (const_int 34)
6719		     (const_int 3) (const_int 35)
6720		     (const_int 4) (const_int 36)
6721		     (const_int 5) (const_int 37)
6722		     (const_int 6) (const_int 38)
6723		     (const_int 7) (const_int 39)
6724		     (const_int 16) (const_int 48)
6725		     (const_int 17) (const_int 49)
6726		     (const_int 18) (const_int 50)
6727		     (const_int 19) (const_int 51)
6728		     (const_int 20) (const_int 52)
6729		     (const_int 21) (const_int 53)
6730		     (const_int 22) (const_int 54)
6731		     (const_int 23) (const_int 55)])))]
6732  "TARGET_AVX2"
6733  "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6734  [(set_attr "type" "sselog")
6735   (set_attr "prefix" "vex")
6736   (set_attr "mode" "OI")])
6737
;; punpcklbw / vpunpcklbw on 128-bit vectors: interleaves bytes 0..7 of
;; operand 1 with bytes 0..7 of operand 2 (indices 16..23 of the
;; concatenation), operand 1's byte first in each pair.
;; Alternative 0 is the legacy two-operand form; alternative 1 is the
;; three-operand "v" form.
6738(define_insn "vec_interleave_lowv16qi"
6739  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6740	(vec_select:V16QI
6741	  (vec_concat:V32QI
6742	    (match_operand:V16QI 1 "register_operand" "0,x")
6743	    (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6744	  (parallel [(const_int 0) (const_int 16)
6745		     (const_int 1) (const_int 17)
6746		     (const_int 2) (const_int 18)
6747		     (const_int 3) (const_int 19)
6748		     (const_int 4) (const_int 20)
6749		     (const_int 5) (const_int 21)
6750		     (const_int 6) (const_int 22)
6751		     (const_int 7) (const_int 23)])))]
6752  "TARGET_SSE2"
6753  "@
6754   punpcklbw\t{%2, %0|%0, %2}
6755   vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6756  [(set_attr "isa" "noavx,avx")
6757   (set_attr "type" "sselog")
6758   (set_attr "prefix_data16" "1,*")
6759   (set_attr "prefix" "orig,vex")
6760   (set_attr "mode" "TI")])
6761
;; vpunpckhwd on 256-bit vectors.  Indices select from the 32-element
;; concatenation of operand 1 (0..15) and operand 2 (16..31).  Per 128-bit
;; half: words 4..7 of each input are interleaved first, then words
;; 12..15.
6762(define_insn "avx2_interleave_highv16hi"
6763  [(set (match_operand:V16HI 0 "register_operand" "=x")
6764	(vec_select:V16HI
6765	  (vec_concat:V32HI
6766	    (match_operand:V16HI 1 "register_operand" "x")
6767	    (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6768	  (parallel [(const_int 4) (const_int 20)
6769		     (const_int 5) (const_int 21)
6770		     (const_int 6) (const_int 22)
6771		     (const_int 7) (const_int 23)
6772		     (const_int 12) (const_int 28)
6773		     (const_int 13) (const_int 29)
6774		     (const_int 14) (const_int 30)
6775		     (const_int 15) (const_int 31)])))]
6776  "TARGET_AVX2"
6777  "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6778  [(set_attr "type" "sselog")
6779   (set_attr "prefix" "vex")
6780   (set_attr "mode" "OI")])
6781
;; punpckhwd / vpunpckhwd on 128-bit vectors: interleaves words 4..7 of
;; operand 1 with words 4..7 of operand 2 (indices 12..15 of the
;; concatenation), operand 1's word first in each pair.
;; Alternative 0 is the legacy two-operand form; alternative 1 is the
;; three-operand "v" form.
6782(define_insn "vec_interleave_highv8hi"
6783  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6784	(vec_select:V8HI
6785	  (vec_concat:V16HI
6786	    (match_operand:V8HI 1 "register_operand" "0,x")
6787	    (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6788	  (parallel [(const_int 4) (const_int 12)
6789		     (const_int 5) (const_int 13)
6790		     (const_int 6) (const_int 14)
6791		     (const_int 7) (const_int 15)])))]
6792  "TARGET_SSE2"
6793  "@
6794   punpckhwd\t{%2, %0|%0, %2}
6795   vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6796  [(set_attr "isa" "noavx,avx")
6797   (set_attr "type" "sselog")
6798   (set_attr "prefix_data16" "1,*")
6799   (set_attr "prefix" "orig,vex")
6800   (set_attr "mode" "TI")])
6801
;; vpunpcklwd on 256-bit vectors.  Indices select from the 32-element
;; concatenation of operand 1 (0..15) and operand 2 (16..31).  Per 128-bit
;; half: words 0..3 of each input are interleaved first, then words 8..11.
6802(define_insn "avx2_interleave_lowv16hi"
6803  [(set (match_operand:V16HI 0 "register_operand" "=x")
6804	(vec_select:V16HI
6805	  (vec_concat:V32HI
6806	    (match_operand:V16HI 1 "register_operand" "x")
6807	    (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6808	  (parallel [(const_int 0) (const_int 16)
6809		     (const_int 1) (const_int 17)
6810		     (const_int 2) (const_int 18)
6811		     (const_int 3) (const_int 19)
6812		     (const_int 8) (const_int 24)
6813		     (const_int 9) (const_int 25)
6814		     (const_int 10) (const_int 26)
6815		     (const_int 11) (const_int 27)])))]
6816  "TARGET_AVX2"
6817  "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6818  [(set_attr "type" "sselog")
6819   (set_attr "prefix" "vex")
6820   (set_attr "mode" "OI")])
6821
;; punpcklwd / vpunpcklwd on 128-bit vectors: interleaves words 0..3 of
;; operand 1 with words 0..3 of operand 2 (indices 8..11 of the
;; concatenation), operand 1's word first in each pair.
;; Alternative 0 is the legacy two-operand form; alternative 1 is the
;; three-operand "v" form.
6822(define_insn "vec_interleave_lowv8hi"
6823  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6824	(vec_select:V8HI
6825	  (vec_concat:V16HI
6826	    (match_operand:V8HI 1 "register_operand" "0,x")
6827	    (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6828	  (parallel [(const_int 0) (const_int 8)
6829		     (const_int 1) (const_int 9)
6830		     (const_int 2) (const_int 10)
6831		     (const_int 3) (const_int 11)])))]
6832  "TARGET_SSE2"
6833  "@
6834   punpcklwd\t{%2, %0|%0, %2}
6835   vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6836  [(set_attr "isa" "noavx,avx")
6837   (set_attr "type" "sselog")
6838   (set_attr "prefix_data16" "1,*")
6839   (set_attr "prefix" "orig,vex")
6840   (set_attr "mode" "TI")])
6841
;; vpunpckhdq on 256-bit vectors.  Indices select from the 16-element
;; concatenation of operand 1 (0..7) and operand 2 (8..15).  Per 128-bit
;; half: dwords 2..3 of each input are interleaved first, then dwords
;; 6..7.
6842(define_insn "avx2_interleave_highv8si"
6843  [(set (match_operand:V8SI 0 "register_operand" "=x")
6844	(vec_select:V8SI
6845	  (vec_concat:V16SI
6846	    (match_operand:V8SI 1 "register_operand" "x")
6847	    (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6848	  (parallel [(const_int 2) (const_int 10)
6849		     (const_int 3) (const_int 11)
6850		     (const_int 6) (const_int 14)
6851		     (const_int 7) (const_int 15)])))]
6852  "TARGET_AVX2"
6853  "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6854  [(set_attr "type" "sselog")
6855   (set_attr "prefix" "vex")
6856   (set_attr "mode" "OI")])
6857
;; punpckhdq / vpunpckhdq on 128-bit vectors: interleaves dwords 2..3 of
;; operand 1 with dwords 2..3 of operand 2 (indices 6..7 of the
;; concatenation), operand 1's dword first in each pair.
;; Alternative 0 is the legacy two-operand form; alternative 1 is the
;; three-operand "v" form.
6858(define_insn "vec_interleave_highv4si"
6859  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6860	(vec_select:V4SI
6861	  (vec_concat:V8SI
6862	    (match_operand:V4SI 1 "register_operand" "0,x")
6863	    (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6864	  (parallel [(const_int 2) (const_int 6)
6865		     (const_int 3) (const_int 7)])))]
6866  "TARGET_SSE2"
6867  "@
6868   punpckhdq\t{%2, %0|%0, %2}
6869   vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6870  [(set_attr "isa" "noavx,avx")
6871   (set_attr "type" "sselog")
6872   (set_attr "prefix_data16" "1,*")
6873   (set_attr "prefix" "orig,vex")
6874   (set_attr "mode" "TI")])
6875
;; vpunpckldq on 256-bit vectors.  Indices select from the 16-element
;; concatenation of operand 1 (0..7) and operand 2 (8..15).  Per 128-bit
;; half: dwords 0..1 of each input are interleaved first, then dwords
;; 4..5.
6876(define_insn "avx2_interleave_lowv8si"
6877  [(set (match_operand:V8SI 0 "register_operand" "=x")
6878	(vec_select:V8SI
6879	  (vec_concat:V16SI
6880	    (match_operand:V8SI 1 "register_operand" "x")
6881	    (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6882	  (parallel [(const_int 0) (const_int 8)
6883		     (const_int 1) (const_int 9)
6884		     (const_int 4) (const_int 12)
6885		     (const_int 5) (const_int 13)])))]
6886  "TARGET_AVX2"
6887  "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6888  [(set_attr "type" "sselog")
6889   (set_attr "prefix" "vex")
6890   (set_attr "mode" "OI")])
6891
;; punpckldq / vpunpckldq on 128-bit vectors: interleaves dwords 0..1 of
;; operand 1 with dwords 0..1 of operand 2 (indices 4..5 of the
;; concatenation), operand 1's dword first in each pair.
;; Alternative 0 is the legacy two-operand form; alternative 1 is the
;; three-operand "v" form.
6892(define_insn "vec_interleave_lowv4si"
6893  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6894	(vec_select:V4SI
6895	  (vec_concat:V8SI
6896	    (match_operand:V4SI 1 "register_operand" "0,x")
6897	    (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6898	  (parallel [(const_int 0) (const_int 4)
6899		     (const_int 1) (const_int 5)])))]
6900  "TARGET_SSE2"
6901  "@
6902   punpckldq\t{%2, %0|%0, %2}
6903   vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6904  [(set_attr "isa" "noavx,avx")
6905   (set_attr "type" "sselog")
6906   (set_attr "prefix_data16" "1,*")
6907   (set_attr "prefix" "orig,vex")
6908   (set_attr "mode" "TI")])
6909
;; Full-width "interleave high" for 256-bit integer vectors (VI_256).
;; The avx2_interleave_{low,high}<mode> insns interleave within each
;; 128-bit half separately, so the result is assembled in three steps:
;;   t1 = avx2_interleave_low  (operand 1, operand 2)
;;   t2 = avx2_interleave_high (operand 1, operand 2)
;;   operand 0 = avx2_permv2ti (t1, t2, 1 + (3 << 4))
;; All three values are viewed as V4DImode for the final permute.
;; NOTE(review): per the VPERM2I128 immediate encoding, 0x31 takes the
;; upper 128-bit half of t1 as the low half of the result and the upper
;; 128-bit half of t2 as the high half -- confirm against the
;; avx2_permv2ti pattern.
;; The operand constraints are left empty, as in the other expanders in
;; this file: constraints are not used by define_expand.
6910(define_expand "vec_interleave_high<mode>"
6911  [(match_operand:VI_256 0 "register_operand" "")
6912   (match_operand:VI_256 1 "register_operand" "")
6913   (match_operand:VI_256 2 "nonimmediate_operand" "")]
6914 "TARGET_AVX2"
6915{
6916  rtx t1 = gen_reg_rtx (<MODE>mode);
6917  rtx t2 = gen_reg_rtx (<MODE>mode);
6918  emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6919  emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6920  emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
6921				gen_lowpart (V4DImode, t1),
6922				gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
6923  DONE;
6924})
6925
;; Full-width "interleave low" for 256-bit integer vectors (VI_256).
;; The avx2_interleave_{low,high}<mode> insns interleave within each
;; 128-bit half separately, so the result is assembled in three steps:
;;   t1 = avx2_interleave_low  (operand 1, operand 2)
;;   t2 = avx2_interleave_high (operand 1, operand 2)
;;   operand 0 = avx2_permv2ti (t1, t2, 0 + (2 << 4))
;; All three values are viewed as V4DImode for the final permute.
;; NOTE(review): per the VPERM2I128 immediate encoding, 0x20 takes the
;; lower 128-bit half of t1 as the low half of the result and the lower
;; 128-bit half of t2 as the high half -- confirm against the
;; avx2_permv2ti pattern.
;; The operand constraints are left empty, as in the other expanders in
;; this file: constraints are not used by define_expand.
6926(define_expand "vec_interleave_low<mode>"
6927  [(match_operand:VI_256 0 "register_operand" "")
6928   (match_operand:VI_256 1 "register_operand" "")
6929   (match_operand:VI_256 2 "nonimmediate_operand" "")]
6930 "TARGET_AVX2"
6931{
6932  rtx t1 = gen_reg_rtx (<MODE>mode);
6933  rtx t2 = gen_reg_rtx (<MODE>mode);
6934  emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6935  emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6936  emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
6937				gen_lowpart (V4DImode, t1),
6938				gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
6939  DONE;
6940})
6941
6942;; Modes handled by pinsr patterns.
;; V8HI has no per-mode condition here; V16QI and V4SI require
;; TARGET_SSE4_1, and V2DI additionally requires TARGET_64BIT.
6943(define_mode_iterator PINSR_MODE
6944  [(V16QI "TARGET_SSE4_1") V8HI
6945   (V4SI "TARGET_SSE4_1")
6946   (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
6947
;; Name prefix of the pinsr pattern for each PINSR_MODE mode: "sse2" for
;; V8HI, "sse4_1" for the other three modes.  This matches the per-mode
;; ISA conditions in PINSR_MODE.
6948(define_mode_attr sse2p4_1
6949  [(V16QI "sse4_1") (V8HI "sse2")
6950   (V4SI "sse4_1") (V2DI "sse4_1")])
6951
6952;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;;
;; Insert scalar operand 2 into one element of vector operand 1.
;;
;; In RTL, operand 3 is the vec_merge mask, not an element index.  The
;; insn condition requires exact_log2 of it to be a valid element number
;; (the unsigned cast also rejects the -1 that exact_log2 returns for a
;; value that is not a power of two).  The output code then replaces
;; operand 3 with that log2, i.e. the element index, which is what the
;; instruction takes as its immediate.
;;
;; Alternatives (scalar source / vector source):
;;   0: reg / tied to dest, legacy      1: mem / tied to dest, legacy
;;   2: reg / any reg, "v" form         3: mem / any reg, "v" form
;; For the register-source alternatives (0 and 2), a scalar narrower than
;; SImode is printed with the %k modifier; otherwise control falls
;; through to the following case and uses the plain %2 template.
;; NOTE(review): %k presumably prints the 32-bit name of the register --
;; confirm against the i386 print_operand documentation.
;;
;; Length attributes: when AVX is not enabled, prefix_rex is "1" for V2DI
;; and prefix_data16 is "1" for V8HI; prefix_extra is "1" in every case
;; except V8HI without AVX.
6953(define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
6954  [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
6955	(vec_merge:PINSR_MODE
6956	  (vec_duplicate:PINSR_MODE
6957	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
6958	  (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
6959	  (match_operand:SI 3 "const_int_operand" "")))]
6960  "TARGET_SSE2
6961   && ((unsigned) exact_log2 (INTVAL (operands[3]))
6962       < GET_MODE_NUNITS (<MODE>mode))"
6963{
6964  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6965
6966  switch (which_alternative)
6967    {
6968    case 0:
6969      if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6970	return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
6971      /* FALLTHRU */
6972    case 1:
6973      return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
6974    case 2:
6975      if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6976	return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6977      /* FALLTHRU */
6978    case 3:
6979      return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6980    default:
6981      gcc_unreachable ();
6982    }
6983}
6984  [(set_attr "isa" "noavx,noavx,avx,avx")
6985   (set_attr "type" "sselog")
6986   (set (attr "prefix_rex")
6987     (if_then_else
6988       (and (not (match_test "TARGET_AVX"))
6989	    (eq (const_string "<MODE>mode") (const_string "V2DImode")))
6990       (const_string "1")
6991       (const_string "*")))
6992   (set (attr "prefix_data16")
6993     (if_then_else
6994       (and (not (match_test "TARGET_AVX"))
6995	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6996       (const_string "1")
6997       (const_string "*")))
6998   (set (attr "prefix_extra")
6999     (if_then_else
7000       (and (not (match_test "TARGET_AVX"))
7001	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7002       (const_string "*")
7003       (const_string "1")))
7004   (set_attr "length_immediate" "1")
7005   (set_attr "prefix" "orig,orig,vex,vex")
7006   (set_attr "mode" "TI")])
7007
;; pextrb to a general register: extract byte operand 2 (0..15) of V16QI
;; operand 1 and zero-extend it to the SWI48 destination.  The
;; destination is always printed with %k0, for every SWI48 mode.
;; The template starts with %v and the prefix attribute is "maybe_vex",
;; so this one pattern covers both the legacy and the VEX encoding.
7008(define_insn "*sse4_1_pextrb_<mode>"
7009  [(set (match_operand:SWI48 0 "register_operand" "=r")
7010	(zero_extend:SWI48
7011	  (vec_select:QI
7012	    (match_operand:V16QI 1 "register_operand" "x")
7013	    (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7014  "TARGET_SSE4_1"
7015  "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
7016  [(set_attr "type" "sselog")
7017   (set_attr "prefix_extra" "1")
7018   (set_attr "length_immediate" "1")
7019   (set_attr "prefix" "maybe_vex")
7020   (set_attr "mode" "TI")])
7021
;; pextrb to memory: store byte operand 2 (0..15) of V16QI operand 1
;; directly into a QImode memory destination.  No extension is involved.
7022(define_insn "*sse4_1_pextrb_memory"
7023  [(set (match_operand:QI 0 "memory_operand" "=m")
7024	(vec_select:QI
7025	  (match_operand:V16QI 1 "register_operand" "x")
7026	  (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7027  "TARGET_SSE4_1"
7028  "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7029  [(set_attr "type" "sselog")
7030   (set_attr "prefix_extra" "1")
7031   (set_attr "length_immediate" "1")
7032   (set_attr "prefix" "maybe_vex")
7033   (set_attr "mode" "TI")])
7034
;; pextrw to a general register: extract word operand 2 (0..7) of V8HI
;; operand 1 and zero-extend it to the SWI48 destination (printed with
;; %k0).  This register form needs only TARGET_SSE2 and sets
;; prefix_data16; the memory-destination form below needs TARGET_SSE4_1
;; and sets prefix_extra instead.
7035(define_insn "*sse2_pextrw_<mode>"
7036  [(set (match_operand:SWI48 0 "register_operand" "=r")
7037	(zero_extend:SWI48
7038	  (vec_select:HI
7039	    (match_operand:V8HI 1 "register_operand" "x")
7040	    (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7041  "TARGET_SSE2"
7042  "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
7043  [(set_attr "type" "sselog")
7044   (set_attr "prefix_data16" "1")
7045   (set_attr "length_immediate" "1")
7046   (set_attr "prefix" "maybe_vex")
7047   (set_attr "mode" "TI")])
7048
;; pextrw to memory: store word operand 2 (0..7) of V8HI operand 1
;; directly into an HImode memory destination.  Requires TARGET_SSE4_1,
;; unlike the register-destination pextrw pattern.
7049(define_insn "*sse4_1_pextrw_memory"
7050  [(set (match_operand:HI 0 "memory_operand" "=m")
7051	(vec_select:HI
7052	  (match_operand:V8HI 1 "register_operand" "x")
7053	  (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7054  "TARGET_SSE4_1"
7055  "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7056  [(set_attr "type" "sselog")
7057   (set_attr "prefix_extra" "1")
7058   (set_attr "length_immediate" "1")
7059   (set_attr "prefix" "maybe_vex")
7060   (set_attr "mode" "TI")])
7061
;; pextrd: extract dword operand 2 (0..3) of V4SI operand 1 into an
;; SImode destination.  A single "rm" alternative covers both the
;; register and the memory destination.
7062(define_insn "*sse4_1_pextrd"
7063  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7064	(vec_select:SI
7065	  (match_operand:V4SI 1 "register_operand" "x")
7066	  (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7067  "TARGET_SSE4_1"
7068  "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7069  [(set_attr "type" "sselog")
7070   (set_attr "prefix_extra" "1")
7071   (set_attr "length_immediate" "1")
7072   (set_attr "prefix" "maybe_vex")
7073   (set_attr "mode" "TI")])
7074
;; pextrd with the result zero-extended to DImode, 64-bit targets only.
;; The destination is a register printed with %k0; the instruction text
;; is otherwise the same as *sse4_1_pextrd.
7075(define_insn "*sse4_1_pextrd_zext"
7076  [(set (match_operand:DI 0 "register_operand" "=r")
7077	(zero_extend:DI
7078	  (vec_select:SI
7079	    (match_operand:V4SI 1 "register_operand" "x")
7080	    (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7081  "TARGET_64BIT && TARGET_SSE4_1"
7082  "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7083  [(set_attr "type" "sselog")
7084   (set_attr "prefix_extra" "1")
7085   (set_attr "length_immediate" "1")
7086   (set_attr "prefix" "maybe_vex")
7087   (set_attr "mode" "TI")])
7088
7089;; *sse4_1_pextrq must come before *vec_extractv2di_1_rex64 since it is preferred.
;;
;; pextrq: extract qword operand 2 (0..1) of V2DI operand 1 into a DImode
;; register or memory destination.  64-bit targets only; prefix_rex is
;; set to "1".
7090(define_insn "*sse4_1_pextrq"
7091  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7092	(vec_select:DI
7093	  (match_operand:V2DI 1 "register_operand" "x")
7094	  (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7095  "TARGET_SSE4_1 && TARGET_64BIT"
7096  "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7097  [(set_attr "type" "sselog")
7098   (set_attr "prefix_rex" "1")
7099   (set_attr "prefix_extra" "1")
7100   (set_attr "length_immediate" "1")
7101   (set_attr "prefix" "maybe_vex")
7102   (set_attr "mode" "TI")])
7103
;; Expander taking the 8-bit pshufd immediate (operand 2, 0..255) for a
;; 256-bit shuffle.  The immediate is split into four 2-bit selectors
;; that are passed to avx2_pshufd_1 twice: as is for elements 0..3, and
;; with 4 added for elements 4..7.  Both 128-bit halves are therefore
;; shuffled with the same pattern.
7104(define_expand "avx2_pshufdv3"
7105  [(match_operand:V8SI 0 "register_operand" "")
7106   (match_operand:V8SI 1 "nonimmediate_operand" "")
7107   (match_operand:SI 2 "const_0_to_255_operand" "")]
7108  "TARGET_AVX2"
7109{
7110  int mask = INTVAL (operands[2]);
7111  emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
7112				GEN_INT ((mask >> 0) & 3),
7113				GEN_INT ((mask >> 2) & 3),
7114				GEN_INT ((mask >> 4) & 3),
7115				GEN_INT ((mask >> 6) & 3),
7116				GEN_INT (((mask >> 0) & 3) + 4),
7117				GEN_INT (((mask >> 2) & 3) + 4),
7118				GEN_INT (((mask >> 4) & 3) + 4),
7119				GEN_INT (((mask >> 6) & 3) + 4)));
7120  DONE;
7121})
7122
;; vpshufd on a 256-bit vector, written as an explicit vec_select.
;; Operands 2..5 select (from 0..3) the elements of the low 128-bit half;
;; operands 6..9 select (from 4..7) the elements of the high half.  The
;; insn condition requires each high selector to equal the corresponding
;; low selector plus 4, because the instruction has a single immediate
;; shared by both halves.
;; The output code packs operands 2..5 into that 8-bit immediate (two
;; bits each, operand 2 in the lowest bits) and overwrites operands[2]
;; with it before returning the template.
7123(define_insn "avx2_pshufd_1"
7124  [(set (match_operand:V8SI 0 "register_operand" "=x")
7125	(vec_select:V8SI
7126	  (match_operand:V8SI 1 "nonimmediate_operand" "xm")
7127	  (parallel [(match_operand 2 "const_0_to_3_operand" "")
7128		     (match_operand 3 "const_0_to_3_operand" "")
7129		     (match_operand 4 "const_0_to_3_operand" "")
7130		     (match_operand 5 "const_0_to_3_operand" "")
7131		     (match_operand 6 "const_4_to_7_operand" "")
7132		     (match_operand 7 "const_4_to_7_operand" "")
7133		     (match_operand 8 "const_4_to_7_operand" "")
7134		     (match_operand 9 "const_4_to_7_operand" "")])))]
7135  "TARGET_AVX2
7136   && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
7137   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
7138   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
7139   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
7140{
7141  int mask = 0;
7142  mask |= INTVAL (operands[2]) << 0;
7143  mask |= INTVAL (operands[3]) << 2;
7144  mask |= INTVAL (operands[4]) << 4;
7145  mask |= INTVAL (operands[5]) << 6;
7146  operands[2] = GEN_INT (mask);
7147
7148  return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
7149}
7150  [(set_attr "type" "sselog1")
7151   (set_attr "prefix" "vex")
7152   (set_attr "length_immediate" "1")
7153   (set_attr "mode" "OI")])
7154
;; Expander taking the pshufd immediate (operand 2) for a 128-bit
;; shuffle.  The immediate is split into four 2-bit element selectors
;; that are passed to sse2_pshufd_1.  Operand 2 only has to be a
;; const_int here; bits above the low eight are ignored by the masking.
7155(define_expand "sse2_pshufd"
7156  [(match_operand:V4SI 0 "register_operand" "")
7157   (match_operand:V4SI 1 "nonimmediate_operand" "")
7158   (match_operand:SI 2 "const_int_operand" "")]
7159  "TARGET_SSE2"
7160{
7161  int mask = INTVAL (operands[2]);
7162  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7163				GEN_INT ((mask >> 0) & 3),
7164				GEN_INT ((mask >> 2) & 3),
7165				GEN_INT ((mask >> 4) & 3),
7166				GEN_INT ((mask >> 6) & 3)));
7167  DONE;
7168})
7169
;; pshufd / vpshufd on a 128-bit vector, written as an explicit
;; vec_select with four element selectors (operands 2..5, each 0..3).
;; The output code packs the selectors into the 8-bit immediate (two
;; bits each, operand 2 in the lowest bits) and overwrites operands[2]
;; with it before returning the template.
7170(define_insn "sse2_pshufd_1"
7171  [(set (match_operand:V4SI 0 "register_operand" "=x")
7172	(vec_select:V4SI
7173	  (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7174	  (parallel [(match_operand 2 "const_0_to_3_operand" "")
7175		     (match_operand 3 "const_0_to_3_operand" "")
7176		     (match_operand 4 "const_0_to_3_operand" "")
7177		     (match_operand 5 "const_0_to_3_operand" "")])))]
7178  "TARGET_SSE2"
7179{
7180  int mask = 0;
7181  mask |= INTVAL (operands[2]) << 0;
7182  mask |= INTVAL (operands[3]) << 2;
7183  mask |= INTVAL (operands[4]) << 4;
7184  mask |= INTVAL (operands[5]) << 6;
7185  operands[2] = GEN_INT (mask);
7186
7187  return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7188}
7189  [(set_attr "type" "sselog1")
7190   (set_attr "prefix_data16" "1")
7191   (set_attr "prefix" "maybe_vex")
7192   (set_attr "length_immediate" "1")
7193   (set_attr "mode" "TI")])
7194
;; Expand the three-operand form (destination, source, 8-bit immediate)
;; into the vec_select form matched by avx2_pshuflw_1.  Each 2-bit field
;; of the immediate yields two selectors: one for elements 0-3 and the
;; same value plus 8 for elements 8-11.
(define_expand "avx2_pshuflwv3"
  [(match_operand:V16HI 0 "register_operand" "")
   (match_operand:V16HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_0_to_255_operand" "")]
  "TARGET_AVX2"
{
  const int imm = INTVAL (operands[2]);
  rtx lo[4], hi[4];
  int i;

  for (i = 0; i < 4; i++)
    {
      const int sel = (imm >> (2 * i)) & 3;

      lo[i] = GEN_INT (sel);
      hi[i] = GEN_INT (sel + 8);
    }

  emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
				 lo[0], lo[1], lo[2], lo[3],
				 hi[0], hi[1], hi[2], hi[3]));
  DONE;
})
7213
;; Permute HImode elements 0-3 and 8-11 of a V16HI vector, leaving
;; elements 4-7 and 12-15 in place.  Operands 2-5 (each 0..3) select the
;; sources for result elements 0-3 and operands 6-9 (each 8..11) those for
;; result elements 8-11.  The insn condition requires operands 6-9 to equal
;; operands 2-5 plus 8, so a single immediate built from operands 2-5
;; describes both groups.
(define_insn "avx2_pshuflw_1"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
	(vec_select:V16HI
	  (match_operand:V16HI 1 "nonimmediate_operand" "xm")
	  (parallel [(match_operand 2 "const_0_to_3_operand" "")
		     (match_operand 3 "const_0_to_3_operand" "")
		     (match_operand 4 "const_0_to_3_operand" "")
		     (match_operand 5 "const_0_to_3_operand" "")
		     (const_int 4)
		     (const_int 5)
		     (const_int 6)
		     (const_int 7)
		     (match_operand 6 "const_8_to_11_operand" "")
		     (match_operand 7 "const_8_to_11_operand" "")
		     (match_operand 8 "const_8_to_11_operand" "")
		     (match_operand 9 "const_8_to_11_operand" "")
		     (const_int 12)
		     (const_int 13)
		     (const_int 14)
		     (const_int 15)])))]
  "TARGET_AVX2
   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
{
  int imm = 0;
  int i;

  /* Two bits per selector, result element 0 in the low bits.  All four
     selectors are read before operands[2] is overwritten.  */
  for (i = 0; i < 4; i++)
    imm |= INTVAL (operands[2 + i]) << (2 * i);
  operands[2] = GEN_INT (imm);

  return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
7253
;; Expand the three-operand form (destination, source, integer immediate)
;; into the vec_select form matched by sse2_pshuflw_1.  The immediate is
;; split into four 2-bit selectors for result elements 0-3.
(define_expand "sse2_pshuflw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* Selector I lives in bits 2*I and 2*I+1 of the immediate.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT ((imm >> (2 * i)) & 3);

  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
				 sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
7268
;; Permute HImode elements 0-3 of a V8HI vector; elements 4-7 stay in
;; place.  Operands 2-5 (each 0..3) select the sources for result
;; elements 0-3 and are packed into the immediate printed as operand 2.
(define_insn "sse2_pshuflw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
	(vec_select:V8HI
	  (match_operand:V8HI 1 "nonimmediate_operand" "xm")
	  (parallel [(match_operand 2 "const_0_to_3_operand" "")
		     (match_operand 3 "const_0_to_3_operand" "")
		     (match_operand 4 "const_0_to_3_operand" "")
		     (match_operand 5 "const_0_to_3_operand" "")
		     (const_int 4)
		     (const_int 5)
		     (const_int 6)
		     (const_int 7)])))]
  "TARGET_SSE2"
{
  int imm = 0;
  int i;

  /* Two bits per selector, result element 0 in the low bits.  All four
     selectors are read before operands[2] is overwritten.  */
  for (i = 0; i < 4; i++)
    imm |= INTVAL (operands[2 + i]) << (2 * i);
  operands[2] = GEN_INT (imm);

  return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
7298
;; Expand the three-operand form (destination, source, 8-bit immediate)
;; into the vec_select form matched by avx2_pshufhw_1.  Each 2-bit field
;; of the immediate yields two selectors: the field plus 4 for elements
;; 4-7 and the field plus 12 for elements 12-15.
(define_expand "avx2_pshufhwv3"
  [(match_operand:V16HI 0 "register_operand" "")
   (match_operand:V16HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_0_to_255_operand" "")]
  "TARGET_AVX2"
{
  const int imm = INTVAL (operands[2]);
  rtx lo[4], hi[4];
  int i;

  for (i = 0; i < 4; i++)
    {
      const int sel = (imm >> (2 * i)) & 3;

      lo[i] = GEN_INT (sel + 4);
      hi[i] = GEN_INT (sel + 12);
    }

  emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
				 lo[0], lo[1], lo[2], lo[3],
				 hi[0], hi[1], hi[2], hi[3]));
  DONE;
})
7317
;; Permute HImode elements 4-7 and 12-15 of a V16HI vector, leaving
;; elements 0-3 and 8-11 in place.  Operands 2-5 (each 4..7) select the
;; sources for result elements 4-7 and operands 6-9 (each 12..15) those
;; for result elements 12-15.  The insn condition requires operands 6-9 to
;; equal operands 2-5 plus 8, so one immediate describes both groups.
(define_insn "avx2_pshufhw_1"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
	(vec_select:V16HI
	  (match_operand:V16HI 1 "nonimmediate_operand" "xm")
	  (parallel [(const_int 0)
		     (const_int 1)
		     (const_int 2)
		     (const_int 3)
		     (match_operand 2 "const_4_to_7_operand" "")
		     (match_operand 3 "const_4_to_7_operand" "")
		     (match_operand 4 "const_4_to_7_operand" "")
		     (match_operand 5 "const_4_to_7_operand" "")
		     (const_int 8)
		     (const_int 9)
		     (const_int 10)
		     (const_int 11)
		     (match_operand 6 "const_12_to_15_operand" "")
		     (match_operand 7 "const_12_to_15_operand" "")
		     (match_operand 8 "const_12_to_15_operand" "")
		     (match_operand 9 "const_12_to_15_operand" "")])))]
  "TARGET_AVX2
   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
{
  int imm = 0;
  int i;

  /* The selectors are element indices 4..7; subtracting 4 gives the 2-bit
     field for the immediate.  All four selectors are read before
     operands[2] is overwritten.  */
  for (i = 0; i < 4; i++)
    imm |= (INTVAL (operands[2 + i]) - 4) << (2 * i);
  operands[2] = GEN_INT (imm);

  return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
7357
;; Expand the three-operand form (destination, source, integer immediate)
;; into the vec_select form matched by sse2_pshufhw_1.  Each 2-bit field
;; of the immediate, plus 4, becomes the selector for one of result
;; elements 4-7.
(define_expand "sse2_pshufhw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* Field I lives in bits 2*I and 2*I+1 of the immediate.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT (((imm >> (2 * i)) & 3) + 4);

  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
				 sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
7372
;; Permute HImode elements 4-7 of a V8HI vector; elements 0-3 stay in
;; place.  Operands 2-5 (each 4..7) select the sources for result
;; elements 4-7 and are packed into the immediate printed as operand 2.
(define_insn "sse2_pshufhw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
	(vec_select:V8HI
	  (match_operand:V8HI 1 "nonimmediate_operand" "xm")
	  (parallel [(const_int 0)
		     (const_int 1)
		     (const_int 2)
		     (const_int 3)
		     (match_operand 2 "const_4_to_7_operand" "")
		     (match_operand 3 "const_4_to_7_operand" "")
		     (match_operand 4 "const_4_to_7_operand" "")
		     (match_operand 5 "const_4_to_7_operand" "")])))]
  "TARGET_SSE2"
{
  int imm = 0;
  int i;

  /* The selectors are element indices 4..7; subtracting 4 gives the 2-bit
     field for the immediate.  All four selectors are read before
     operands[2] is overwritten.  */
  for (i = 0; i < 4; i++)
    imm |= (INTVAL (operands[2 + i]) - 4) << (2 * i);
  operands[2] = GEN_INT (imm);

  return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
7402
;; Load an SImode scalar into element 0 of a V4SI register.  Operand 2 is
;; not supplied by the caller: the preparation statement sets it to the
;; all-zero V4SI constant, so every element not selected by the vec_merge
;; mask (const_int 1) comes from zero.
(define_expand "sse2_loadd"
  [(set (match_operand:V4SI 0 "register_operand" "")
	(vec_merge:V4SI
	  (vec_duplicate:V4SI
	    (match_operand:SI 1 "nonimmediate_operand" ""))
	  (match_dup 2)
	  (const_int 1)))]
  "TARGET_SSE"
  "operands[2] = CONST0_RTX (V4SImode);")
7412
;; Merge SImode operand 2 into element 0 of V4SI operand 1 (a register or
;; zero).  Alternatives, in order:
;;   0  memory source, operand 1 zero: movd (isa sse2)
;;   1  general-register source ("Yi" destination), operand 1 zero: movd
;;   2  memory source, operand 1 zero: movss (non-AVX)
;;   3  SSE-register source, operand 1 tied to the destination: movss
;;   4  SSE-register source, separate operand 1: three-operand vmovss
(define_insn "sse2_loadld"
  [(set (match_operand:V4SI 0 "register_operand"       "=x,Yi,x,x,x")
	(vec_merge:V4SI
	  (vec_duplicate:V4SI
	    (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
	  (match_operand:V4SI 1 "reg_or_0_operand"     "C ,C ,C,0,x")
	  (const_int 1)))]
  "TARGET_SSE"
  "@
   %vmovd\t{%2, %0|%0, %2}
   %vmovd\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   vmovss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "sse2,*,noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
   (set_attr "mode" "TI,TI,V4SF,SF,SF")])
7431
;; Extract element 0 of a V4SI register into an SImode destination.
;; The output template is "#", so no assembly is emitted for the insn
;; itself; it must be split.  After reload, when inter-unit moves are
;; allowed or the destination is memory or not a general register, it is
;; replaced by a plain SImode move from the source register, re-expressed
;; in SImode with the same register number.
(define_insn_and_split "sse2_stored"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
	(vec_select:SI
	  (match_operand:V4SI 1 "register_operand" "x,Yi")
	  (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "#"
  "&& reload_completed
   && (TARGET_INTER_UNIT_MOVES
       || MEM_P (operands [0])
       || !GENERAL_REGNO_P (true_regnum (operands [0])))"
  [(set (match_dup 0) (match_dup 1))]
  "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
7445
;; Extract one SImode element from a V4SI value in offsettable memory
;; into a general register.  After reload the insn is split into an
;; ordinary SImode load from the element's address (4 bytes per element).
(define_insn_and_split "*vec_ext_v4si_mem"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(vec_select:SI
	  (match_operand:V4SI 1 "memory_operand" "o")
	  (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
{
  const int idx = INTVAL (operands[2]);
  /* The selected element starts IDX * 4 bytes into the vector.  */
  rtx elt_mem = adjust_address (operands[1], SImode, idx * 4);

  emit_move_insn (operands[0], elt_mem);
  DONE;
})
7461
;; Named expander for extracting element 0 of a V2DI register into a
;; DImode destination.  It has no preparation code; it only emits the
;; vec_select pattern below.
(define_expand "sse_storeq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "")
	(vec_select:DI
	  (match_operand:V2DI 1 "register_operand" "")
	  (parallel [(const_int 0)])))]
  "TARGET_SSE")
7468
;; 64-bit variant of the element-0 DImode extract.  The first two
;; alternatives (SSE-register source) output "#" and therefore must be
;; split; the third loads the value from an offsettable memory source
;; with an integer mov.  Memory-to-memory is rejected by the condition.
(define_insn "*sse2_storeq_rex64"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
	(vec_select:DI
	  (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
	  (parallel [(const_int 0)])))]
  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   #
   #
   mov{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "*,*,imov")
   (set_attr "mode" "*,*,DI")])
7481
;; Element-0 DImode extract from an SSE register into an SSE register or
;; memory.  The output template is "#", so the insn must be split before
;; assembly output.
(define_insn "*sse2_storeq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
	(vec_select:DI
	  (match_operand:V2DI 1 "register_operand" "x")
	  (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "#")
7489
;; Split the element-0 DImode extract from a V2DI register into a plain
;; DImode move.  Applies after reload when inter-unit moves are allowed
;; or the destination is memory or not a general register.  The source
;; register is re-expressed in DImode with the same register number.
(define_split
  [(set (match_operand:DI 0 "nonimmediate_operand" "")
	(vec_select:DI
	  (match_operand:V2DI 1 "register_operand" "")
	  (parallel [(const_int 0)])))]
  "TARGET_SSE
   && reload_completed
   && (TARGET_INTER_UNIT_MOVES
       || MEM_P (operands [0])
       || !GENERAL_REGNO_P (true_regnum (operands [0])))"
  [(set (match_dup 0) (match_dup 1))]
  "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
7502
;; Extract element 1 (the high quadword) of a V2DI value on 64-bit
;; targets.  Alternatives, in order:
;;   0  register source to memory: movhps
;;   1  source tied to the destination: shift right by 8 bytes (non-AVX)
;;   2  register source: three-operand vpsrldq (AVX)
;;   3  offsettable memory source to SSE register: movq from %H1
;;   4  offsettable memory source to general register: mov from %H1
;; Memory-to-memory is rejected by the condition.
(define_insn "*vec_extractv2di_1_rex64"
  [(set (match_operand:DI 0 "nonimmediate_operand"     "=m,x,x,x,r")
	(vec_select:DI
	  (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
	  (parallel [(const_int 1)])))]
  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   %vmovhps\t{%1, %0|%0, %1}
   psrldq\t{$8, %0|%0, 8}
   vpsrldq\t{$8, %1, %0|%0, %1, 8}
   %vmovq\t{%H1, %0|%0, %H1}
   mov{q}\t{%H1, %0|%0, %H1}"
  [(set_attr "isa" "*,noavx,avx,*,*")
   (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
   (set_attr "length_immediate" "*,1,1,*,*")
   (set_attr "memory" "*,none,none,*,*")
   (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
   (set_attr "mode" "V2SF,TI,TI,TI,DI")])
7521
;; Extract element 1 (the high quadword) of a V2DI value on 32-bit
;; targets.  Alternatives, in order:
;;   0  register source to memory: movhps
;;   1  source tied to the destination: psrldq by 8 bytes (SSE2, non-AVX)
;;   2  register source: three-operand vpsrldq (AVX)
;;   3  offsettable memory source: movq from %H1 (SSE2)
;;   4  register source: movhlps (non-AVX)
;;   5  offsettable memory source: movlps from %H1 (non-AVX)
;; Memory-to-memory is rejected by the condition.
(define_insn "*vec_extractv2di_1"
  [(set (match_operand:DI 0 "nonimmediate_operand"     "=m,x,x,x,x,x")
	(vec_select:DI
	  (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
	  (parallel [(const_int 1)])))]
  "!TARGET_64BIT && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   %vmovhps\t{%1, %0|%0, %1}
   psrldq\t{$8, %0|%0, 8}
   vpsrldq\t{$8, %1, %0|%0, %1, 8}
   %vmovq\t{%H1, %0|%0, %H1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx")
   (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
   (set_attr "length_immediate" "*,1,1,*,*,*")
   (set_attr "memory" "*,none,none,*,*,*")
   (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
   (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
7542
;; Broadcast an SImode scalar to all four elements of a V4SI register.
;; Alternatives, in order:
;;   0  SSE-register source: pshufd with immediate 0 (SSE2)
;;   1  memory source: vbroadcastss (AVX)
;;   2  source tied to the destination: shufps with immediate 0 (non-AVX)
(define_insn "*vec_dupv4si"
  [(set (match_operand:V4SI 0 "register_operand"     "=x,x,x")
	(vec_duplicate:V4SI
	  (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
  "TARGET_SSE"
  "@
   %vpshufd\t{$0, %1, %0|%0, %1, 0}
   vbroadcastss\t{%1, %0|%0, %1}
   shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "isa" "sse2,avx,noavx")
   (set_attr "type" "sselog1,ssemov,sselog1")
   (set_attr "length_immediate" "1,0,1")
   (set_attr "prefix_extra" "0,1,*")
   (set_attr "prefix" "maybe_vex,vex,orig")
   (set_attr "mode" "TI,V4SF,V4SF")])
7558
;; Broadcast a DImode scalar to both elements of a V2DI register.
;; Alternatives, in order:
;;   0  source tied to the destination: punpcklqdq (SSE2, non-AVX)
;;   1  SSE-register source: vpunpcklqdq, source printed twice via %d1 (AVX)
;;   2  memory source: movddup (SSE3)
;;   3  source tied to the destination: movlhps (non-AVX)
(define_insn "*vec_dupv2di"
  [(set (match_operand:V2DI 0 "register_operand"     "=x,x,x,x")
	(vec_duplicate:V2DI
	  (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t%0, %0
   vpunpcklqdq\t{%d1, %0|%0, %d1}
   %vmovddup\t{%1, %0|%0, %1}
   movlhps\t%0, %0"
  [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
   (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
   (set_attr "prefix" "orig,vex,maybe_vex,orig")
   (set_attr "mode" "TI,TI,DF,V4SF")])
7573
;; Build a V2SI vector from two SImode values when SSE4.1 is available.
;; Alternatives, in order:
;;   0/1  second element from register or memory: pinsrd / vpinsrd
;;   2/3  both elements in SSE registers: punpckldq / vpunpckldq
;;   4    second element zero: movd of operand 1 into an SSE register
;;   5    MMX registers: punpckldq
;;   6    MMX destination, second element zero: movd
(define_insn "*vec_concatv2si_sse4_1"
  [(set (match_operand:V2SI 0 "register_operand"     "=x, x,x,x, x, *y,*y")
	(vec_concat:V2SI
	  (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm,  0,rm")
	  (match_operand:SI 2 "vector_move_operand"  "rm,rm,x,x, C,*ym, C")))]
  "TARGET_SSE4_1"
  "@
   pinsrd\t{$1, %2, %0|%0, %2, 1}
   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
   punpckldq\t{%2, %0|%0, %2}
   vpunpckldq\t{%2, %1, %0|%0, %1, %2}
   %vmovd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
   (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "prefix_extra" "1,1,*,*,*,*,*")
   (set_attr "length_immediate" "1,1,*,*,*,*,*")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
   (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
7594
7595;; ??? In theory we can match memory for the MMX alternative, but allowing
7596;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7597;; alternatives pretty much forces the MMX alternative to be chosen.
;; SSE2 variant of the V2SI concatenation; operand 2 must be a register
;; or zero.  Alternatives 0 and 1 use SSE registers (punpckldq, or movd
;; when the second element is zero); alternatives 2 and 3 are the MMX
;; register equivalents.
(define_insn "*vec_concatv2si_sse2"
  [(set (match_operand:V2SI 0 "register_operand"     "=x,x ,*y,*y")
	(vec_concat:V2SI
	  (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
	  (match_operand:SI 2 "reg_or_0_operand"     " x,C ,*y, C")))]
  "TARGET_SSE2"
  "@
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "TI,TI,DI,DI")])
7611
;; SSE-only variant of the V2SI concatenation.  The SSE-register
;; alternatives use the single-precision instructions unpcklps and movss
;; (the latter only from memory, with a zero second element); alternatives
;; 2 and 3 are the MMX register forms.
(define_insn "*vec_concatv2si_sse"
  [(set (match_operand:V2SI 0 "register_operand"     "=x,x,*y,*y")
	(vec_concat:V2SI
	  (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
	  (match_operand:SI 2 "reg_or_0_operand"     " x,C,*y,C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,V4SF,DI,DI")])
7625
;; Concatenate two V2SI halves into a V4SI register.  Alternatives:
;;   0/1  register high half: punpcklqdq (SSE2, non-AVX) / vpunpcklqdq
;;   2    register high half: movlhps (non-AVX)
;;   3/4  memory high half: movhps (non-AVX) / vmovhps (AVX)
;; In the two-operand forms the low half is tied to the destination.
(define_insn "*vec_concatv4si"
  [(set (match_operand:V4SI 0 "register_operand"       "=x,x,x,x,x")
	(vec_concat:V4SI
	  (match_operand:V2SI 1 "register_operand"     " 0,x,0,0,x")
	  (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t{%2, %0|%0, %2}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
   (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "prefix" "orig,vex,orig,orig,vex")
   (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
7642
7643;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI vector from two DImode values on 64-bit targets.
;; Alternatives, in order:
;;   0/1  second element from register or memory: pinsrq / vpinsrq
;;   2    second element zero, SSE register or memory source: movq
;;   3    second element zero, general-register source: movd
;;   4    second element zero, MMX source: movq2dq
;;   5/6  both elements in SSE registers: punpcklqdq / vpunpcklqdq
;;   7/8  second element in memory: movhps / vmovhps
;; The "type" attribute is sselog for alternatives 0, 1, 5 and 6 and
;; ssemov otherwise; "prefix_rex" is forced to 1 for alternatives 0 and 3
;; when AVX is not enabled.
(define_insn "*vec_concatv2di_rex64"
  [(set (match_operand:V2DI 0 "register_operand"
	  "=x,x ,x ,Yi,!x,x,x,x,x")
	(vec_concat:V2DI
	  (match_operand:DI 1 "nonimmediate_operand"
	  " 0,x ,xm,r ,*y,0,x,0,x")
	  (match_operand:DI 2 "vector_move_operand"
	  "rm,rm,C ,C ,C ,x,x,m,m")))]
  "TARGET_64BIT"
  "@
   pinsrq\t{$1, %2, %0|%0, %2, 1}
   vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
   %vmovq\t{%1, %0|%0, %1}
   %vmovd\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   punpcklqdq\t{%2, %0|%0, %2}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx")
   (set (attr "type")
     (if_then_else
       (eq_attr "alternative" "0,1,5,6")
       (const_string "sselog")
       (const_string "ssemov")))
   (set (attr "prefix_rex")
     (if_then_else
       (and (eq_attr "alternative" "0,3")
	    (not (match_test "TARGET_AVX")))
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
   (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
   (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
7679
;; Build a V2DI vector from two DImode values on 32-bit targets.
;; Alternatives, in order:
;;   0    second element zero, SSE register or memory source: movq (SSE2)
;;   1    second element zero, MMX source: movq2dq (SSE2)
;;   2/3  both in SSE registers: punpcklqdq (SSE2, non-AVX) / vpunpcklqdq
;;   4    both in SSE registers: movlhps (non-AVX)
;;   5/6  second element in memory: movhps / vmovhps
(define_insn "vec_concatv2di"
  [(set (match_operand:V2DI 0 "register_operand"     "=x,?x,x,x,x,x,x")
	(vec_concat:V2DI
	  (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x")
	  (match_operand:DI 2 "vector_move_operand"  " C, C,x,x,x,m,m")))]
  "!TARGET_64BIT && TARGET_SSE"
  "@
   %vmovq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   punpcklqdq\t{%2, %0|%0, %2}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
   (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
   (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
7698
;; Unpack expander for the VI124_AVX2 modes; the result has mode
;; <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called here with (false, false).
;; NOTE(review): judging by the pattern names, the two flags presumably
;; mean (unsigned_p, high_p) -- confirm against the helper in i386.c.
(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (match_operand:VI124_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
  "ix86_expand_sse_unpack (operands, false, false); DONE;")
7704
;; Unpack expander for the VI124_AVX2 modes; the result has mode
;; <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called here with (false, true).
;; NOTE(review): judging by the pattern names, the two flags presumably
;; mean (unsigned_p, high_p) -- confirm against the helper in i386.c.
(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (match_operand:VI124_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
  "ix86_expand_sse_unpack (operands, false, true); DONE;")
7710
;; Unpack expander for the VI124_AVX2 modes; the result has mode
;; <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called here with (true, false).
;; NOTE(review): judging by the pattern names, the two flags presumably
;; mean (unsigned_p, high_p) -- confirm against the helper in i386.c.
(define_expand "vec_unpacku_lo_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (match_operand:VI124_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
  "ix86_expand_sse_unpack (operands, true, false); DONE;")
7716
;; Unpack expander for the VI124_AVX2 modes; the result has mode
;; <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called here with (true, true).
;; NOTE(review): judging by the pattern names, the two flags presumably
;; mean (unsigned_p, high_p) -- confirm against the helper in i386.c.
(define_expand "vec_unpacku_hi_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (match_operand:VI124_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
  "ix86_expand_sse_unpack (operands, true, true); DONE;")
7722
7723;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7724;;
7725;; Miscellaneous
7726;;
7727;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7728
;; Unsigned rounding average of two V32QI vectors, expressed as
;; truncate ((zext (a) + zext (b) + 1) >> 1) with the arithmetic done in
;; V32HI.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy for PLUS on the operands.
;; NOTE(review): the rounding constant is written as const_vector:V32QI
;; although it is an operand of a V32HI plus; the matching insn uses the
;; same spelling, so any change has to be made to both together.
(define_expand "avx2_uavgv32qi3"
  [(set (match_operand:V32QI 0 "register_operand" "")
	(truncate:V32QI
	  (lshiftrt:V32HI
	    (plus:V32HI
	      (plus:V32HI
		(zero_extend:V32HI
		  (match_operand:V32QI 1 "nonimmediate_operand" ""))
		(zero_extend:V32HI
		  (match_operand:V32QI 2 "nonimmediate_operand" "")))
	      (const_vector:V32QI [(const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)]))
	    (const_int 1))))]
  "TARGET_AVX2"
  "ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);")
7758
;; Unsigned rounding average of two V16QI vectors, expressed as
;; truncate ((zext (a) + zext (b) + 1) >> 1) with the arithmetic done in
;; V16HI.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy for PLUS on the operands.
;; NOTE(review): the rounding constant is written as const_vector:V16QI
;; although it is an operand of a V16HI plus; the matching insn uses the
;; same spelling, so any change has to be made to both together.
(define_expand "sse2_uavgv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
	(truncate:V16QI
	  (lshiftrt:V16HI
	    (plus:V16HI
	      (plus:V16HI
		(zero_extend:V16HI
		  (match_operand:V16QI 1 "nonimmediate_operand" ""))
		(zero_extend:V16HI
		  (match_operand:V16QI 2 "nonimmediate_operand" "")))
	      (const_vector:V16QI [(const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)]))
	    (const_int 1))))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
7780
;; vpavgb on 256-bit vectors: matches the rounding-average RTL
;; truncate ((zext (a) + zext (b) + 1) >> 1).  Operand 1 is marked
;; commutative ("%"); operand 2 may be a register or memory.
(define_insn "*avx2_uavgv32qi3"
  [(set (match_operand:V32QI 0 "register_operand" "=x")
	(truncate:V32QI
	  (lshiftrt:V32HI
	    (plus:V32HI
	      (plus:V32HI
		(zero_extend:V32HI
		  (match_operand:V32QI 1 "nonimmediate_operand" "%x"))
		(zero_extend:V32HI
		  (match_operand:V32QI 2 "nonimmediate_operand" "xm")))
	      (const_vector:V32QI [(const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)]))
	    (const_int 1))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)"
  "vpavgb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
7813
;; pavgb / vpavgb on 128-bit vectors: matches the rounding-average RTL
;; truncate ((zext (a) + zext (b) + 1) >> 1).  Alternative 0 is the
;; two-operand form with operand 1 tied to the destination (non-AVX);
;; alternative 1 is the three-operand AVX form.
(define_insn "*sse2_uavgv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
	(truncate:V16QI
	  (lshiftrt:V16HI
	    (plus:V16HI
	      (plus:V16HI
		(zero_extend:V16HI
		  (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
		(zero_extend:V16HI
		  (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
	      (const_vector:V16QI [(const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)]))
	    (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
  "@
   pavgb\t{%2, %0|%0, %2}
   vpavgb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
7842
;; Unsigned rounding average of two V16HI vectors, expressed as
;; truncate ((zext (a) + zext (b) + 1) >> 1) with the arithmetic done in
;; V16SI.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy for PLUS on the operands.
;; NOTE(review): the rounding constant is written as const_vector:V16HI
;; although it is an operand of a V16SI plus; the matching insn uses the
;; same spelling, so any change has to be made to both together.
(define_expand "avx2_uavgv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "")
	(truncate:V16HI
	  (lshiftrt:V16SI
	    (plus:V16SI
	      (plus:V16SI
		(zero_extend:V16SI
		  (match_operand:V16HI 1 "nonimmediate_operand" ""))
		(zero_extend:V16SI
		  (match_operand:V16HI 2 "nonimmediate_operand" "")))
	      (const_vector:V16HI [(const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)]))
	    (const_int 1))))]
  "TARGET_AVX2"
  "ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);")
7864
;; Unsigned rounding average of two V8HI vectors, expressed as
;; truncate ((zext (a) + zext (b) + 1) >> 1) with the arithmetic done in
;; V8SI.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy for PLUS on the operands.
;; NOTE(review): the rounding constant is written as const_vector:V8HI
;; although it is an operand of a V8SI plus; the matching insn uses the
;; same spelling, so any change has to be made to both together.
(define_expand "sse2_uavgv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
	(truncate:V8HI
	  (lshiftrt:V8SI
	    (plus:V8SI
	      (plus:V8SI
		(zero_extend:V8SI
		  (match_operand:V8HI 1 "nonimmediate_operand" ""))
		(zero_extend:V8SI
		  (match_operand:V8HI 2 "nonimmediate_operand" "")))
	      (const_vector:V8HI [(const_int 1) (const_int 1)
				  (const_int 1) (const_int 1)
				  (const_int 1) (const_int 1)
				  (const_int 1) (const_int 1)]))
	    (const_int 1))))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
7882
;; vpavgw on 256-bit vectors: matches the rounding-average RTL
;; truncate ((zext (a) + zext (b) + 1) >> 1).  Operand 1 is marked
;; commutative ("%"); operand 2 may be a register or memory.
(define_insn "*avx2_uavgv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
	(truncate:V16HI
	  (lshiftrt:V16SI
	    (plus:V16SI
	      (plus:V16SI
		(zero_extend:V16SI
		  (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
		(zero_extend:V16SI
		  (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
	      (const_vector:V16HI [(const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)]))
	    (const_int 1))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)"
  "vpavgw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
7907
;; pavgw / vpavgw on 128-bit vectors: matches the rounding-average RTL
;; truncate ((zext (a) + zext (b) + 1) >> 1).  Alternative 0 is the
;; two-operand form with operand 1 tied to the destination (non-AVX);
;; alternative 1 is the three-operand AVX form.
(define_insn "*sse2_uavgv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
	(truncate:V8HI
	  (lshiftrt:V8SI
	    (plus:V8SI
	      (plus:V8SI
		(zero_extend:V8SI
		  (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
		(zero_extend:V8SI
		  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
	      (const_vector:V8HI [(const_int 1) (const_int 1)
				  (const_int 1) (const_int 1)
				  (const_int 1) (const_int 1)
				  (const_int 1) (const_int 1)]))
	    (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
  "@
   pavgw\t{%2, %0|%0, %2}
   vpavgw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
7932
7933;; The correct representation for this is absolutely enormous, and
7934;; surely not generally useful.
;; psadbw / vpsadbw, modelled as an opaque UNSPEC_PSADBW.  The result has
;; a VI8_AVX2 mode while both inputs use the corresponding
;; <ssebytemode>.  Alternative 0 is the two-operand form with operand 1
;; tied to the destination (non-AVX); alternative 1 is the AVX form.
(define_insn "<sse2_avx2>_psadbw"
  [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
	(unspec:VI8_AVX2 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
			  (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
			  UNSPEC_PSADBW))]
  "TARGET_SSE2"
  "@
   psadbw\t{%2, %0|%0, %2}
   vpsadbw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
7950
;; movmskps / movmskpd family for the VF modes, modelled as
;; UNSPEC_MOVMSK: produces an SImode value in a general register from a
;; floating-point vector register.
(define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	  [(match_operand:VF 1 "register_operand" "x")]
	  UNSPEC_MOVMSK))]
  "TARGET_SSE"
  "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
7961
;; vpmovmskb on a 256-bit V32QI register, modelled as UNSPEC_MOVMSK with
;; an SImode result in a general register.
;; NOTE(review): the "mode" attribute is "DI" here, whereas sse2_pmovmskb
;; uses "SI" -- confirm whether the difference is intentional.
(define_insn "avx2_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
		   UNSPEC_MOVMSK))]
  "TARGET_AVX2"
  "vpmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "DI")])
7971
;; pmovmskb / vpmovmskb on a 128-bit V16QI register, modelled as
;; UNSPEC_MOVMSK with an SImode result in a general register.
(define_insn "sse2_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
		   UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "%vpmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])
7982
;; Named expander for the masked byte store.  The destination memory
;; also appears as an input of the unspec (match_dup 0), so the old
;; memory contents are part of the modelled operation.
(define_expand "sse2_maskmovdqu"
  [(set (match_operand:V16QI 0 "memory_operand" "")
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
		       (match_operand:V16QI 2 "register_operand" "")
		       (match_dup 0)]
		      UNSPEC_MASKMOV))]
  "TARGET_SSE2")
7990
;; maskmovdqu / vmaskmovdqu.  The store address is operand 0, a pointer
;; register restricted by the "D" constraint; it is implicit in the
;; instruction and is not printed in the assembly template.  Because of
;; that implicit operand the default VEX length computation is
;; overridden explicitly below.
(define_insn "*sse2_maskmovdqu"
  [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
		       (match_operand:V16QI 2 "register_operand" "x")
		       (mem:V16QI (match_dup 0))]
		      UNSPEC_MASKMOV))]
  "TARGET_SSE2"
  "%vmaskmovdqu\t{%2, %1|%1, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "1")
   ;; The implicit %rdi operand confuses default length_vex computation.
   (set (attr "length_vex")
     (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
8006
;; ldmxcsr / vldmxcsr: load MXCSR from an SImode memory operand.
;; Modelled as unspec_volatile UNSPECV_LDMXCSR.
(define_insn "sse_ldmxcsr"
  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
		    UNSPECV_LDMXCSR)]
  "TARGET_SSE"
  "%vldmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "mxcsr")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "load")])
8016
;; stmxcsr / vstmxcsr: store MXCSR to an SImode memory operand.
;; Modelled as a set from unspec_volatile UNSPECV_STMXCSR.
(define_insn "sse_stmxcsr"
  [(set (match_operand:SI 0 "memory_operand" "=m")
	(unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
  "TARGET_SSE"
  "%vstmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "mxcsr")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "store")])
8026
;; clflush on the address given by operand 0 (an address operand, printed
;; with %a0).  Modelled as unspec_volatile UNSPECV_CLFLUSH with the
;; "memory" attribute set to "unknown".
(define_insn "sse2_clflush"
  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
		    UNSPECV_CLFLUSH)]
  "TARGET_SSE2"
  "clflush\t%a0"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])
8035
8036
;; mwait.  The two SImode inputs are pinned by the "a" and "c"
;; constraints; the instruction takes them implicitly, so the template
;; prints no operands.
(define_insn "sse3_mwait"
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
		     (match_operand:SI 1 "register_operand" "c")]
		    UNSPECV_MWAIT)]
  "TARGET_SSE3"
;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
;; Since 32bit register operands are implicitly zero extended to 64bit,
;; we only need to set up 32bit registers.
  "mwait"
  [(set_attr "length" "3")])
8047
;; monitor, 32-bit targets only (!TARGET_64BIT).  The three SImode
;; operands are pinned by the "a", "c" and "d" constraints and are
;; printed explicitly in the template.
(define_insn "sse3_monitor"
  [(unspec_volatile
     [(match_operand:SI 0 "register_operand" "a")
      (match_operand:SI 1 "register_operand" "c")
      (match_operand:SI 2 "register_operand" "d")]
     UNSPECV_MONITOR)]
  "TARGET_SSE3 && !TARGET_64BIT"
  "monitor\t%0, %1, %2"
  [(set_attr "length" "3")])
8056
;; monitor, 64-bit targets only.  Operand 0 is DImode; operands 1 and 2
;; stay SImode.  The template names no operands because all three are
;; pinned by the "a", "c" and "d" constraints.
;;
;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
;; RCX and RDX are used.  Since 32bit register operands are implicitly
;; zero extended to 64bit, we only need to set up 32bit registers.
(define_insn "sse3_monitor64"
  [(unspec_volatile
     [(match_operand:DI 0 "register_operand" "a")
      (match_operand:SI 1 "register_operand" "c")
      (match_operand:SI 2 "register_operand" "d")]
     UNSPECV_MONITOR)]
  "TARGET_SSE3 && TARGET_64BIT"
  "monitor"
  [(set_attr "length" "3")])
8068
8069;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8070;;
8071;; SSSE3 instructions
8072;;
8073;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8074
;; vphaddw (256-bit): horizontal add of adjacent 16-bit element pairs.
;;
;; The 256-bit form operates on each 128-bit lane independently, so the
;; result is interleaved per lane:
;;   result[0..3]   = pair sums of operand 1, elements 0..7
;;   result[4..7]   = pair sums of operand 2, elements 0..7
;;   result[8..11]  = pair sums of operand 1, elements 8..15
;;   result[12..15] = pair sums of operand 2, elements 8..15
;; The RTL below must follow that order; describing it as "all of
;; operand 1, then all of operand 2" would misstate the instruction.
(define_insn "avx2_phaddwv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          ;; Low 128-bit lane of the result.
          (vec_concat:V8HI
            ;; Elements 0-3: operand 1, elements 0-7.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI
                    (match_operand:V16HI 1 "register_operand" "x")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
            ;; Elements 4-7: operand 2, elements 0-7.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
          ;; High 128-bit lane of the result.
          (vec_concat:V8HI
            ;; Elements 8-11: operand 1, elements 8-15.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
            ;; Elements 12-15: operand 2, elements 8-15.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
  "TARGET_AVX2"
  "vphaddw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8150
;; phaddw / vphaddw (128-bit): horizontal add of adjacent 16-bit element
;; pairs.  Result elements 0-3 come from operand 1, elements 4-7 from
;; operand 2.  Alternative 0 is the two-operand form with the
;; destination tied to operand 1 ("0"); alternative 1 is the
;; three-operand VEX form.
(define_insn "ssse3_phaddwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          ;; Elements 0-3: adjacent pairs of operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0,x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Elements 4-7: adjacent pairs of operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "@
   phaddw\t{%2, %0|%0, %2}
   vphaddw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")
   (set_attr "atom_unit" "complex")
   (set_attr "isa" "noavx,avx")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")])
8199
;; phaddw (64-bit, "y" register class): horizontal add of adjacent
;; 16-bit element pairs.  Result elements 0-1 come from operand 1,
;; elements 2-3 from operand 2.  The destination is tied to operand 1.
(define_insn "ssse3_phaddwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Elements 0-1: adjacent pairs of operand 1.
          (vec_concat:V2HI
            (plus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (plus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Elements 2-3: adjacent pairs of operand 2.
          (vec_concat:V2HI
            (plus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (plus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddw\t{%2, %0|%0, %2}"
  [(set_attr "mode" "DI")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "prefix_extra" "1")])
8228
;; vphaddd (256-bit): horizontal add of adjacent 32-bit element pairs.
;;
;; The 256-bit form operates on each 128-bit lane independently, so the
;; result is interleaved per lane:
;;   result[0..1] = pair sums of operand 1, elements 0..3
;;   result[2..3] = pair sums of operand 2, elements 0..3
;;   result[4..5] = pair sums of operand 1, elements 4..7
;;   result[6..7] = pair sums of operand 2, elements 4..7
(define_insn "avx2_phadddv8si3"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_concat:V8SI
          ;; Low 128-bit lane of the result.
          (vec_concat:V4SI
            ;; Elements 0-1: operand 1, elements 0-3.
            (vec_concat:V2SI
              (plus:SI
                (vec_select:SI
                  (match_operand:V8SI 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
              (plus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
            ;; Elements 2-3: operand 2, elements 0-3.
            (vec_concat:V2SI
              (plus:SI
                (vec_select:SI
                  (match_operand:V8SI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
              (plus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 3)])))))
          ;; High 128-bit lane of the result.
          (vec_concat:V4SI
            ;; Elements 4-5: operand 1, elements 4-7.
            (vec_concat:V2SI
              (plus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
              (plus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))
            ;; Elements 6-7: operand 2, elements 4-7.
            (vec_concat:V2SI
              (plus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
              (plus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX2"
  "vphaddd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8272
;; phaddd / vphaddd (128-bit): horizontal add of adjacent 32-bit element
;; pairs.  Result elements 0-1 come from operand 1, elements 2-3 from
;; operand 2.  Alternative 0 ties the destination to operand 1;
;; alternative 1 is the three-operand VEX form.
(define_insn "ssse3_phadddv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (vec_concat:V4SI
          ;; Elements 0-1: adjacent pairs of operand 1.
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "0,x")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Elements 2-3: adjacent pairs of operand 2.
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "@
   phaddd\t{%2, %0|%0, %2}
   vphaddd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")
   (set_attr "atom_unit" "complex")
   (set_attr "isa" "noavx,avx")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")])
8305
;; phaddd (64-bit, "y" register class): result element 0 is the sum of
;; operand 1's two elements, element 1 the sum of operand 2's two
;; elements.  The destination is tied to operand 1.
(define_insn "ssse3_phadddv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_concat:V2SI
          ;; Element 0: operand 1, element 0 + element 1.
          (plus:SI
            (vec_select:SI
              (match_operand:V2SI 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
          ;; Element 1: operand 2, element 0 + element 1.
          (plus:SI
            (vec_select:SI
              (match_operand:V2SI 2 "nonimmediate_operand" "ym")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phaddd\t{%2, %0|%0, %2}"
  [(set_attr "mode" "DI")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "prefix_extra" "1")])
8326
;; vphaddsw (256-bit): horizontal signed-saturating add (ss_plus) of
;; adjacent 16-bit element pairs.
;;
;; The 256-bit form operates on each 128-bit lane independently, so the
;; result is interleaved per lane:
;;   result[0..3]   = pair sums of operand 1, elements 0..7
;;   result[4..7]   = pair sums of operand 2, elements 0..7
;;   result[8..11]  = pair sums of operand 1, elements 8..15
;;   result[12..15] = pair sums of operand 2, elements 8..15
(define_insn "avx2_phaddswv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          ;; Low 128-bit lane of the result.
          (vec_concat:V8HI
            ;; Elements 0-3: operand 1, elements 0-7.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI
                    (match_operand:V16HI 1 "register_operand" "x")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
            ;; Elements 4-7: operand 2, elements 0-7.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
          ;; High 128-bit lane of the result.
          (vec_concat:V8HI
            ;; Elements 8-11: operand 1, elements 8-15.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
            ;; Elements 12-15: operand 2, elements 8-15.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
  "TARGET_AVX2"
  "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8402
;; phaddsw / vphaddsw (128-bit): horizontal signed-saturating add
;; (ss_plus) of adjacent 16-bit element pairs.  Result elements 0-3 come
;; from operand 1, elements 4-7 from operand 2.  Alternative 0 ties the
;; destination to operand 1; alternative 1 is the three-operand VEX form.
(define_insn "ssse3_phaddswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          ;; Elements 0-3: adjacent pairs of operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0,x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Elements 4-7: adjacent pairs of operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "@
   phaddsw\t{%2, %0|%0, %2}
   vphaddsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")
   (set_attr "atom_unit" "complex")
   (set_attr "isa" "noavx,avx")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")])
8451
;; phaddsw (64-bit, "y" register class): horizontal signed-saturating
;; add (ss_plus) of adjacent 16-bit element pairs.  Result elements 0-1
;; come from operand 1, elements 2-3 from operand 2.  The destination is
;; tied to operand 1.
(define_insn "ssse3_phaddswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Elements 0-1: adjacent pairs of operand 1.
          (vec_concat:V2HI
            (ss_plus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (ss_plus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Elements 2-3: adjacent pairs of operand 2.
          (vec_concat:V2HI
            (ss_plus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (ss_plus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddsw\t{%2, %0|%0, %2}"
  [(set_attr "mode" "DI")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "prefix_extra" "1")])
8480
;; vphsubw (256-bit): horizontal subtract of adjacent 16-bit element
;; pairs (even element minus the following odd element).
;;
;; The 256-bit form operates on each 128-bit lane independently, so the
;; result is interleaved per lane:
;;   result[0..3]   = pair differences of operand 1, elements 0..7
;;   result[4..7]   = pair differences of operand 2, elements 0..7
;;   result[8..11]  = pair differences of operand 1, elements 8..15
;;   result[12..15] = pair differences of operand 2, elements 8..15
(define_insn "avx2_phsubwv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          ;; Low 128-bit lane of the result.
          (vec_concat:V8HI
            ;; Elements 0-3: operand 1, elements 0-7.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI
                    (match_operand:V16HI 1 "register_operand" "x")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
            ;; Elements 4-7: operand 2, elements 0-7.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
          ;; High 128-bit lane of the result.
          (vec_concat:V8HI
            ;; Elements 8-11: operand 1, elements 8-15.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
            ;; Elements 12-15: operand 2, elements 8-15.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
  "TARGET_AVX2"
  "vphsubw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8556
;; phsubw / vphsubw (128-bit): horizontal subtract of adjacent 16-bit
;; element pairs (even element minus the following odd element).
;; Result elements 0-3 come from operand 1, elements 4-7 from operand 2.
;; Alternative 0 ties the destination to operand 1; alternative 1 is the
;; three-operand VEX form.
(define_insn "ssse3_phsubwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          ;; Elements 0-3: adjacent pairs of operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0,x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Elements 4-7: adjacent pairs of operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "@
   phsubw\t{%2, %0|%0, %2}
   vphsubw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")
   (set_attr "atom_unit" "complex")
   (set_attr "isa" "noavx,avx")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")])
8605
;; phsubw (64-bit, "y" register class): horizontal subtract of adjacent
;; 16-bit element pairs (even element minus the following odd element).
;; Result elements 0-1 come from operand 1, elements 2-3 from operand 2.
;; The destination is tied to operand 1.
(define_insn "ssse3_phsubwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Elements 0-1: adjacent pairs of operand 1.
          (vec_concat:V2HI
            (minus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (minus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Elements 2-3: adjacent pairs of operand 2.
          (vec_concat:V2HI
            (minus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (minus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubw\t{%2, %0|%0, %2}"
  [(set_attr "mode" "DI")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "prefix_extra" "1")])
8634
;; vphsubd (256-bit): horizontal subtract of adjacent 32-bit element
;; pairs (even element minus the following odd element).
;;
;; The 256-bit form operates on each 128-bit lane independently, so the
;; result is interleaved per lane:
;;   result[0..1] = pair differences of operand 1, elements 0..3
;;   result[2..3] = pair differences of operand 2, elements 0..3
;;   result[4..5] = pair differences of operand 1, elements 4..7
;;   result[6..7] = pair differences of operand 2, elements 4..7
(define_insn "avx2_phsubdv8si3"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_concat:V8SI
          ;; Low 128-bit lane of the result.
          (vec_concat:V4SI
            ;; Elements 0-1: operand 1, elements 0-3.
            (vec_concat:V2SI
              (minus:SI
                (vec_select:SI
                  (match_operand:V8SI 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
              (minus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
            ;; Elements 2-3: operand 2, elements 0-3.
            (vec_concat:V2SI
              (minus:SI
                (vec_select:SI
                  (match_operand:V8SI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
              (minus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 3)])))))
          ;; High 128-bit lane of the result.
          (vec_concat:V4SI
            ;; Elements 4-5: operand 1, elements 4-7.
            (vec_concat:V2SI
              (minus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
              (minus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))
            ;; Elements 6-7: operand 2, elements 4-7.
            (vec_concat:V2SI
              (minus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
              (minus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX2"
  "vphsubd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8678
;; phsubd / vphsubd (128-bit): horizontal subtract of adjacent 32-bit
;; element pairs (even element minus the following odd element).
;; Result elements 0-1 come from operand 1, elements 2-3 from operand 2.
;; Alternative 0 ties the destination to operand 1; alternative 1 is the
;; three-operand VEX form.
(define_insn "ssse3_phsubdv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (vec_concat:V4SI
          ;; Elements 0-1: adjacent pairs of operand 1.
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "0,x")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Elements 2-3: adjacent pairs of operand 2.
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "@
   phsubd\t{%2, %0|%0, %2}
   vphsubd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")
   (set_attr "atom_unit" "complex")
   (set_attr "isa" "noavx,avx")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")])
8712
;; phsubd (64-bit, "y" register class): result element 0 is operand 1's
;; element 0 minus its element 1; result element 1 is the same
;; difference taken from operand 2.  The destination is tied to
;; operand 1.
(define_insn "ssse3_phsubdv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_concat:V2SI
          ;; Element 0: operand 1, element 0 - element 1.
          (minus:SI
            (vec_select:SI
              (match_operand:V2SI 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
          ;; Element 1: operand 2, element 0 - element 1.
          (minus:SI
            (vec_select:SI
              (match_operand:V2SI 2 "nonimmediate_operand" "ym")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phsubd\t{%2, %0|%0, %2}"
  [(set_attr "mode" "DI")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "prefix_extra" "1")])
8733
;; vphsubsw (256-bit): horizontal signed-saturating subtract (ss_minus)
;; of adjacent 16-bit element pairs (even element minus the following
;; odd element).
;;
;; The 256-bit form operates on each 128-bit lane independently, so the
;; result is interleaved per lane:
;;   result[0..3]   = pair differences of operand 1, elements 0..7
;;   result[4..7]   = pair differences of operand 2, elements 0..7
;;   result[8..11]  = pair differences of operand 1, elements 8..15
;;   result[12..15] = pair differences of operand 2, elements 8..15
(define_insn "avx2_phsubswv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          ;; Low 128-bit lane of the result.
          (vec_concat:V8HI
            ;; Elements 0-3: operand 1, elements 0-7.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI
                    (match_operand:V16HI 1 "register_operand" "x")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
            ;; Elements 4-7: operand 2, elements 0-7.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
          ;; High 128-bit lane of the result.
          (vec_concat:V8HI
            ;; Elements 8-11: operand 1, elements 8-15.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
            ;; Elements 12-15: operand 2, elements 8-15.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
  "TARGET_AVX2"
  "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8809
;; phsubsw / vphsubsw (128-bit): horizontal signed-saturating subtract
;; (ss_minus) of adjacent 16-bit element pairs (even element minus the
;; following odd element).  Result elements 0-3 come from operand 1,
;; elements 4-7 from operand 2.  Alternative 0 ties the destination to
;; operand 1; alternative 1 is the three-operand VEX form.
(define_insn "ssse3_phsubswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          ;; Elements 0-3: adjacent pairs of operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0,x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Elements 4-7: adjacent pairs of operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "@
   phsubsw\t{%2, %0|%0, %2}
   vphsubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")
   (set_attr "atom_unit" "complex")
   (set_attr "isa" "noavx,avx")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")])
8858
;; Horizontal saturating subtract on V4HI (phsubsw) using "y"
;; (MMX) registers.  Result elements 0-1 are the ss_minus of the
;; adjacent pairs of operand 1, elements 2-3 those of operand 2.
;; Operand 1 is tied to the destination.
8859(define_insn "ssse3_phsubswv4hi3"
8860  [(set (match_operand:V4HI 0 "register_operand" "=y")
8861	(vec_concat:V4HI
8862	  (vec_concat:V2HI
8863	    (ss_minus:HI
8864	      (vec_select:HI
8865		(match_operand:V4HI 1 "register_operand" "0")
8866		(parallel [(const_int 0)]))
8867	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8868	    (ss_minus:HI
8869	      (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8870	      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8871	  (vec_concat:V2HI
8872	    (ss_minus:HI
8873	      (vec_select:HI
8874		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
8875		(parallel [(const_int 0)]))
8876	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8877	    (ss_minus:HI
8878	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8879	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8880  "TARGET_SSSE3"
8881  "phsubsw\t{%2, %0|%0, %2}"
8882  [(set_attr "type" "sseiadd")
8883   (set_attr "atom_unit" "complex")
8884   (set_attr "prefix_extra" "1")
8885   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8886   (set_attr "mode" "DI")])
8887
;; vpmaddubsw on 256-bit vectors.
;; The bytes of operand 1 are zero-extended and the bytes of operand 2
;; are sign-extended to HImode.  The even-indexed bytes (0, 2, ..., 30)
;; are multiplied together, the odd-indexed bytes (1, 3, ..., 31) are
;; multiplied together, and the two V16HI products are combined with a
;; signed saturating add (ss_plus).
8888(define_insn "avx2_pmaddubsw256"
8889  [(set (match_operand:V16HI 0 "register_operand" "=x")
8890	(ss_plus:V16HI
8891	  (mult:V16HI
8892	    (zero_extend:V16HI
8893	      (vec_select:V16QI
8894		(match_operand:V32QI 1 "register_operand" "x")
8895		(parallel [(const_int 0)
8896			   (const_int 2)
8897			   (const_int 4)
8898			   (const_int 6)
8899			   (const_int 8)
8900			   (const_int 10)
8901			   (const_int 12)
8902			   (const_int 14)
8903			   (const_int 16)
8904			   (const_int 18)
8905			   (const_int 20)
8906			   (const_int 22)
8907			   (const_int 24)
8908			   (const_int 26)
8909			   (const_int 28)
8910			   (const_int 30)])))
8911	    (sign_extend:V16HI
8912	      (vec_select:V16QI
8913		(match_operand:V32QI 2 "nonimmediate_operand" "xm")
8914		(parallel [(const_int 0)
8915			   (const_int 2)
8916			   (const_int 4)
8917			   (const_int 6)
8918			   (const_int 8)
8919			   (const_int 10)
8920			   (const_int 12)
8921			   (const_int 14)
8922			   (const_int 16)
8923			   (const_int 18)
8924			   (const_int 20)
8925			   (const_int 22)
8926			   (const_int 24)
8927			   (const_int 26)
8928			   (const_int 28)
8929			   (const_int 30)]))))
8930	  (mult:V16HI
8931	    (zero_extend:V16HI
8932	      (vec_select:V16QI (match_dup 1)
8933		(parallel [(const_int 1)
8934			   (const_int 3)
8935			   (const_int 5)
8936			   (const_int 7)
8937			   (const_int 9)
8938			   (const_int 11)
8939			   (const_int 13)
8940			   (const_int 15)
8941			   (const_int 17)
8942			   (const_int 19)
8943			   (const_int 21)
8944			   (const_int 23)
8945			   (const_int 25)
8946			   (const_int 27)
8947			   (const_int 29)
8948			   (const_int 31)])))
8949	    (sign_extend:V16HI
8950	      (vec_select:V16QI (match_dup 2)
8951		(parallel [(const_int 1)
8952			   (const_int 3)
8953			   (const_int 5)
8954			   (const_int 7)
8955			   (const_int 9)
8956			   (const_int 11)
8957			   (const_int 13)
8958			   (const_int 15)
8959			   (const_int 17)
8960			   (const_int 19)
8961			   (const_int 21)
8962			   (const_int 23)
8963			   (const_int 25)
8964			   (const_int 27)
8965			   (const_int 29)
8966			   (const_int 31)]))))))]
8967  "TARGET_AVX2"
8968  "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8969  [(set_attr "type" "sseiadd")
8970   (set_attr "prefix_extra" "1")
8971   (set_attr "prefix" "vex")
8972   (set_attr "mode" "OI")])
8973
;; pmaddubsw / vpmaddubsw on 128-bit vectors.
;; Bytes of operand 1 are zero-extended, bytes of operand 2 are
;; sign-extended; even-indexed and odd-indexed bytes are multiplied
;; separately and the two V8HI products are joined with ss_plus.
;; Alternative 0 is the two-operand form with operand 1 tied to the
;; destination; alternative 1 is the three-operand VEX form.
8974(define_insn "ssse3_pmaddubsw128"
8975  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8976	(ss_plus:V8HI
8977	  (mult:V8HI
8978	    (zero_extend:V8HI
8979	      (vec_select:V8QI
8980		(match_operand:V16QI 1 "register_operand" "0,x")
8981		(parallel [(const_int 0)
8982			   (const_int 2)
8983			   (const_int 4)
8984			   (const_int 6)
8985			   (const_int 8)
8986			   (const_int 10)
8987			   (const_int 12)
8988			   (const_int 14)])))
8989	    (sign_extend:V8HI
8990	      (vec_select:V8QI
8991		(match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
8992		(parallel [(const_int 0)
8993			   (const_int 2)
8994			   (const_int 4)
8995			   (const_int 6)
8996			   (const_int 8)
8997			   (const_int 10)
8998			   (const_int 12)
8999			   (const_int 14)]))))
9000	  (mult:V8HI
9001	    (zero_extend:V8HI
9002	      (vec_select:V8QI (match_dup 1)
9003		(parallel [(const_int 1)
9004			   (const_int 3)
9005			   (const_int 5)
9006			   (const_int 7)
9007			   (const_int 9)
9008			   (const_int 11)
9009			   (const_int 13)
9010			   (const_int 15)])))
9011	    (sign_extend:V8HI
9012	      (vec_select:V8QI (match_dup 2)
9013		(parallel [(const_int 1)
9014			   (const_int 3)
9015			   (const_int 5)
9016			   (const_int 7)
9017			   (const_int 9)
9018			   (const_int 11)
9019			   (const_int 13)
9020			   (const_int 15)]))))))]
9021  "TARGET_SSSE3"
9022  "@
9023   pmaddubsw\t{%2, %0|%0, %2}
9024   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
9025  [(set_attr "isa" "noavx,avx")
9026   (set_attr "type" "sseiadd")
9027   (set_attr "atom_unit" "simul")
9028   (set_attr "prefix_data16" "1,*")
9029   (set_attr "prefix_extra" "1")
9030   (set_attr "prefix" "orig,vex")
9031   (set_attr "mode" "TI")])
9032
;; pmaddubsw on 64-bit vectors held in "y" (MMX) registers.
;; Same shape as the wider variants: zero-extended bytes of operand 1
;; times sign-extended bytes of operand 2, even and odd byte positions
;; multiplied separately, products combined with ss_plus into V4HI.
;; Operand 1 is tied to the destination.
9033(define_insn "ssse3_pmaddubsw"
9034  [(set (match_operand:V4HI 0 "register_operand" "=y")
9035	(ss_plus:V4HI
9036	  (mult:V4HI
9037	    (zero_extend:V4HI
9038	      (vec_select:V4QI
9039		(match_operand:V8QI 1 "register_operand" "0")
9040		(parallel [(const_int 0)
9041			   (const_int 2)
9042			   (const_int 4)
9043			   (const_int 6)])))
9044	    (sign_extend:V4HI
9045	      (vec_select:V4QI
9046		(match_operand:V8QI 2 "nonimmediate_operand" "ym")
9047		(parallel [(const_int 0)
9048			   (const_int 2)
9049			   (const_int 4)
9050			   (const_int 6)]))))
9051	  (mult:V4HI
9052	    (zero_extend:V4HI
9053	      (vec_select:V4QI (match_dup 1)
9054		(parallel [(const_int 1)
9055			   (const_int 3)
9056			   (const_int 5)
9057			   (const_int 7)])))
9058	    (sign_extend:V4HI
9059	      (vec_select:V4QI (match_dup 2)
9060		(parallel [(const_int 1)
9061			   (const_int 3)
9062			   (const_int 5)
9063			   (const_int 7)]))))))]
9064  "TARGET_SSSE3"
9065  "pmaddubsw\t{%2, %0|%0, %2}"
9066  [(set_attr "type" "sseiadd")
9067   (set_attr "atom_unit" "simul")
9068   (set_attr "prefix_extra" "1")
9069   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9070   (set_attr "mode" "DI")])
9071
;; Expander for the V16HI rounding high multiply.  The RTL computes,
;; per element: sign-extend both inputs to SImode, multiply, logical
;; shift right by 14, add 1, logical shift right by 1, truncate to HImode.
;; The preparation statement calls ix86_fixup_binary_operands_no_copy
;; with MULT on the operands before the pattern is emitted.
;; NOTE(review): despite the "umulhrsw" spelling, the inputs are
;; sign-extended and the insn of the same name below emits vpmulhrsw.
;; NOTE(review): the all-ones constant is written as const_vector:V16HI
;; although it is an operand of plus:V16SI -- looks like a mode
;; mismatch; confirm before touching (the insn pattern must stay in sync).
9072(define_expand "avx2_umulhrswv16hi3"
9073  [(set (match_operand:V16HI 0 "register_operand" "")
9074	(truncate:V16HI
9075	  (lshiftrt:V16SI
9076	    (plus:V16SI
9077	      (lshiftrt:V16SI
9078		(mult:V16SI
9079		  (sign_extend:V16SI
9080		    (match_operand:V16HI 1 "nonimmediate_operand" ""))
9081		  (sign_extend:V16SI
9082		    (match_operand:V16HI 2 "nonimmediate_operand" "")))
9083		(const_int 14))
9084	      (const_vector:V16HI [(const_int 1) (const_int 1)
9085				   (const_int 1) (const_int 1)
9086				   (const_int 1) (const_int 1)
9087				   (const_int 1) (const_int 1)
9088				   (const_int 1) (const_int 1)
9089				   (const_int 1) (const_int 1)
9090				   (const_int 1) (const_int 1)
9091				   (const_int 1) (const_int 1)]))
9092	    (const_int 1))))]
9093  "TARGET_AVX2"
9094  "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
9095
;; Insn pattern for the V16HI rounding high multiply; emits vpmulhrsw.
;; Per element: ((sext (op1) * sext (op2)) >> 14) + 1) >> 1, truncated
;; to HImode (both shifts are lshiftrt).
;; The "%" on operand 1 marks operands 1 and 2 as commutative; the insn
;; condition additionally requires ix86_binary_operator_ok for MULT.
;; NOTE(review): the constant-one vector carries mode V16HI inside a
;; V16SI plus -- it must match the expander above exactly, so change
;; both together or not at all.
9096(define_insn "*avx2_umulhrswv16hi3"
9097  [(set (match_operand:V16HI 0 "register_operand" "=x")
9098	(truncate:V16HI
9099	  (lshiftrt:V16SI
9100	    (plus:V16SI
9101	      (lshiftrt:V16SI
9102		(mult:V16SI
9103		  (sign_extend:V16SI
9104		    (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
9105		  (sign_extend:V16SI
9106		    (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
9107		(const_int 14))
9108	      (const_vector:V16HI [(const_int 1) (const_int 1)
9109				   (const_int 1) (const_int 1)
9110				   (const_int 1) (const_int 1)
9111				   (const_int 1) (const_int 1)
9112				   (const_int 1) (const_int 1)
9113				   (const_int 1) (const_int 1)
9114				   (const_int 1) (const_int 1)
9115				   (const_int 1) (const_int 1)]))
9116	    (const_int 1))))]
9117  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9118  "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9119  [(set_attr "type" "sseimul")
9120   (set_attr "prefix_extra" "1")
9121   (set_attr "prefix" "vex")
9122   (set_attr "mode" "OI")])
9123
;; Expander for the V8HI rounding high multiply.  Per element the RTL
;; is: sign-extend both inputs to SImode, multiply, logical shift right
;; by 14, add 1, logical shift right by 1, truncate to HImode.
;; The preparation statement calls ix86_fixup_binary_operands_no_copy
;; with MULT on the operands.
;; NOTE(review): the constant-one vector is const_vector:V8HI inside a
;; plus:V8SI -- apparent mode mismatch; confirm, and keep in sync with
;; the matching insn pattern.
9124(define_expand "ssse3_pmulhrswv8hi3"
9125  [(set (match_operand:V8HI 0 "register_operand" "")
9126	(truncate:V8HI
9127	  (lshiftrt:V8SI
9128	    (plus:V8SI
9129	      (lshiftrt:V8SI
9130		(mult:V8SI
9131		  (sign_extend:V8SI
9132		    (match_operand:V8HI 1 "nonimmediate_operand" ""))
9133		  (sign_extend:V8SI
9134		    (match_operand:V8HI 2 "nonimmediate_operand" "")))
9135		(const_int 14))
9136	      (const_vector:V8HI [(const_int 1) (const_int 1)
9137				  (const_int 1) (const_int 1)
9138				  (const_int 1) (const_int 1)
9139				  (const_int 1) (const_int 1)]))
9140	    (const_int 1))))]
9141  "TARGET_SSSE3"
9142  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
9143
;; Insn pattern for the V8HI rounding high multiply (pmulhrsw /
;; vpmulhrsw).  Per element: (((sext (op1) * sext (op2)) >> 14) + 1) >> 1,
;; truncated to HImode.
;; Operands 1 and 2 are commutative ("%").  Alternative 0 ties operand 1
;; to the destination (two-operand form); alternative 1 is the
;; three-operand VEX form.
;; NOTE(review): const_vector:V8HI inside plus:V8SI mirrors the
;; expander; change both together or not at all.
9144(define_insn "*ssse3_pmulhrswv8hi3"
9145  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9146	(truncate:V8HI
9147	  (lshiftrt:V8SI
9148	    (plus:V8SI
9149	      (lshiftrt:V8SI
9150		(mult:V8SI
9151		  (sign_extend:V8SI
9152		    (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
9153		  (sign_extend:V8SI
9154		    (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
9155		(const_int 14))
9156	      (const_vector:V8HI [(const_int 1) (const_int 1)
9157				  (const_int 1) (const_int 1)
9158				  (const_int 1) (const_int 1)
9159				  (const_int 1) (const_int 1)]))
9160	    (const_int 1))))]
9161  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9162  "@
9163   pmulhrsw\t{%2, %0|%0, %2}
9164   vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9165  [(set_attr "isa" "noavx,avx")
9166   (set_attr "type" "sseimul")
9167   (set_attr "prefix_data16" "1,*")
9168   (set_attr "prefix_extra" "1")
9169   (set_attr "prefix" "orig,vex")
9170   (set_attr "mode" "TI")])
9171
;; Expander for the V4HI rounding high multiply.  Per element the RTL
;; is: sign-extend both inputs to SImode, multiply, logical shift right
;; by 14, add 1, logical shift right by 1, truncate to HImode.
;; The preparation statement calls ix86_fixup_binary_operands_no_copy
;; with MULT on the operands.
;; NOTE(review): the constant-one vector is const_vector:V4HI inside a
;; plus:V4SI -- apparent mode mismatch; confirm, and keep in sync with
;; the matching insn pattern.
9172(define_expand "ssse3_pmulhrswv4hi3"
9173  [(set (match_operand:V4HI 0 "register_operand" "")
9174	(truncate:V4HI
9175	  (lshiftrt:V4SI
9176	    (plus:V4SI
9177	      (lshiftrt:V4SI
9178		(mult:V4SI
9179		  (sign_extend:V4SI
9180		    (match_operand:V4HI 1 "nonimmediate_operand" ""))
9181		  (sign_extend:V4SI
9182		    (match_operand:V4HI 2 "nonimmediate_operand" "")))
9183		(const_int 14))
9184	      (const_vector:V4HI [(const_int 1) (const_int 1)
9185				  (const_int 1) (const_int 1)]))
9186	    (const_int 1))))]
9187  "TARGET_SSSE3"
9188  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
9189
;; Insn pattern for the V4HI rounding high multiply (pmulhrsw) on "y"
;; (MMX) registers.  Per element:
;; (((sext (op1) * sext (op2)) >> 14) + 1) >> 1, truncated to HImode.
;; Operands 1 and 2 are commutative ("%"); operand 1 is tied to the
;; destination.
;; NOTE(review): const_vector:V4HI inside plus:V4SI mirrors the
;; expander; change both together or not at all.
9190(define_insn "*ssse3_pmulhrswv4hi3"
9191  [(set (match_operand:V4HI 0 "register_operand" "=y")
9192	(truncate:V4HI
9193	  (lshiftrt:V4SI
9194	    (plus:V4SI
9195	      (lshiftrt:V4SI
9196		(mult:V4SI
9197		  (sign_extend:V4SI
9198		    (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9199		  (sign_extend:V4SI
9200		    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9201		(const_int 14))
9202	      (const_vector:V4HI [(const_int 1) (const_int 1)
9203				  (const_int 1) (const_int 1)]))
9204	    (const_int 1))))]
9205  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9206  "pmulhrsw\t{%2, %0|%0, %2}"
9207  [(set_attr "type" "sseimul")
9208   (set_attr "prefix_extra" "1")
9209   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9210   (set_attr "mode" "DI")])
9211
;; pshufb / vpshufb for every mode in the VI1_AVX2 iterator.
;; The operation is opaque to the RTL optimizers: it is represented as
;; an UNSPEC_PSHUFB of operand 1 and operand 2.
;; Alternative 0 is the two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
9212(define_insn "<ssse3_avx2>_pshufb<mode>3"
9213  [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9214	(unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9215			  (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
9216			 UNSPEC_PSHUFB))]
9217  "TARGET_SSSE3"
9218  "@
9219   pshufb\t{%2, %0|%0, %2}
9220   vpshufb\t{%2, %1, %0|%0, %1, %2}"
9221  [(set_attr "isa" "noavx,avx")
9222   (set_attr "type" "sselog1")
9223   (set_attr "prefix_data16" "1,*")
9224   (set_attr "prefix_extra" "1")
9225   (set_attr "prefix" "orig,vex")
9226   (set_attr "mode" "<sseinsnmode>")])
9227
;; pshufb on V8QI in "y" (MMX) registers, represented as
;; UNSPEC_PSHUFB of operands 1 and 2.  Operand 1 is tied to the
;; destination.
;; The ';' that follows the output template starts an (empty) comment
;; in the machine-description syntax.
9228(define_insn "ssse3_pshufbv8qi3"
9229  [(set (match_operand:V8QI 0 "register_operand" "=y")
9230	(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9231		      (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9232		     UNSPEC_PSHUFB))]
9233  "TARGET_SSSE3"
9234  "pshufb\t{%2, %0|%0, %2}";
9235  [(set_attr "type" "sselog1")
9236   (set_attr "prefix_extra" "1")
9237   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9238   (set_attr "mode" "DI")])
9239
;; psign{b,w,d} / vpsign{b,w,d} for every mode in the VI124_AVX2
;; iterator, represented as UNSPEC_PSIGN of operands 1 and 2.  The
;; mnemonic suffix comes from <ssemodesuffix>.
;; Alternative 0 is the two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
9240(define_insn "<ssse3_avx2>_psign<mode>3"
9241  [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
9242	(unspec:VI124_AVX2
9243	  [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
9244	   (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
9245	  UNSPEC_PSIGN))]
9246  "TARGET_SSSE3"
9247  "@
9248   psign<ssemodesuffix>\t{%2, %0|%0, %2}
9249   vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9250  [(set_attr "isa" "noavx,avx")
9251   (set_attr "type" "sselog1")
9252   (set_attr "prefix_data16" "1,*")
9253   (set_attr "prefix_extra" "1")
9254   (set_attr "prefix" "orig,vex")
9255   (set_attr "mode" "<sseinsnmode>")])
9256
;; psign for every mode in the MMXMODEI iterator, using "y" (MMX)
;; registers and represented as UNSPEC_PSIGN.  The mnemonic suffix
;; comes from <mmxvecsize>; operand 1 is tied to the destination.
;; The ';' that follows the output template starts an (empty) comment
;; in the machine-description syntax.
9257(define_insn "ssse3_psign<mode>3"
9258  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9259	(unspec:MMXMODEI
9260	  [(match_operand:MMXMODEI 1 "register_operand" "0")
9261	   (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9262	  UNSPEC_PSIGN))]
9263  "TARGET_SSSE3"
9264  "psign<mmxvecsize>\t{%2, %0|%0, %2}";
9265  [(set_attr "type" "sselog1")
9266   (set_attr "prefix_extra" "1")
9267   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9268   (set_attr "mode" "DI")])
9269
;; palignr / vpalignr for every mode in the SSESCALARMODE iterator,
;; represented as UNSPEC_PALIGNR of operands 1, 2 and the immediate 3.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing -- presumably the RTL carries a bit
;; count while the instruction takes a byte count (confirm against the
;; predicate definition).
;; which_alternative selects the two-operand form (0) or the
;; three-operand VEX form (1).
9270(define_insn "<ssse3_avx2>_palignr<mode>"
9271  [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
9272	(unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
9273			       (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
9274			       (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
9275			      UNSPEC_PALIGNR))]
9276  "TARGET_SSSE3"
9277{
9278  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9279
9280  switch (which_alternative)
9281    {
9282    case 0:
9283      return "palignr\t{%3, %2, %0|%0, %2, %3}";
9284    case 1:
9285      return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9286    default:
9287      gcc_unreachable ();
9288    }
9289}
9290  [(set_attr "isa" "noavx,avx")
9291   (set_attr "type" "sseishft")
9292   (set_attr "atom_unit" "sishuf")
9293   (set_attr "prefix_data16" "1,*")
9294   (set_attr "prefix_extra" "1")
9295   (set_attr "length_immediate" "1")
9296   (set_attr "prefix" "orig,vex")
9297   (set_attr "mode" "<sseinsnmode>")])
9298
;; palignr on DImode values in "y" (MMX) registers, represented as
;; UNSPEC_PALIGNR.  Operand 1 is tied to the destination.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand and is divided
;; by 8 before being printed -- presumably a bit count converted to the
;; byte count the instruction expects (confirm against the predicate).
9299(define_insn "ssse3_palignrdi"
9300  [(set (match_operand:DI 0 "register_operand" "=y")
9301	(unspec:DI [(match_operand:DI 1 "register_operand" "0")
9302		    (match_operand:DI 2 "nonimmediate_operand" "ym")
9303		    (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9304		   UNSPEC_PALIGNR))]
9305  "TARGET_SSSE3"
9306{
9307  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9308  return "palignr\t{%3, %2, %0|%0, %2, %3}";
9309}
9310  [(set_attr "type" "sseishft")
9311   (set_attr "atom_unit" "sishuf")
9312   (set_attr "prefix_extra" "1")
9313   (set_attr "length_immediate" "1")
9314   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9315   (set_attr "mode" "DI")])
9316
;; Element-wise absolute value for every mode in the VI124_AVX2
;; iterator, expressed with the generic abs RTL code.  The template
;; starts with "%v" and the "prefix" attribute is maybe_vex, so one
;; alternative covers both the legacy and the VEX encoding of
;; pabs<ssemodesuffix>.
9317(define_insn "abs<mode>2"
9318  [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
9319	(abs:VI124_AVX2
9320	  (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
9321  "TARGET_SSSE3"
9322  "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
9323  [(set_attr "type" "sselog1")
9324   (set_attr "prefix_data16" "1")
9325   (set_attr "prefix_extra" "1")
9326   (set_attr "prefix" "maybe_vex")
9327   (set_attr "mode" "<sseinsnmode>")])
9328
;; Element-wise absolute value for every mode in the MMXMODEI iterator,
;; using "y" (MMX) registers; emits pabs<mmxvecsize>.
;; The ';' that follows the output template starts an (empty) comment
;; in the machine-description syntax.
9329(define_insn "abs<mode>2"
9330  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9331	(abs:MMXMODEI
9332	  (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9333  "TARGET_SSSE3"
9334  "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
9335  [(set_attr "type" "sselog1")
9336   (set_attr "prefix_rep" "0")
9337   (set_attr "prefix_extra" "1")
9338   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9339   (set_attr "mode" "DI")])
9340
9341;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9342;;
9343;; AMD SSE4A instructions
9344;;
9345;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9346
;; SSE4A store of a scalar MODEF value from an "x" register to memory,
;; represented as UNSPEC_MOVNT; emits movnt<ssemodesuffix>.
9347(define_insn "sse4a_movnt<mode>"
9348  [(set (match_operand:MODEF 0 "memory_operand" "=m")
9349	(unspec:MODEF
9350	  [(match_operand:MODEF 1 "register_operand" "x")]
9351	  UNSPEC_MOVNT))]
9352  "TARGET_SSE4A"
9353  "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
9354  [(set_attr "type" "ssemov")
9355   (set_attr "mode" "<MODE>")])
9356
;; SSE4A store of element 0 of a VF_128 vector register to a scalar
;; memory location, represented as UNSPEC_MOVNT of a vec_select;
;; emits movnt<ssescalarmodesuffix>.
9357(define_insn "sse4a_vmmovnt<mode>"
9358  [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9359	(unspec:<ssescalarmode>
9360	  [(vec_select:<ssescalarmode>
9361	     (match_operand:VF_128 1 "register_operand" "x")
9362	     (parallel [(const_int 0)]))]
9363	  UNSPEC_MOVNT))]
9364  "TARGET_SSE4A"
9365  "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9366  [(set_attr "type" "ssemov")
9367   (set_attr "mode" "<ssescalarmode>")])
9368
;; SSE4A extrq, immediate form: UNSPEC_EXTRQI of operand 1 (tied to the
;; destination) and two constants in the range 0..255 (operands 2, 3).
;; length_immediate is 2 because two immediates are emitted.
9369(define_insn "sse4a_extrqi"
9370  [(set (match_operand:V2DI 0 "register_operand" "=x")
9371	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9372		      (match_operand 2 "const_0_to_255_operand" "")
9373		      (match_operand 3 "const_0_to_255_operand" "")]
9374		     UNSPEC_EXTRQI))]
9375  "TARGET_SSE4A"
9376  "extrq\t{%3, %2, %0|%0, %2, %3}"
9377  [(set_attr "type" "sse")
9378   (set_attr "prefix_data16" "1")
9379   (set_attr "length_immediate" "2")
9380   (set_attr "mode" "TI")])
9381
;; SSE4A extrq, register form: UNSPEC_EXTRQ of operand 1 (tied to the
;; destination) and a V16QI register operand 2.
9382(define_insn "sse4a_extrq"
9383  [(set (match_operand:V2DI 0 "register_operand" "=x")
9384	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9385		      (match_operand:V16QI 2 "register_operand" "x")]
9386		     UNSPEC_EXTRQ))]
9387  "TARGET_SSE4A"
9388  "extrq\t{%2, %0|%0, %2}"
9389  [(set_attr "type" "sse")
9390   (set_attr "prefix_data16" "1")
9391   (set_attr "mode" "TI")])
9392
;; SSE4A insertq, immediate form: UNSPEC_INSERTQI of operand 1 (tied to
;; the destination), register operand 2 and two constants in the range
;; 0..255 (operands 3, 4).  length_immediate is 2 because two
;; immediates are emitted.
9393(define_insn "sse4a_insertqi"
9394  [(set (match_operand:V2DI 0 "register_operand" "=x")
9395	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9396		      (match_operand:V2DI 2 "register_operand" "x")
9397		      (match_operand 3 "const_0_to_255_operand" "")
9398		      (match_operand 4 "const_0_to_255_operand" "")]
9399		     UNSPEC_INSERTQI))]
9400  "TARGET_SSE4A"
9401  "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9402  [(set_attr "type" "sseins")
9403   (set_attr "prefix_data16" "0")
9404   (set_attr "prefix_rep" "1")
9405   (set_attr "length_immediate" "2")
9406   (set_attr "mode" "TI")])
9407
;; SSE4A insertq, register form: UNSPEC_INSERTQ of operand 1 (tied to
;; the destination) and register operand 2.
9408(define_insn "sse4a_insertq"
9409  [(set (match_operand:V2DI 0 "register_operand" "=x")
9410	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9411		      (match_operand:V2DI 2 "register_operand" "x")]
9412		     UNSPEC_INSERTQ))]
9413  "TARGET_SSE4A"
9414  "insertq\t{%2, %0|%0, %2}"
9415  [(set_attr "type" "sseins")
9416   (set_attr "prefix_data16" "0")
9417   (set_attr "prefix_rep" "1")
9418   (set_attr "mode" "TI")])
9419
9420;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9421;;
9422;; Intel SSE4.1 instructions
9423;;
9424;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9425
;; Immediate-controlled floating-point blend for every mode in the VF
;; iterator, expressed as vec_merge: the first vec_merge input is
;; operand 2, the second is operand 1, and the selector is the
;; immediate operand 3 (range given by const_0_to_<blendbits>_operand).
;; Alternative 0 is the two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
9426(define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
9427  [(set (match_operand:VF 0 "register_operand" "=x,x")
9428	(vec_merge:VF
9429	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9430	  (match_operand:VF 1 "register_operand" "0,x")
9431	  (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
9432  "TARGET_SSE4_1"
9433  "@
9434   blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9435   vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9436  [(set_attr "isa" "noavx,avx")
9437   (set_attr "type" "ssemov")
9438   (set_attr "length_immediate" "1")
9439   (set_attr "prefix_data16" "1,*")
9440   (set_attr "prefix_extra" "1")
9441   (set_attr "prefix" "orig,vex")
9442   (set_attr "mode" "<MODE>")])
9443
;; Variable (register-controlled) floating-point blend for every mode
;; in the VF iterator, represented as UNSPEC_BLENDV of operands 1, 2
;; and the selector register operand 3.
;; In alternative 0 (non-AVX) operand 3 uses the "Yz" constraint and
;; operands 0-2 use the *_not_xmm0_* predicates -- presumably because
;; the legacy encoding takes its selector implicitly in xmm0; confirm
;; in constraints.md / predicates.md.  Alternative 1 is the VEX form
;; with an ordinary "x" selector register.
9444(define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
9445  [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9446	(unspec:VF
9447	  [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9448	   (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9449	   (match_operand:VF 3 "register_operand" "Yz,x")]
9450	  UNSPEC_BLENDV))]
9451  "TARGET_SSE4_1"
9452  "@
9453   blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9454   vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9455  [(set_attr "isa" "noavx,avx")
9456   (set_attr "type" "ssemov")
9457   (set_attr "length_immediate" "1")
9458   (set_attr "prefix_data16" "1,*")
9459   (set_attr "prefix_extra" "1")
9460   (set_attr "prefix" "orig,vex")
9461   (set_attr "mode" "<MODE>")])
9462
;; dp<suffix> / vdp<suffix> for every mode in the VF iterator,
;; represented as UNSPEC_DP of operands 1, 2 and an immediate in the
;; range 0..255.  Operands 1 and 2 are marked commutative ("%").
;; Alternative 0 is the two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
9463(define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
9464  [(set (match_operand:VF 0 "register_operand" "=x,x")
9465	(unspec:VF
9466	  [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
9467	   (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9468	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9469	  UNSPEC_DP))]
9470  "TARGET_SSE4_1"
9471  "@
9472   dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9473   vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9474  [(set_attr "isa" "noavx,avx")
9475   (set_attr "type" "ssemul")
9476   (set_attr "length_immediate" "1")
9477   (set_attr "prefix_data16" "1,*")
9478   (set_attr "prefix_extra" "1")
9479   (set_attr "prefix" "orig,vex")
9480   (set_attr "mode" "<MODE>")])
9481
;; movntdqa / vmovntdqa: load from a memory operand into a register for
;; every mode in the VI8_AVX2 iterator, represented as UNSPEC_MOVNTDQA.
;; "%v" plus prefix maybe_vex lets one alternative cover both encodings.
9482(define_insn "<sse4_1_avx2>_movntdqa"
9483  [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
9484	(unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
9485		     UNSPEC_MOVNTDQA))]
9486  "TARGET_SSE4_1"
9487  "%vmovntdqa\t{%1, %0|%0, %1}"
9488  [(set_attr "type" "ssemov")
9489   (set_attr "prefix_extra" "1")
9490   (set_attr "prefix" "maybe_vex")
9491   (set_attr "mode" "<sseinsnmode>")])
9492
;; mpsadbw / vmpsadbw for every mode in the VI1_AVX2 iterator,
;; represented as UNSPEC_MPSADBW of operands 1, 2 and an immediate in
;; the range 0..255.
;; Alternative 0 is the two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
9493(define_insn "<sse4_1_avx2>_mpsadbw"
9494  [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9495	(unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9496			  (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
9497			  (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9498			 UNSPEC_MPSADBW))]
9499  "TARGET_SSE4_1"
9500  "@
9501   mpsadbw\t{%3, %2, %0|%0, %2, %3}
9502   vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9503  [(set_attr "isa" "noavx,avx")
9504   (set_attr "type" "sselog1")
9505   (set_attr "length_immediate" "1")
9506   (set_attr "prefix_extra" "1")
9507   (set_attr "prefix" "orig,vex")
9508   (set_attr "mode" "<sseinsnmode>")])
9509
;; vpackusdw on 256-bit vectors: the RTL is the vec_concat of the
;; unsigned-saturating truncation (us_truncate) of operand 1 and of
;; operand 2 from V8SI to V8HI.
;; NOTE(review): the RTL shows a plain whole-vector concatenation;
;; whether that matches the hardware's per-128-bit-lane behaviour
;; cannot be told from here -- confirm against the ISA reference.
9510(define_insn "avx2_packusdw"
9511  [(set (match_operand:V16HI 0 "register_operand" "=x")
9512	(vec_concat:V16HI
9513	  (us_truncate:V8HI
9514	    (match_operand:V8SI 1 "register_operand" "x"))
9515	  (us_truncate:V8HI
9516	    (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
9517  "TARGET_AVX2"
9518  "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9519  [(set_attr "type" "sselog")
9520   (set_attr "prefix_extra" "1")
9521   (set_attr "prefix" "vex")
9522   (set_attr "mode" "OI")])
9523
;; packusdw / vpackusdw on 128-bit vectors: the result is the
;; vec_concat of the unsigned-saturating truncation (us_truncate) of
;; operand 1 (low half) and operand 2 (high half) from V4SI to V4HI.
;; Alternative 0 is the two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
9524(define_insn "sse4_1_packusdw"
9525  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9526	(vec_concat:V8HI
9527	  (us_truncate:V4HI
9528	    (match_operand:V4SI 1 "register_operand" "0,x"))
9529	  (us_truncate:V4HI
9530	    (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
9531  "TARGET_SSE4_1"
9532  "@
9533   packusdw\t{%2, %0|%0, %2}
9534   vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9535  [(set_attr "isa" "noavx,avx")
9536   (set_attr "type" "sselog")
9537   (set_attr "prefix_extra" "1")
9538   (set_attr "prefix" "orig,vex")
9539   (set_attr "mode" "TI")])
9540
;; pblendvb / vpblendvb for every mode in the VI1_AVX2 iterator,
;; represented as UNSPEC_BLENDV of operands 1, 2 and the selector
;; register operand 3.
;; In alternative 0 (non-AVX) operand 3 uses the "Yz" constraint and
;; operands 0-2 use the *_not_xmm0_* predicates -- presumably because
;; the legacy encoding takes its selector implicitly in xmm0; confirm
;; in constraints.md / predicates.md.  Alternative 1 is the VEX form.
;; length_immediate is "*" (default) for alternative 0 and 1 for the
;; VEX alternative.
9541(define_insn "<sse4_1_avx2>_pblendvb"
9542  [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9543	(unspec:VI1_AVX2
9544	  [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx"  "0,x")
9545	   (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9546	   (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
9547	  UNSPEC_BLENDV))]
9548  "TARGET_SSE4_1"
9549  "@
9550   pblendvb\t{%3, %2, %0|%0, %2, %3}
9551   vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9552  [(set_attr "isa" "noavx,avx")
9553   (set_attr "type" "ssemov")
9554   (set_attr "prefix_extra" "1")
9555   (set_attr "length_immediate" "*,1")
9556   (set_attr "prefix" "orig,vex")
9557   (set_attr "mode" "<sseinsnmode>")])
9558
;; pblendw / vpblendw on V8HI, expressed as vec_merge: the first
;; vec_merge input is operand 2, the second is operand 1, and the
;; selector is the immediate operand 3 (0..255, one bit per element).
;; Alternative 0 is the two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
9559(define_insn "sse4_1_pblendw"
9560  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9561	(vec_merge:V8HI
9562	  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
9563	  (match_operand:V8HI 1 "register_operand" "0,x")
9564	  (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
9565  "TARGET_SSE4_1"
9566  "@
9567   pblendw\t{%3, %2, %0|%0, %2, %3}
9568   vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9569  [(set_attr "isa" "noavx,avx")
9570   (set_attr "type" "ssemov")
9571   (set_attr "prefix_extra" "1")
9572   (set_attr "length_immediate" "1")
9573   (set_attr "prefix" "orig,vex")
9574   (set_attr "mode" "TI")])
9575
9576;; The builtin uses an 8-bit immediate.  Expand that.
;; The preparation code masks operand 3 to its low 8 bits and copies
;; them into bits 8-15 as well (val << 8 | val), giving a 16-bit
;; vec_merge selector for the sixteen V16HI elements.
9577(define_expand "avx2_pblendw"
9578  [(set (match_operand:V16HI 0 "register_operand" "")
9579	(vec_merge:V16HI
9580	  (match_operand:V16HI 2 "nonimmediate_operand" "")
9581	  (match_operand:V16HI 1 "register_operand" "")
9582	  (match_operand:SI 3 "const_0_to_255_operand" "")))]
9583  "TARGET_AVX2"
9584{
9585  HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
9586  operands[3] = GEN_INT (val << 8 | val);
9587})
9588
;; Insn pattern for the 256-bit vpblendw, expressed as vec_merge with
;; the first input operand 2, the second operand 1, and a selector that
;; must satisfy avx2_pblendw_operand.
;; The output code masks operand 3 down to its low 8 bits before
;; printing, so the instruction is emitted with an 8-bit immediate.
9589(define_insn "*avx2_pblendw"
9590  [(set (match_operand:V16HI 0 "register_operand" "=x")
9591	(vec_merge:V16HI
9592	  (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9593	  (match_operand:V16HI 1 "register_operand" "x")
9594	  (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
9595  "TARGET_AVX2"
9596{
9597  operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
9598  return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9599}
9600  [(set_attr "type" "ssemov")
9601   (set_attr "prefix_extra" "1")
9602   (set_attr "length_immediate" "1")
9603   (set_attr "prefix" "vex")
9604   (set_attr "mode" "OI")])
9605
;; vpblendd for every mode in the VI4_AVX2 iterator, expressed as
;; vec_merge: first input operand 2, second input operand 1, selector
;; the immediate operand 3 (0..255).
9606(define_insn "avx2_pblendd<mode>"
9607  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
9608	(vec_merge:VI4_AVX2
9609	  (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
9610	  (match_operand:VI4_AVX2 1 "register_operand" "x")
9611	  (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9612  "TARGET_AVX2"
9613  "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9614  [(set_attr "type" "ssemov")
9615   (set_attr "prefix_extra" "1")
9616   (set_attr "length_immediate" "1")
9617   (set_attr "prefix" "vex")
9618   (set_attr "mode" "<sseinsnmode>")])
9619
;; phminposuw / vphminposuw on V8HI, represented as UNSPEC_PHMINPOSUW
;; of the single source operand 1.  "%v" plus prefix maybe_vex lets one
;; alternative cover both encodings.
9620(define_insn "sse4_1_phminposuw"
9621  [(set (match_operand:V8HI 0 "register_operand" "=x")
9622	(unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9623		     UNSPEC_PHMINPOSUW))]
9624  "TARGET_SSE4_1"
9625  "%vphminposuw\t{%1, %0|%0, %1}"
9626  [(set_attr "type" "sselog1")
9627   (set_attr "prefix_extra" "1")
9628   (set_attr "prefix" "maybe_vex")
9629   (set_attr "mode" "TI")])
9630
;; vpmov<extsuffix>bw: extend all sixteen bytes of a V16QI operand to
;; V16HI.  The extension kind (<code> / <extsuffix>) comes from the
;; any_extend code iterator.
9631(define_insn "avx2_<code>v16qiv16hi2"
9632  [(set (match_operand:V16HI 0 "register_operand" "=x")
9633	(any_extend:V16HI
9634	  (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
9635  "TARGET_AVX2"
9636  "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9637  [(set_attr "type" "ssemov")
9638   (set_attr "prefix_extra" "1")
9639   (set_attr "prefix" "vex")
9640   (set_attr "mode" "OI")])
9641
;; pmov<extsuffix>bw: extend bytes 0-7 of a V16QI operand to V8HI.
;; The extension kind comes from the any_extend code iterator.
;; The Intel-syntax half of the template prints operand 1 with the
;; "q" modifier.
9642(define_insn "sse4_1_<code>v8qiv8hi2"
9643  [(set (match_operand:V8HI 0 "register_operand" "=x")
9644	(any_extend:V8HI
9645	  (vec_select:V8QI
9646	    (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9647	    (parallel [(const_int 0)
9648		       (const_int 1)
9649		       (const_int 2)
9650		       (const_int 3)
9651		       (const_int 4)
9652		       (const_int 5)
9653		       (const_int 6)
9654		       (const_int 7)]))))]
9655  "TARGET_SSE4_1"
9656  "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
9657  [(set_attr "type" "ssemov")
9658   (set_attr "prefix_extra" "1")
9659   (set_attr "prefix" "maybe_vex")
9660   (set_attr "mode" "TI")])
9661
;; vpmov<extsuffix>bd: extend bytes 0-7 of a V16QI operand to V8SI.
;; The extension kind comes from the any_extend code iterator.
;; The Intel-syntax half of the template prints operand 1 with the
;; "q" modifier.
9662(define_insn "avx2_<code>v8qiv8si2"
9663  [(set (match_operand:V8SI 0 "register_operand" "=x")
9664	(any_extend:V8SI
9665	  (vec_select:V8QI
9666	    (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9667	    (parallel [(const_int 0)
9668		       (const_int 1)
9669		       (const_int 2)
9670		       (const_int 3)
9671		       (const_int 4)
9672		       (const_int 5)
9673		       (const_int 6)
9674		       (const_int 7)]))))]
9675  "TARGET_AVX2"
9676  "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
9677  [(set_attr "type" "ssemov")
9678   (set_attr "prefix_extra" "1")
9679   (set_attr "prefix" "vex")
9680   (set_attr "mode" "OI")])
9681
;; pmov<extsuffix>bd: extend bytes 0-3 of a V16QI operand to V4SI.
;; The extension kind comes from the any_extend code iterator.
;; The Intel-syntax half of the template prints operand 1 with the
;; "k" modifier.
9682(define_insn "sse4_1_<code>v4qiv4si2"
9683  [(set (match_operand:V4SI 0 "register_operand" "=x")
9684	(any_extend:V4SI
9685	  (vec_select:V4QI
9686	    (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9687	    (parallel [(const_int 0)
9688		       (const_int 1)
9689		       (const_int 2)
9690		       (const_int 3)]))))]
9691  "TARGET_SSE4_1"
9692  "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
9693  [(set_attr "type" "ssemov")
9694   (set_attr "prefix_extra" "1")
9695   (set_attr "prefix" "maybe_vex")
9696   (set_attr "mode" "TI")])
9697
;; vpmov<extsuffix>wd: extend all eight elements of a V8HI operand to
;; V8SI.  The extension kind comes from the any_extend code iterator.
9698(define_insn "avx2_<code>v8hiv8si2"
9699  [(set (match_operand:V8SI 0 "register_operand" "=x")
9700	(any_extend:V8SI
9701	    (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
9702  "TARGET_AVX2"
9703  "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9704  [(set_attr "type" "ssemov")
9705   (set_attr "prefix_extra" "1")
9706   (set_attr "prefix" "vex")
9707   (set_attr "mode" "OI")])
9708
;; pmov<extsuffix>wd: extend elements 0-3 of a V8HI operand to V4SI.
;; The extension kind comes from the any_extend code iterator.
;; The Intel-syntax half of the template prints operand 1 with the
;; "q" modifier.
9709(define_insn "sse4_1_<code>v4hiv4si2"
9710  [(set (match_operand:V4SI 0 "register_operand" "=x")
9711	(any_extend:V4SI
9712	  (vec_select:V4HI
9713	    (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9714	    (parallel [(const_int 0)
9715		       (const_int 1)
9716		       (const_int 2)
9717		       (const_int 3)]))))]
9718  "TARGET_SSE4_1"
9719  "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
9720  [(set_attr "type" "ssemov")
9721   (set_attr "prefix_extra" "1")
9722   (set_attr "prefix" "maybe_vex")
9723   (set_attr "mode" "TI")])
9724
;; vpmov<extsuffix>bq: extend bytes 0-3 of a V16QI operand to V4DI.
;; The extension kind comes from the any_extend code iterator.
;; The Intel-syntax half of the template prints operand 1 with the
;; "k" modifier.
9725(define_insn "avx2_<code>v4qiv4di2"
9726  [(set (match_operand:V4DI 0 "register_operand" "=x")
9727	(any_extend:V4DI
9728	  (vec_select:V4QI
9729	    (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9730	    (parallel [(const_int 0)
9731		       (const_int 1)
9732		       (const_int 2)
9733		       (const_int 3)]))))]
9734  "TARGET_AVX2"
9735  "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
9736  [(set_attr "type" "ssemov")
9737   (set_attr "prefix_extra" "1")
9738   (set_attr "prefix" "vex")
9739   (set_attr "mode" "OI")])
9740
;; Extend (any_extend) the two low bytes -- elements 0 and 1 -- of V16QI
;; operand 1 to DImode, giving V2DI operand 0.  Operand 1 may be a
;; register or memory.  Enabled under TARGET_SSE4_1 and emitted as
;; %vpmov<extsuffix>bq; the Intel-syntax source is printed with %w1.
9741(define_insn "sse4_1_<code>v2qiv2di2"
9742  [(set (match_operand:V2DI 0 "register_operand" "=x")
9743	(any_extend:V2DI
9744	  (vec_select:V2QI
9745	    (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9746	    (parallel [(const_int 0)
9747		       (const_int 1)]))))]
9748  "TARGET_SSE4_1"
9749  "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
9750  [(set_attr "type" "ssemov")
9751   (set_attr "prefix_extra" "1")
9752   (set_attr "prefix" "maybe_vex")
9753   (set_attr "mode" "TI")])
9754
;; Extend (any_extend) the four low words -- elements 0..3 -- of V8HI
;; operand 1 to DImode, giving V4DI operand 0.  Operand 1 may be a
;; register or memory.  AVX2 only; emitted as vpmov<extsuffix>wq with a
;; VEX prefix, mode OI.  The Intel-syntax source is printed with %q1.
9755(define_insn "avx2_<code>v4hiv4di2"
9756  [(set (match_operand:V4DI 0 "register_operand" "=x")
9757	(any_extend:V4DI
9758	  (vec_select:V4HI
9759	    (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9760	    (parallel [(const_int 0)
9761		       (const_int 1)
9762		       (const_int 2)
9763		       (const_int 3)]))))]
9764  "TARGET_AVX2"
9765  "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
9766  [(set_attr "type" "ssemov")
9767   (set_attr "prefix_extra" "1")
9768   (set_attr "prefix" "vex")
9769   (set_attr "mode" "OI")])
9770
;; Extend (any_extend) the two low words -- elements 0 and 1 -- of V8HI
;; operand 1 to DImode, giving V2DI operand 0.  Operand 1 may be a
;; register or memory.  Enabled under TARGET_SSE4_1 and emitted as
;; %vpmov<extsuffix>wq; the Intel-syntax source is printed with %k1.
9771(define_insn "sse4_1_<code>v2hiv2di2"
9772  [(set (match_operand:V2DI 0 "register_operand" "=x")
9773	(any_extend:V2DI
9774	  (vec_select:V2HI
9775	    (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9776	    (parallel [(const_int 0)
9777		       (const_int 1)]))))]
9778  "TARGET_SSE4_1"
9779  "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
9780  [(set_attr "type" "ssemov")
9781   (set_attr "prefix_extra" "1")
9782   (set_attr "prefix" "maybe_vex")
9783   (set_attr "mode" "TI")])
9784
;; Extend (any_extend) every SImode element of V4SI operand 1 (register
;; or memory) to DImode, giving V4DI operand 0.  All four source elements
;; are used, so no vec_select appears.  AVX2 only, mode OI.
;; The "prefix" attribute is stated explicitly as "vex", matching the
;; other AVX2 extension patterns, rather than relying on whatever default
;; the attribute definition supplies for this mode.
9785(define_insn "avx2_<code>v4siv4di2"
9786  [(set (match_operand:V4DI 0 "register_operand" "=x")
9787	(any_extend:V4DI
9788	    (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
9789  "TARGET_AVX2"
9790  "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9791  [(set_attr "type" "ssemov")
9792   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
9793   (set_attr "mode" "OI")])
9794
;; Extend (any_extend) the two low doublewords -- elements 0 and 1 -- of
;; V4SI operand 1 to DImode, giving V2DI operand 0.  Operand 1 may be a
;; register or memory.  Enabled under TARGET_SSE4_1 and emitted as
;; %vpmov<extsuffix>dq; the Intel-syntax source is printed with %q1.
9795(define_insn "sse4_1_<code>v2siv2di2"
9796  [(set (match_operand:V2DI 0 "register_operand" "=x")
9797	(any_extend:V2DI
9798	  (vec_select:V2SI
9799	    (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9800	    (parallel [(const_int 0)
9801		       (const_int 1)]))))]
9802  "TARGET_SSE4_1"
9803  "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
9804  [(set_attr "type" "ssemov")
9805   (set_attr "prefix_extra" "1")
9806   (set_attr "prefix" "maybe_vex")
9807   (set_attr "mode" "TI")])
9808
9809;; vtestps/vtestpd are very similar to comiss and ucomiss when
9810;; setting FLAGS_REG, but they are not really compare instructions.
;; Set FLAGS_REG (CC mode) from an UNSPEC_VTESTP of two VF-mode operands.
;; Operand 0 must be a register; operand 1 may be a register or memory.
;; The only result is the flags register -- no vector destination is
;; written.  Requires TARGET_AVX; emitted as vtest<ssemodesuffix>.
9811(define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
9812  [(set (reg:CC FLAGS_REG)
9813	(unspec:CC [(match_operand:VF 0 "register_operand" "x")
9814		    (match_operand:VF 1 "nonimmediate_operand" "xm")]
9815		   UNSPEC_VTESTP))]
9816  "TARGET_AVX"
9817  "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9818  [(set_attr "type" "ssecomi")
9819   (set_attr "prefix_extra" "1")
9820   (set_attr "prefix" "vex")
9821   (set_attr "mode" "<MODE>")])
9822
9823;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9824;; But it is not really a compare instruction.
;; 256-bit form: set FLAGS_REG (CC mode) from an UNSPEC_PTEST of two V4DI
;; operands.  Operand 0 must be a register; operand 1 may be a register
;; or memory.  Only the flags are written.  Requires TARGET_AVX; always
;; VEX-encoded, mode OI.
9825(define_insn "avx_ptest256"
9826  [(set (reg:CC FLAGS_REG)
9827	(unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9828		    (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9829		   UNSPEC_PTEST))]
9830  "TARGET_AVX"
9831  "vptest\t{%1, %0|%0, %1}"
9832  [(set_attr "type" "ssecomi")
9833   (set_attr "prefix_extra" "1")
9834   (set_attr "prefix" "vex")
9835   (set_attr "mode" "OI")])
9836
;; 128-bit form: set FLAGS_REG (CC mode) from an UNSPEC_PTEST of two V2DI
;; operands.  Operand 0 must be a register; operand 1 may be a register
;; or memory.  Only the flags are written.  Requires TARGET_SSE4_1; the
;; %v template prefix and "maybe_vex" attribute cover both encodings.
9837(define_insn "sse4_1_ptest"
9838  [(set (reg:CC FLAGS_REG)
9839	(unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9840		    (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9841		   UNSPEC_PTEST))]
9842  "TARGET_SSE4_1"
9843  "%vptest\t{%1, %0|%0, %1}"
9844  [(set_attr "type" "ssecomi")
9845   (set_attr "prefix_extra" "1")
9846   (set_attr "prefix" "maybe_vex")
9847   (set_attr "mode" "TI")])
9848
;; Packed floating-point round: operand 0 = UNSPEC_ROUND of VF operand 1
;; (register or memory) under the immediate rounding control in operand 2,
;; which must be a constant in the range 0..15.  Requires TARGET_ROUND.
;; The prefix_data16 attribute is computed: "*" when TARGET_AVX is set,
;; "1" otherwise.
9849(define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
9850  [(set (match_operand:VF 0 "register_operand" "=x")
9851	(unspec:VF
9852	  [(match_operand:VF 1 "nonimmediate_operand" "xm")
9853	   (match_operand:SI 2 "const_0_to_15_operand" "n")]
9854	  UNSPEC_ROUND))]
9855  "TARGET_ROUND"
9856  "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9857  [(set_attr "type" "ssecvt")
9858   (set (attr "prefix_data16")
9859     (if_then_else
9860       (match_test "TARGET_AVX")
9861     (const_string "*")
9862     (const_string "1")))
9863   (set_attr "prefix_extra" "1")
9864   (set_attr "length_immediate" "1")
9865   (set_attr "prefix" "maybe_vex")
9866   (set_attr "mode" "<MODE>")])
9867
;; Expander: round, then convert to integers.
;;   operand 0 - destination, the integer vector mode paired with <MODE>
;;   operand 1 - VF1 source (register or memory)
;;   operand 2 - rounding-control immediate, 0..15
;; Emits the packed round insn into a fresh pseudo of <MODE>, then a
;; fix_trunc from that pseudo into operand 0.  No RTL template is used;
;; the C body emits everything and finishes with DONE.
9868(define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
9869  [(match_operand:<sseintvecmode> 0 "register_operand" "")
9870   (match_operand:VF1 1 "nonimmediate_operand" "")
9871   (match_operand:SI 2 "const_0_to_15_operand" "")]
9872  "TARGET_ROUND"
9873{
9874  rtx tmp = gen_reg_rtx (<MODE>mode);
9875
9876  emit_insn
9877    (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
9878						       operands[2]));
9879  emit_insn
9880    (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
9881  DONE;
9882})
9883
;; Expander: round two VF2 vectors and pack the truncated results into
;; one integer vector.
;;   operand 0    - destination, mode <ssepackfltmode>
;;   operands 1,2 - VF2 sources (register or memory)
;;   operand 3    - rounding-control immediate, 0..15
;; Two strategies:
;;  * V2DF with TARGET_AVX and !TARGET_PREFER_AVX128: concatenate both
;;    sources into one V4DF (operand 1 is forced into a register first),
;;    round once with the 256-bit insn, then fix_trunc V4DF -> V4SI.
;;  * otherwise: round each source separately into its own pseudo and
;;    combine them with vec_pack_sfix_trunc_<mode>.
9884(define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
9885  [(match_operand:<ssepackfltmode> 0 "register_operand" "")
9886   (match_operand:VF2 1 "nonimmediate_operand" "")
9887   (match_operand:VF2 2 "nonimmediate_operand" "")
9888   (match_operand:SI 3 "const_0_to_15_operand" "")]
9889  "TARGET_ROUND"
9890{
9891  rtx tmp0, tmp1;
9892
9893  if (<MODE>mode == V2DFmode
9894      && TARGET_AVX && !TARGET_PREFER_AVX128)
9895    {
9896      rtx tmp2 = gen_reg_rtx (V4DFmode);
9897
9898      tmp0 = gen_reg_rtx (V4DFmode);
9899      tmp1 = force_reg (V2DFmode, operands[1]);
9900
9901      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
9902      emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
9903      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
9904    }
9905  else
9906    {
9907      tmp0 = gen_reg_rtx (<MODE>mode);
9908      tmp1 = gen_reg_rtx (<MODE>mode);
9909
9910      emit_insn
9911       (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
9912							  operands[3]));
9913      emit_insn
9914       (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
9915							  operands[3]));
9916      emit_insn
9917       (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
9918    }
9919  DONE;
9920})
9921
;; Scalar round merged into a vector.  The vec_merge mask (const_int 1)
;; takes element 0 from the UNSPEC_ROUND of operand 2 (rounding control in
;; immediate operand 3, 0..15) and the remaining elements from operand 1.
;; Two alternatives, selected by the "isa" attribute:
;;   0 (noavx): operand 1 is tied to the destination ("0"); two-operand
;;              round<suffix> form.
;;   1 (avx):   operand 1 is an independent register; three-operand
;;              vround<suffix> form.
9922(define_insn "sse4_1_round<ssescalarmodesuffix>"
9923  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
9924	(vec_merge:VF_128
9925	  (unspec:VF_128
9926	    [(match_operand:VF_128 2 "register_operand" "x,x")
9927	     (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
9928	    UNSPEC_ROUND)
9929	  (match_operand:VF_128 1 "register_operand" "0,x")
9930	  (const_int 1)))]
9931  "TARGET_ROUND"
9932  "@
9933   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
9934   vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9935  [(set_attr "isa" "noavx,avx")
9936   (set_attr "type" "ssecvt")
9937   (set_attr "length_immediate" "1")
9938   (set_attr "prefix_data16" "1,*")
9939   (set_attr "prefix_extra" "1")
9940   (set_attr "prefix" "orig,vex")
9941   (set_attr "mode" "<MODE>")])
9942
;; Expander for vector round<mode>2, available only with TARGET_ROUND and
;; when -ftrapping-math is off.  The expansion is
;;   operand 4 = operand 1 + operand 3
;;   operand 0 = UNSPEC_ROUND (operand 4, ROUND_TRUNC)
;; where operand 3 is built by the C body as
;;   copysign (nextafter (0.5, 0.0), operand 1)
;; broadcast to every element.  The constant is computed as
;; 0.5 - 2^(-p - 1), with p taken from the scalar mode's real_format.
;; Operands 3, 4 and 5 are internal (match_dup) and are filled in by the
;; preparation statements; callers supply only operands 0 and 1.
9943(define_expand "round<mode>2"
9944  [(set (match_dup 4)
9945	(plus:VF
9946	  (match_operand:VF 1 "register_operand" "")
9947	  (match_dup 3)))
9948   (set (match_operand:VF 0 "register_operand" "")
9949	(unspec:VF
9950	  [(match_dup 4) (match_dup 5)]
9951	  UNSPEC_ROUND))]
9952  "TARGET_ROUND && !flag_trapping_math"
9953{
9954  enum machine_mode scalar_mode;
9955  const struct real_format *fmt;
9956  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
9957  rtx half, vec_half;
9958
9959  scalar_mode = GET_MODE_INNER (<MODE>mode);
9960
9961  /* load nextafter (0.5, 0.0) */
9962  fmt = REAL_MODE_FORMAT (scalar_mode);
9963  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
9964  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
9965  half = const_double_from_real_value (pred_half, scalar_mode);
9966
9967  vec_half = ix86_build_const_vector (<MODE>mode, true, half);
9968  vec_half = force_reg (<MODE>mode, vec_half);
9969
9970  operands[3] = gen_reg_rtx (<MODE>mode);
9971  emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
9972
9973  operands[4] = gen_reg_rtx (<MODE>mode);
9974  operands[5] = GEN_INT (ROUND_TRUNC);
9975})
9976
;; Expander: round<mode>2 followed by conversion to integers.
;;   operand 0 - destination, the integer vector mode paired with <MODE>
;;   operand 1 - VF1 source register
;; Emits round<mode>2 into a fresh pseudo, then a fix_trunc from that
;; pseudo into operand 0.  Same enabling condition as round<mode>2.
9977(define_expand "round<mode>2_sfix"
9978  [(match_operand:<sseintvecmode> 0 "register_operand" "")
9979   (match_operand:VF1 1 "register_operand" "")]
9980  "TARGET_ROUND && !flag_trapping_math"
9981{
9982  rtx tmp = gen_reg_rtx (<MODE>mode);
9983
9984  emit_insn (gen_round<mode>2 (tmp, operands[1]));
9985
9986  emit_insn
9987    (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
9988  DONE;
9989})
9990
;; Expander: apply round<mode>2 to two VF2 vectors and pack the truncated
;; results into one integer vector.
;;   operand 0    - destination, mode <ssepackfltmode>
;;   operands 1,2 - VF2 source registers
;; Two strategies:
;;  * V2DF with TARGET_AVX and !TARGET_PREFER_AVX128: concatenate both
;;    sources into one V4DF, round once with roundv4df2, then fix_trunc
;;    V4DF -> V4SI.
;;  * otherwise: round each source separately into its own pseudo and
;;    combine them with vec_pack_sfix_trunc_<mode>.
9991(define_expand "round<mode>2_vec_pack_sfix"
9992  [(match_operand:<ssepackfltmode> 0 "register_operand" "")
9993   (match_operand:VF2 1 "register_operand" "")
9994   (match_operand:VF2 2 "register_operand" "")]
9995  "TARGET_ROUND && !flag_trapping_math"
9996{
9997  rtx tmp0, tmp1;
9998
9999  if (<MODE>mode == V2DFmode
10000      && TARGET_AVX && !TARGET_PREFER_AVX128)
10001    {
10002      rtx tmp2 = gen_reg_rtx (V4DFmode);
10003
10004      tmp0 = gen_reg_rtx (V4DFmode);
10005      tmp1 = force_reg (V2DFmode, operands[1]);
10006
10007      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
10008      emit_insn (gen_roundv4df2 (tmp2, tmp0));
10009      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
10010    }
10011  else
10012    {
10013      tmp0 = gen_reg_rtx (<MODE>mode);
10014      tmp1 = gen_reg_rtx (<MODE>mode);
10015
10016      emit_insn (gen_round<mode>2 (tmp0, operands[1]));
10017      emit_insn (gen_round<mode>2 (tmp1, operands[2]));
10018
10019      emit_insn
10020       (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
10021    }
10022  DONE;
10023})
10024
10025;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10026;;
10027;; Intel SSE4.2 string/text processing instructions
10028;;
10029;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10030
;; Combined explicit-length string compare.  The pattern describes all
;; three results at once, each an UNSPEC_PCMPESTR of the same five inputs:
;;   operand 0 - SI result, constraint "c"   (tracked as "ecx" below)
;;   operand 1 - V16QI result, constraint "Yz" (tracked as "xmm0" below)
;;   FLAGS_REG - CC result
;; Inputs: operand 2 (V16QI register), operand 3 (SI, "a"), operand 4
;; (V16QI register or memory), operand 5 (SI, "d") and operand 6 (an
;; immediate in 0..255).
;; It only matches while pseudos can still be created and is always split
;; (output template "#", split condition "&& 1").  The splitter looks for
;; REG_UNUSED notes on curr_insn and emits only what is live:
;;   - pcmpestri when operand 0 is used,
;;   - pcmpestrm when operand 1 is used,
;;   - the flags-only pattern when just FLAGS_REG is used,
;;   - a NOTE_INSN_DELETED when nothing is used.
10031(define_insn_and_split "sse4_2_pcmpestr"
10032  [(set (match_operand:SI 0 "register_operand" "=c,c")
10033	(unspec:SI
10034	  [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10035	   (match_operand:SI 3 "register_operand" "a,a")
10036	   (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
10037	   (match_operand:SI 5 "register_operand" "d,d")
10038	   (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
10039	  UNSPEC_PCMPESTR))
10040   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10041	(unspec:V16QI
10042	  [(match_dup 2)
10043	   (match_dup 3)
10044	   (match_dup 4)
10045	   (match_dup 5)
10046	   (match_dup 6)]
10047	  UNSPEC_PCMPESTR))
10048   (set (reg:CC FLAGS_REG)
10049	(unspec:CC
10050	  [(match_dup 2)
10051	   (match_dup 3)
10052	   (match_dup 4)
10053	   (match_dup 5)
10054	   (match_dup 6)]
10055	  UNSPEC_PCMPESTR))]
10056  "TARGET_SSE4_2
10057   && can_create_pseudo_p ()"
10058  "#"
10059  "&& 1"
10060  [(const_int 0)]
10061{
10062  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10063  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10064  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10065
10066  if (ecx)
10067    emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
10068				     operands[3], operands[4],
10069				     operands[5], operands[6]));
10070  if (xmm0)
10071    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
10072				     operands[3], operands[4],
10073				     operands[5], operands[6]));
10074  if (flags && !(ecx || xmm0))
10075    emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
10076					   operands[2], operands[3],
10077					   operands[4], operands[5],
10078					   operands[6]));
10079  if (!(flags || ecx || xmm0))
10080    emit_note (NOTE_INSN_DELETED);
10081
10082  DONE;
10083}
10084  [(set_attr "type" "sselog")
10085   (set_attr "prefix_data16" "1")
10086   (set_attr "prefix_extra" "1")
10087   (set_attr "length_immediate" "1")
10088   (set_attr "memory" "none,load")
10089   (set_attr "mode" "TI")])
10090
;; Explicit-length string compare, index form.  Writes the SI result to
;; operand 0 (constraint "c") and also sets FLAGS_REG from the same
;; UNSPEC_PCMPESTR inputs.  Operands 2 and 4 are the SI registers
;; constrained to "a" and "d"; they are inputs of the pattern but do not
;; appear in the output template.  Operand 3 may be a register
;; (alternative 0) or memory (alternative 1), as mirrored by the "memory"
;; attribute.  Operand 5 is an immediate in 0..255.
10091(define_insn "sse4_2_pcmpestri"
10092  [(set (match_operand:SI 0 "register_operand" "=c,c")
10093	(unspec:SI
10094	  [(match_operand:V16QI 1 "register_operand" "x,x")
10095	   (match_operand:SI 2 "register_operand" "a,a")
10096	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10097	   (match_operand:SI 4 "register_operand" "d,d")
10098	   (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10099	  UNSPEC_PCMPESTR))
10100   (set (reg:CC FLAGS_REG)
10101	(unspec:CC
10102	  [(match_dup 1)
10103	   (match_dup 2)
10104	   (match_dup 3)
10105	   (match_dup 4)
10106	   (match_dup 5)]
10107	  UNSPEC_PCMPESTR))]
10108  "TARGET_SSE4_2"
10109  "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10110  [(set_attr "type" "sselog")
10111   (set_attr "prefix_data16" "1")
10112   (set_attr "prefix_extra" "1")
10113   (set_attr "prefix" "maybe_vex")
10114   (set_attr "length_immediate" "1")
10115   (set_attr "memory" "none,load")
10116   (set_attr "mode" "TI")])
10117
;; Explicit-length string compare, mask form.  Writes the V16QI result to
;; operand 0 (constraint "Yz") and also sets FLAGS_REG from the same
;; UNSPEC_PCMPESTR inputs.  Operands 2 and 4 are the SI registers
;; constrained to "a" and "d"; they are inputs of the pattern but do not
;; appear in the output template.  Operand 3 may be a register
;; (alternative 0) or memory (alternative 1).  Operand 5 is an immediate
;; in 0..255.
10118(define_insn "sse4_2_pcmpestrm"
10119  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10120	(unspec:V16QI
10121	  [(match_operand:V16QI 1 "register_operand" "x,x")
10122	   (match_operand:SI 2 "register_operand" "a,a")
10123	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10124	   (match_operand:SI 4 "register_operand" "d,d")
10125	   (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10126	  UNSPEC_PCMPESTR))
10127   (set (reg:CC FLAGS_REG)
10128	(unspec:CC
10129	  [(match_dup 1)
10130	   (match_dup 2)
10131	   (match_dup 3)
10132	   (match_dup 4)
10133	   (match_dup 5)]
10134	  UNSPEC_PCMPESTR))]
10135  "TARGET_SSE4_2"
10136  "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10137  [(set_attr "type" "sselog")
10138   (set_attr "prefix_data16" "1")
10139   (set_attr "prefix_extra" "1")
10140   (set_attr "length_immediate" "1")
10141   (set_attr "prefix" "maybe_vex")
10142   (set_attr "memory" "none,load")
10143   (set_attr "mode" "TI")])
10144
;; Explicit-length string compare used only for its flags.  The sole set
;; is FLAGS_REG; operands 0 and 1 are scratch clobbers standing in for the
;; result register the chosen instruction overwrites.  Four alternatives:
;;   0,1: emit the ...strm form; the V16QI scratch is "Yz", the SI scratch
;;        is unconstrained ("X").
;;   2,3: emit the ...stri form; the SI scratch is "c", the V16QI scratch
;;        is unconstrained ("X").
;; Within each pair operand 4 is a register, then memory, matching the
;; "memory" attribute "none,load,none,load".
10145(define_insn "sse4_2_pcmpestr_cconly"
10146  [(set (reg:CC FLAGS_REG)
10147	(unspec:CC
10148	  [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10149	   (match_operand:SI 3 "register_operand" "a,a,a,a")
10150	   (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10151	   (match_operand:SI 5 "register_operand" "d,d,d,d")
10152	   (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10153	  UNSPEC_PCMPESTR))
10154   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10155   (clobber (match_scratch:SI    1 "= X, X,c,c"))]
10156  "TARGET_SSE4_2"
10157  "@
10158   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10159   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10160   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10161   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10162  [(set_attr "type" "sselog")
10163   (set_attr "prefix_data16" "1")
10164   (set_attr "prefix_extra" "1")
10165   (set_attr "length_immediate" "1")
10166   (set_attr "memory" "none,load,none,load")
10167   (set_attr "prefix" "maybe_vex")
10168   (set_attr "mode" "TI")])
10169
;; Combined implicit-length string compare.  The pattern describes all
;; three results at once, each an UNSPEC_PCMPISTR of the same three
;; inputs:
;;   operand 0 - SI result, constraint "c"   (tracked as "ecx" below)
;;   operand 1 - V16QI result, constraint "Yz" (tracked as "xmm0" below)
;;   FLAGS_REG - CC result
;; Inputs: operand 2 (V16QI register), operand 3 (V16QI register or
;; memory) and operand 4 (an immediate in 0..255).
;; It only matches while pseudos can still be created and is always split
;; (output template "#", split condition "&& 1").  The splitter looks for
;; REG_UNUSED notes on curr_insn and emits only what is live:
;;   - pcmpistri when operand 0 is used,
;;   - pcmpistrm when operand 1 is used,
;;   - the flags-only pattern when just FLAGS_REG is used,
;;   - a NOTE_INSN_DELETED when nothing is used.
10170(define_insn_and_split "sse4_2_pcmpistr"
10171  [(set (match_operand:SI 0 "register_operand" "=c,c")
10172	(unspec:SI
10173	  [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10174	   (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10175	   (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10176	  UNSPEC_PCMPISTR))
10177   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10178	(unspec:V16QI
10179	  [(match_dup 2)
10180	   (match_dup 3)
10181	   (match_dup 4)]
10182	  UNSPEC_PCMPISTR))
10183   (set (reg:CC FLAGS_REG)
10184	(unspec:CC
10185	  [(match_dup 2)
10186	   (match_dup 3)
10187	   (match_dup 4)]
10188	  UNSPEC_PCMPISTR))]
10189  "TARGET_SSE4_2
10190   && can_create_pseudo_p ()"
10191  "#"
10192  "&& 1"
10193  [(const_int 0)]
10194{
10195  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10196  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10197  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10198
10199  if (ecx)
10200    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10201				     operands[3], operands[4]));
10202  if (xmm0)
10203    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10204				     operands[3], operands[4]));
10205  if (flags && !(ecx || xmm0))
10206    emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10207					   operands[2], operands[3],
10208					   operands[4]));
10209  if (!(flags || ecx || xmm0))
10210    emit_note (NOTE_INSN_DELETED);
10211
10212  DONE;
10213}
10214  [(set_attr "type" "sselog")
10215   (set_attr "prefix_data16" "1")
10216   (set_attr "prefix_extra" "1")
10217   (set_attr "length_immediate" "1")
10218   (set_attr "memory" "none,load")
10219   (set_attr "mode" "TI")])
10220
;; Implicit-length string compare, index form.  Writes the SI result to
;; operand 0 (constraint "c") and also sets FLAGS_REG from the same
;; UNSPEC_PCMPISTR inputs.  Operand 1 is a V16QI register; operand 2 may
;; be a register (alternative 0) or memory (alternative 1); operand 3 is
;; an immediate in 0..255.
10221(define_insn "sse4_2_pcmpistri"
10222  [(set (match_operand:SI 0 "register_operand" "=c,c")
10223	(unspec:SI
10224	  [(match_operand:V16QI 1 "register_operand" "x,x")
10225	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10226	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10227	  UNSPEC_PCMPISTR))
10228   (set (reg:CC FLAGS_REG)
10229	(unspec:CC
10230	  [(match_dup 1)
10231	   (match_dup 2)
10232	   (match_dup 3)]
10233	  UNSPEC_PCMPISTR))]
10234  "TARGET_SSE4_2"
10235  "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10236  [(set_attr "type" "sselog")
10237   (set_attr "prefix_data16" "1")
10238   (set_attr "prefix_extra" "1")
10239   (set_attr "length_immediate" "1")
10240   (set_attr "prefix" "maybe_vex")
10241   (set_attr "memory" "none,load")
10242   (set_attr "mode" "TI")])
10243
;; Implicit-length string compare, mask form.  Writes the V16QI result to
;; operand 0 (constraint "Yz") and also sets FLAGS_REG from the same
;; UNSPEC_PCMPISTR inputs.  Operand 1 is a V16QI register; operand 2 may
;; be a register (alternative 0) or memory (alternative 1); operand 3 is
;; an immediate in 0..255.
10244(define_insn "sse4_2_pcmpistrm"
10245  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10246	(unspec:V16QI
10247	  [(match_operand:V16QI 1 "register_operand" "x,x")
10248	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10249	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10250	  UNSPEC_PCMPISTR))
10251   (set (reg:CC FLAGS_REG)
10252	(unspec:CC
10253	  [(match_dup 1)
10254	   (match_dup 2)
10255	   (match_dup 3)]
10256	  UNSPEC_PCMPISTR))]
10257  "TARGET_SSE4_2"
10258  "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10259  [(set_attr "type" "sselog")
10260   (set_attr "prefix_data16" "1")
10261   (set_attr "prefix_extra" "1")
10262   (set_attr "length_immediate" "1")
10263   (set_attr "prefix" "maybe_vex")
10264   (set_attr "memory" "none,load")
10265   (set_attr "mode" "TI")])
10266
;; Implicit-length string compare used only for its flags.  The sole set
;; is FLAGS_REG; operands 0 and 1 are scratch clobbers standing in for the
;; result register the chosen instruction overwrites.  Four alternatives:
;;   0,1: emit the ...strm form; the V16QI scratch is "Yz", the SI scratch
;;        is unconstrained ("X").
;;   2,3: emit the ...stri form; the SI scratch is "c", the V16QI scratch
;;        is unconstrained ("X").
;; Within each pair operand 3 is a register, then memory, matching the
;; "memory" attribute "none,load,none,load".
10267(define_insn "sse4_2_pcmpistr_cconly"
10268  [(set (reg:CC FLAGS_REG)
10269	(unspec:CC
10270	  [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10271	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10272	   (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10273	  UNSPEC_PCMPISTR))
10274   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10275   (clobber (match_scratch:SI    1 "= X, X,c,c"))]
10276  "TARGET_SSE4_2"
10277  "@
10278   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10279   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10280   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10281   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10282  [(set_attr "type" "sselog")
10283   (set_attr "prefix_data16" "1")
10284   (set_attr "prefix_extra" "1")
10285   (set_attr "length_immediate" "1")
10286   (set_attr "memory" "none,load,none,load")
10287   (set_attr "prefix" "maybe_vex")
10288   (set_attr "mode" "TI")])
10289
10290;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10291;;
10292;; XOP instructions
10293;;
10294;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10295
10296;; XOP parallel integer multiply/add instructions.
10297;; Note the XOP multiply/add instructions
10298;;     a[i] = b[i] * c[i] + d[i];
10299;; do not allow the value being added to be a memory operand.
;; V8HI multiply-accumulate: operand 0 = operand 1 * operand 2 + operand 3,
;; element-wise with a plain (wrapping) plus.  The multiply is marked
;; commutative ("%"); only operand 2 may be memory.  The addend, operand 3,
;; is constrained to a register ("x").
10300(define_insn "xop_pmacsww"
10301  [(set (match_operand:V8HI 0 "register_operand" "=x")
10302	(plus:V8HI
10303	 (mult:V8HI
10304	  (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10305	  (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10306	 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10307  "TARGET_XOP"
10308  "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10309  [(set_attr "type" "ssemuladd")
10310   (set_attr "mode" "TI")])
10311
;; V8HI multiply-accumulate with a saturating add: operand 0 =
;; ss_plus (operand 1 * operand 2, operand 3), element-wise.  The multiply
;; is marked commutative ("%"); only operand 2 may be memory.  The addend,
;; operand 3, is constrained to a register ("x").
10312(define_insn "xop_pmacssww"
10313  [(set (match_operand:V8HI 0 "register_operand" "=x")
10314	(ss_plus:V8HI
10315	 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10316		    (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10317	 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10318  "TARGET_XOP"
10319  "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10320  [(set_attr "type" "ssemuladd")
10321   (set_attr "mode" "TI")])
10322
;; V4SI multiply-accumulate: operand 0 = operand 1 * operand 2 + operand 3,
;; element-wise with a plain (wrapping) plus.  The multiply is marked
;; commutative ("%"); only operand 2 may be memory.  The addend, operand 3,
;; is constrained to a register ("x").
10323(define_insn "xop_pmacsdd"
10324  [(set (match_operand:V4SI 0 "register_operand" "=x")
10325	(plus:V4SI
10326	 (mult:V4SI
10327	  (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10328	  (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10329	 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10330  "TARGET_XOP"
10331  "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10332  [(set_attr "type" "ssemuladd")
10333   (set_attr "mode" "TI")])
10334
;; V4SI multiply-accumulate with a saturating add: operand 0 =
;; ss_plus (operand 1 * operand 2, operand 3), element-wise.  The multiply
;; is marked commutative ("%"); only operand 2 may be memory.  The addend,
;; operand 3, is constrained to a register ("x").
10335(define_insn "xop_pmacssdd"
10336  [(set (match_operand:V4SI 0 "register_operand" "=x")
10337	(ss_plus:V4SI
10338	 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10339		    (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10340	 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10341  "TARGET_XOP"
10342  "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10343  [(set_attr "type" "ssemuladd")
10344   (set_attr "mode" "TI")])
10345
;; Widening multiply-accumulate with a saturating add.  Elements 0 and 2
;; of V4SI operands 1 and 2 are each sign-extended to DImode, multiplied
;; as V2DI, and combined with V2DI operand 3 by ss_plus.
;; Both multiplicands must be wrapped in sign_extend:V2DI so that the
;; operands of mult:V2DI have the same mode as the result -- the same
;; shape used by xop_pmacssdqh, xop_pmacsdql and xop_pmacsdqh.
;; Only operand 2 may be memory; the addend, operand 3, is constrained to
;; a register ("x").
10346(define_insn "xop_pmacssdql"
10347  [(set (match_operand:V2DI 0 "register_operand" "=x")
10348	(ss_plus:V2DI
10349	 (mult:V2DI
10350	  (sign_extend:V2DI
10351	   (vec_select:V2SI
10352	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10353	    (parallel [(const_int 0)
10354		       (const_int 2)])))
	  (sign_extend:V2DI
10355	   (vec_select:V2SI
10356	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10357	    (parallel [(const_int 0)
10358		       (const_int 2)]))))
10359	 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10360  "TARGET_XOP"
10361  "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10362  [(set_attr "type" "ssemuladd")
10363   (set_attr "mode" "TI")])
10364
;; Widening multiply-accumulate with a saturating add.  Elements 1 and 3
;; of V4SI operands 1 and 2 are each sign-extended to DImode, multiplied
;; as V2DI, and combined with V2DI operand 3 by ss_plus.  Only operand 2
;; may be memory; the addend, operand 3, is constrained to a register.
10365(define_insn "xop_pmacssdqh"
10366  [(set (match_operand:V2DI 0 "register_operand" "=x")
10367	(ss_plus:V2DI
10368	 (mult:V2DI
10369	  (sign_extend:V2DI
10370	   (vec_select:V2SI
10371	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10372	    (parallel [(const_int 1)
10373		       (const_int 3)])))
10374	  (sign_extend:V2DI
10375	   (vec_select:V2SI
10376	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10377	    (parallel [(const_int 1)
10378		       (const_int 3)]))))
10379	 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10380  "TARGET_XOP"
10381  "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10382  [(set_attr "type" "ssemuladd")
10383   (set_attr "mode" "TI")])
10384
;; Widening multiply-accumulate.  Elements 0 and 2 of V4SI operands 1 and
;; 2 are each sign-extended to DImode, multiplied as V2DI, and added to
;; V2DI operand 3 with a plain (wrapping) plus.  Only operand 2 may be
;; memory; the addend, operand 3, is constrained to a register.
10385(define_insn "xop_pmacsdql"
10386  [(set (match_operand:V2DI 0 "register_operand" "=x")
10387	(plus:V2DI
10388	 (mult:V2DI
10389	  (sign_extend:V2DI
10390	   (vec_select:V2SI
10391	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10392	    (parallel [(const_int 0)
10393		       (const_int 2)])))
10394	  (sign_extend:V2DI
10395	   (vec_select:V2SI
10396	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10397	    (parallel [(const_int 0)
10398		       (const_int 2)]))))
10399	 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10400  "TARGET_XOP"
10401  "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10402  [(set_attr "type" "ssemuladd")
10403   (set_attr "mode" "TI")])
10404
;; Widening multiply-accumulate.  Elements 1 and 3 of V4SI operands 1 and
;; 2 are each sign-extended to DImode, multiplied as V2DI, and added to
;; V2DI operand 3 with a plain (wrapping) plus.  Only operand 2 may be
;; memory; the addend, operand 3, is constrained to a register.
10405(define_insn "xop_pmacsdqh"
10406  [(set (match_operand:V2DI 0 "register_operand" "=x")
10407	(plus:V2DI
10408	 (mult:V2DI
10409	  (sign_extend:V2DI
10410	   (vec_select:V2SI
10411	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10412	    (parallel [(const_int 1)
10413		       (const_int 3)])))
10414	  (sign_extend:V2DI
10415	   (vec_select:V2SI
10416	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10417	    (parallel [(const_int 1)
10418		       (const_int 3)]))))
10419	 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10420  "TARGET_XOP"
10421  "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10422  [(set_attr "type" "ssemuladd")
10423   (set_attr "mode" "TI")])
10424
10425;; XOP parallel integer multiply/add instructions for the intrinsics
;; Widening multiply-accumulate with a saturating add.  The odd elements
;; (1, 3, 5, 7) of V8HI operands 1 and 2 are each sign-extended to SImode,
;; multiplied as V4SI, and combined with V4SI operand 3 by ss_plus.  Only
;; operand 2 may be memory; the addend, operand 3, is constrained to a
;; register.
10426(define_insn "xop_pmacsswd"
10427  [(set (match_operand:V4SI 0 "register_operand" "=x")
10428	(ss_plus:V4SI
10429	 (mult:V4SI
10430	  (sign_extend:V4SI
10431	   (vec_select:V4HI
10432	    (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10433	    (parallel [(const_int 1)
10434		       (const_int 3)
10435		       (const_int 5)
10436		       (const_int 7)])))
10437	  (sign_extend:V4SI
10438	   (vec_select:V4HI
10439	    (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10440	    (parallel [(const_int 1)
10441		       (const_int 3)
10442		       (const_int 5)
10443		       (const_int 7)]))))
10444	 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10445  "TARGET_XOP"
10446  "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10447  [(set_attr "type" "ssemuladd")
10448   (set_attr "mode" "TI")])
10449
;; Widening multiply-accumulate.  The odd elements (1, 3, 5, 7) of V8HI
;; operands 1 and 2 are each sign-extended to SImode, multiplied as V4SI,
;; and added to V4SI operand 3 with a plain (wrapping) plus.  Only operand
;; 2 may be memory; the addend, operand 3, is constrained to a register.
10450(define_insn "xop_pmacswd"
10451  [(set (match_operand:V4SI 0 "register_operand" "=x")
10452	(plus:V4SI
10453	 (mult:V4SI
10454	  (sign_extend:V4SI
10455	   (vec_select:V4HI
10456	    (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10457	    (parallel [(const_int 1)
10458		       (const_int 3)
10459		       (const_int 5)
10460		       (const_int 7)])))
10461	  (sign_extend:V4SI
10462	   (vec_select:V4HI
10463	    (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10464	    (parallel [(const_int 1)
10465		       (const_int 3)
10466		       (const_int 5)
10467		       (const_int 7)]))))
10468	 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10469  "TARGET_XOP"
10470  "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10471  [(set_attr "type" "ssemuladd")
10472   (set_attr "mode" "TI")])
10473
;; Multiply, add adjacent products, then accumulate with saturation.
;; For V8HI operands 1 and 2:
;;   even = sign_extend (elements 0,2,4,6 of op1)
;;          * sign_extend (elements 0,2,4,6 of op2)
;;   odd  = sign_extend (elements 1,3,5,7 of op1)
;;          * sign_extend (elements 1,3,5,7 of op2)
;;   operand 0 = ss_plus (plus (even, odd), operand 3)
;; The inner sum of the two products is a plain plus; only the final
;; accumulation with V4SI operand 3 is ss_plus.  The odd-element selects
;; reuse operands 1 and 2 through match_dup.
10474(define_insn "xop_pmadcsswd"
10475  [(set (match_operand:V4SI 0 "register_operand" "=x")
10476	(ss_plus:V4SI
10477	 (plus:V4SI
10478	  (mult:V4SI
10479	   (sign_extend:V4SI
10480	    (vec_select:V4HI
10481	     (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10482	     (parallel [(const_int 0)
10483			(const_int 2)
10484			(const_int 4)
10485			(const_int 6)])))
10486	   (sign_extend:V4SI
10487	    (vec_select:V4HI
10488	     (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10489	     (parallel [(const_int 0)
10490			(const_int 2)
10491			(const_int 4)
10492			(const_int 6)]))))
10493	  (mult:V4SI
10494	   (sign_extend:V4SI
10495	    (vec_select:V4HI
10496	     (match_dup 1)
10497	     (parallel [(const_int 1)
10498			(const_int 3)
10499			(const_int 5)
10500			(const_int 7)])))
10501	   (sign_extend:V4SI
10502	    (vec_select:V4HI
10503	     (match_dup 2)
10504	     (parallel [(const_int 1)
10505			(const_int 3)
10506			(const_int 5)
10507			(const_int 7)])))))
10508	 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10509  "TARGET_XOP"
10510  "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10511  [(set_attr "type" "ssemuladd")
10512   (set_attr "mode" "TI")])
10513
;; Multiply, add adjacent products, then accumulate (non-saturating).
;; For V8HI operands 1 and 2:
;;   even = sign_extend (elements 0,2,4,6 of op1)
;;          * sign_extend (elements 0,2,4,6 of op2)
;;   odd  = sign_extend (elements 1,3,5,7 of op1)
;;          * sign_extend (elements 1,3,5,7 of op2)
;;   operand 0 = plus (plus (even, odd), operand 3)
;; The odd-element selects reuse operands 1 and 2 through match_dup.
10514(define_insn "xop_pmadcswd"
10515  [(set (match_operand:V4SI 0 "register_operand" "=x")
10516	(plus:V4SI
10517	 (plus:V4SI
10518	  (mult:V4SI
10519	   (sign_extend:V4SI
10520	    (vec_select:V4HI
10521	     (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10522	     (parallel [(const_int 0)
10523			(const_int 2)
10524			(const_int 4)
10525			(const_int 6)])))
10526	   (sign_extend:V4SI
10527	    (vec_select:V4HI
10528	     (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10529	     (parallel [(const_int 0)
10530			(const_int 2)
10531			(const_int 4)
10532			(const_int 6)]))))
10533	  (mult:V4SI
10534	   (sign_extend:V4SI
10535	    (vec_select:V4HI
10536	     (match_dup 1)
10537	     (parallel [(const_int 1)
10538			(const_int 3)
10539			(const_int 5)
10540			(const_int 7)])))
10541	   (sign_extend:V4SI
10542	    (vec_select:V4HI
10543	     (match_dup 2)
10544	     (parallel [(const_int 1)
10545			(const_int 3)
10546			(const_int 5)
10547			(const_int 7)])))))
10548	 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10549  "TARGET_XOP"
10550  "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10551  [(set_attr "type" "ssemuladd")
10552   (set_attr "mode" "TI")])
10553
10554;; XOP parallel XMM conditional moves
;; Conditional move expressed as if_then_else: operand 3 is the selector,
;; operand 1 the value for the "then" arm and operand 2 the value for the
;; "else" arm.  The two alternatives allow at most one memory operand:
;;   0: operand 3 in a register, operand 2 register or memory
;;   1: operand 3 in memory,     operand 2 in a register
;; Operand 1 is always a register.
10555(define_insn "xop_pcmov_<mode><avxsizesuffix>"
10556  [(set (match_operand:V 0 "register_operand" "=x,x")
10557	(if_then_else:V
10558	  (match_operand:V 3 "nonimmediate_operand" "x,m")
10559	  (match_operand:V 1 "register_operand" "x,x")
10560	  (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
10561  "TARGET_XOP"
10562  "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10563  [(set_attr "type" "sse4arg")])
10564
10565;; XOP horizontal add/subtract instructions
;; Horizontal add of adjacent signed bytes into words.  Result element k
;; of V8HI operand 0 is sign_extend (byte 2k) + sign_extend (byte 2k+1) of
;; V16QI operand 1: the first vec_select picks the even bytes, the second
;; (via match_dup) the odd bytes.  Operand 1 may be a register or memory.
10566(define_insn "xop_phaddbw"
10567  [(set (match_operand:V8HI 0 "register_operand" "=x")
10568	(plus:V8HI
10569	 (sign_extend:V8HI
10570	  (vec_select:V8QI
10571	   (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10572	   (parallel [(const_int 0)
10573		      (const_int 2)
10574		      (const_int 4)
10575		      (const_int 6)
10576		      (const_int 8)
10577		      (const_int 10)
10578		      (const_int 12)
10579		      (const_int 14)])))
10580	 (sign_extend:V8HI
10581	  (vec_select:V8QI
10582	   (match_dup 1)
10583	   (parallel [(const_int 1)
10584		      (const_int 3)
10585		      (const_int 5)
10586		      (const_int 7)
10587		      (const_int 9)
10588		      (const_int 11)
10589		      (const_int 13)
10590		      (const_int 15)])))))]
10591  "TARGET_XOP"
10592  "vphaddbw\t{%1, %0|%0, %1}"
10593  [(set_attr "type" "sseiadd1")])
10594
;; Horizontal add of groups of four signed bytes into doublewords.  Result
;; element k of V4SI operand 0 is the sum of sign-extended bytes 4k, 4k+1,
;; 4k+2 and 4k+3 of V16QI operand 1.  Each of the four vec_selects picks
;; one byte position within every group ({0,4,8,12}, {1,5,9,13},
;; {2,6,10,14}, {3,7,11,15}); they are summed pairwise.
10595(define_insn "xop_phaddbd"
10596  [(set (match_operand:V4SI 0 "register_operand" "=x")
10597	(plus:V4SI
10598	 (plus:V4SI
10599	  (sign_extend:V4SI
10600	   (vec_select:V4QI
10601	    (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10602	    (parallel [(const_int 0)
10603		       (const_int 4)
10604		       (const_int 8)
10605		       (const_int 12)])))
10606	  (sign_extend:V4SI
10607	   (vec_select:V4QI
10608	    (match_dup 1)
10609	    (parallel [(const_int 1)
10610		       (const_int 5)
10611		       (const_int 9)
10612		       (const_int 13)]))))
10613	 (plus:V4SI
10614	  (sign_extend:V4SI
10615	   (vec_select:V4QI
10616	    (match_dup 1)
10617	    (parallel [(const_int 2)
10618		       (const_int 6)
10619		       (const_int 10)
10620		       (const_int 14)])))
10621	  (sign_extend:V4SI
10622	   (vec_select:V4QI
10623	    (match_dup 1)
10624	    (parallel [(const_int 3)
10625		       (const_int 7)
10626		       (const_int 11)
10627		       (const_int 15)]))))))]
10628  "TARGET_XOP"
10629  "vphaddbd\t{%1, %0|%0, %1}"
10630  [(set_attr "type" "sseiadd1")])
10631
;; Horizontal add of groups of eight signed bytes into quadwords.  Result
;; element 0 of V2DI operand 0 is the sum of sign-extended bytes 0..7 of
;; V16QI operand 1 and element 1 the sum of bytes 8..15.  Each of the
;; eight vec_selects picks byte i for element 0 and byte i+8 for element 1
;; (i = 0..7); the eight terms are combined as a balanced tree of plus.
10632(define_insn "xop_phaddbq"
10633  [(set (match_operand:V2DI 0 "register_operand" "=x")
10634	(plus:V2DI
10635	 (plus:V2DI
10636	  (plus:V2DI
10637	   (sign_extend:V2DI
10638	    (vec_select:V2QI
10639	     (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10640	     (parallel [(const_int 0)
10641			(const_int 8)])))
10642	   (sign_extend:V2DI
10643	    (vec_select:V2QI
10644	     (match_dup 1)
10645	     (parallel [(const_int 1)
10646			(const_int 9)]))))
10647	  (plus:V2DI
10648	   (sign_extend:V2DI
10649	    (vec_select:V2QI
10650	     (match_dup 1)
10651	     (parallel [(const_int 2)
10652			(const_int 10)])))
10653	   (sign_extend:V2DI
10654	    (vec_select:V2QI
10655	     (match_dup 1)
10656	     (parallel [(const_int 3)
10657			(const_int 11)])))))
10658	 (plus:V2DI
10659	  (plus:V2DI
10660	   (sign_extend:V2DI
10661	    (vec_select:V2QI
10662	     (match_dup 1)
10663	     (parallel [(const_int 4)
10664			(const_int 12)])))
10665	   (sign_extend:V2DI
10666	    (vec_select:V2QI
10667	     (match_dup 1)
10668	     (parallel [(const_int 5)
10669			(const_int 13)]))))
10670	  (plus:V2DI
10671	   (sign_extend:V2DI
10672	    (vec_select:V2QI
10673	     (match_dup 1)
10674	     (parallel [(const_int 6)
10675			(const_int 14)])))
10676	   (sign_extend:V2DI
10677	    (vec_select:V2QI
10678	     (match_dup 1)
10679	     (parallel [(const_int 7)
10680			(const_int 15)])))))))]
10681  "TARGET_XOP"
10682  "vphaddbq\t{%1, %0|%0, %1}"
10683  [(set_attr "type" "sseiadd1")])
10684
;; vphaddwd: horizontal add of signed words into doublewords.  Element i of
;; the V4SI result is sext (w[2*i]) + sext (w[2*i+1]), where w is the V8HI
;; operand 1 (register or memory).  Only enabled for TARGET_XOP.
10685(define_insn "xop_phaddwd"
10686  [(set (match_operand:V4SI 0 "register_operand" "=x")
10687	(plus:V4SI
10688	 (sign_extend:V4SI
10689	  (vec_select:V4HI
10690	   (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10691	   (parallel [(const_int 0)
10692		      (const_int 2)
10693		      (const_int 4)
10694		      (const_int 6)])))
10695	 (sign_extend:V4SI
10696	  (vec_select:V4HI
10697	   (match_dup 1)
10698	   (parallel [(const_int 1)
10699		      (const_int 3)
10700		      (const_int 5)
10701		      (const_int 7)])))))]
10702  "TARGET_XOP"
10703  "vphaddwd\t{%1, %0|%0, %1}"
10704  [(set_attr "type" "sseiadd1")])
10705
;; vphaddwq: horizontal add of signed words into quadwords.  Element i of the
;; V2DI result is the sum of the four sign-extended words 4*i .. 4*i+3 of the
;; V8HI operand 1 (register or memory).  Only enabled for TARGET_XOP.
10706(define_insn "xop_phaddwq"
10707  [(set (match_operand:V2DI 0 "register_operand" "=x")
10708	(plus:V2DI
10709	 (plus:V2DI
10710	  (sign_extend:V2DI
10711	   (vec_select:V2HI
10712	    (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10713	    (parallel [(const_int 0)
10714		       (const_int 4)])))
10715	  (sign_extend:V2DI
10716	   (vec_select:V2HI
10717	    (match_dup 1)
10718	    (parallel [(const_int 1)
10719		       (const_int 5)]))))
10720	 (plus:V2DI
10721	  (sign_extend:V2DI
10722	   (vec_select:V2HI
10723	    (match_dup 1)
10724	    (parallel [(const_int 2)
10725		       (const_int 6)])))
10726	  (sign_extend:V2DI
10727	   (vec_select:V2HI
10728	    (match_dup 1)
10729	    (parallel [(const_int 3)
10730		       (const_int 7)]))))))]
10731  "TARGET_XOP"
10732  "vphaddwq\t{%1, %0|%0, %1}"
10733  [(set_attr "type" "sseiadd1")])
10734
;; vphadddq: horizontal add of signed doublewords into quadwords.  Element i
;; of the V2DI result is sext (d[2*i]) + sext (d[2*i+1]), where d is the V4SI
;; operand 1 (register or memory).  Only enabled for TARGET_XOP.
10735(define_insn "xop_phadddq"
10736  [(set (match_operand:V2DI 0 "register_operand" "=x")
10737	(plus:V2DI
10738	 (sign_extend:V2DI
10739	  (vec_select:V2SI
10740	   (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10741	   (parallel [(const_int 0)
10742		      (const_int 2)])))
10743	 (sign_extend:V2DI
10744	  (vec_select:V2SI
10745	   (match_dup 1)
10746	   (parallel [(const_int 1)
10747		      (const_int 3)])))))]
10748  "TARGET_XOP"
10749  "vphadddq\t{%1, %0|%0, %1}"
10750  [(set_attr "type" "sseiadd1")])
10751
;; vphaddubw: horizontal add of unsigned bytes into words.  Element i of the
;; V8HI result is zext (b[2*i]) + zext (b[2*i+1]), where b is the V16QI
;; operand 1 (register or memory).  Only enabled for TARGET_XOP.
10752(define_insn "xop_phaddubw"
10753  [(set (match_operand:V8HI 0 "register_operand" "=x")
10754	(plus:V8HI
10755	 (zero_extend:V8HI
10756	  (vec_select:V8QI
10757	   (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10758	   (parallel [(const_int 0)
10759		      (const_int 2)
10760		      (const_int 4)
10761		      (const_int 6)
10762		      (const_int 8)
10763		      (const_int 10)
10764		      (const_int 12)
10765		      (const_int 14)])))
10766	 (zero_extend:V8HI
10767	  (vec_select:V8QI
10768	   (match_dup 1)
10769	   (parallel [(const_int 1)
10770		      (const_int 3)
10771		      (const_int 5)
10772		      (const_int 7)
10773		      (const_int 9)
10774		      (const_int 11)
10775		      (const_int 13)
10776		      (const_int 15)])))))]
10777  "TARGET_XOP"
10778  "vphaddubw\t{%1, %0|%0, %1}"
10779  [(set_attr "type" "sseiadd1")])
10780
;; vphaddubd: horizontal add of unsigned bytes into doublewords.  Element i of
;; the V4SI result is the sum of the four zero-extended bytes 4*i .. 4*i+3 of
;; operand 1 (register or memory).  Only enabled for TARGET_XOP.
10781(define_insn "xop_phaddubd"
10782  [(set (match_operand:V4SI 0 "register_operand" "=x")
10783	(plus:V4SI
10784	 (plus:V4SI
10785	  (zero_extend:V4SI
10786	   (vec_select:V4QI
10787	    (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10788	    (parallel [(const_int 0)
10789		       (const_int 4)
10790		       (const_int 8)
10791		       (const_int 12)])))
10792	  (zero_extend:V4SI
10793	   (vec_select:V4QI
10794	    (match_dup 1)
10795	    (parallel [(const_int 1)
10796		       (const_int 5)
10797		       (const_int 9)
10798		       (const_int 13)]))))
10799	 (plus:V4SI
10800	  (zero_extend:V4SI
10801	   (vec_select:V4QI
10802	    (match_dup 1)
10803	    (parallel [(const_int 2)
10804		       (const_int 6)
10805		       (const_int 10)
10806		       (const_int 14)])))
10807	  (zero_extend:V4SI
10808	   (vec_select:V4QI
10809	    (match_dup 1)
10810	    (parallel [(const_int 3)
10811		       (const_int 7)
10812		       (const_int 11)
10813		       (const_int 15)]))))))]
10814  "TARGET_XOP"
10815  "vphaddubd\t{%1, %0|%0, %1}"
10816  [(set_attr "type" "sseiadd1")])
10817
;; vphaddubq: horizontal add of unsigned bytes into quadwords.  Element i of
;; the V2DI result is the sum of the eight zero-extended bytes 8*i .. 8*i+7 of
;; operand 1 (register or memory), written as three levels of pairwise adds.
;; Every addend must be a zero_extend: this is the unsigned counterpart of
;; xop_phaddbq, and a stray sign_extend would make the RTL describe a
;; different value from what the instruction computes for bytes >= 0x80.
;; Only enabled for TARGET_XOP.
10818(define_insn "xop_phaddubq"
10819  [(set (match_operand:V2DI 0 "register_operand" "=x")
10820	(plus:V2DI
10821	 (plus:V2DI
10822	  (plus:V2DI
10823	   (zero_extend:V2DI
10824	    (vec_select:V2QI
10825	     (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10826	     (parallel [(const_int 0)
10827			(const_int 8)])))
10828	   (zero_extend:V2DI
10829	    (vec_select:V2QI
10830	     (match_dup 1)
10831	     (parallel [(const_int 1)
10832			(const_int 9)]))))
10833	  (plus:V2DI
10834	   (zero_extend:V2DI
10835	    (vec_select:V2QI
10836	     (match_dup 1)
10837	     (parallel [(const_int 2)
10838			(const_int 10)])))
10839	   (zero_extend:V2DI
10840	    (vec_select:V2QI
10841	     (match_dup 1)
10842	     (parallel [(const_int 3)
10843			(const_int 11)])))))
10844	 (plus:V2DI
10845	  (plus:V2DI
10846	   (zero_extend:V2DI
10847	    (vec_select:V2QI
10848	     (match_dup 1)
10849	     (parallel [(const_int 4)
10850			(const_int 12)])))
10851	   (zero_extend:V2DI
10852	    (vec_select:V2QI
10853	     (match_dup 1)
10854	     (parallel [(const_int 5)
10855			(const_int 13)]))))
10856	  (plus:V2DI
10857	   (zero_extend:V2DI
10858	    (vec_select:V2QI
10859	     (match_dup 1)
10860	     (parallel [(const_int 6)
10861			(const_int 14)])))
10862	   (zero_extend:V2DI
10863	    (vec_select:V2QI
10864	     (match_dup 1)
10865	     (parallel [(const_int 7)
10866			(const_int 15)])))))))]
10867  "TARGET_XOP"
10868  "vphaddubq\t{%1, %0|%0, %1}"
10869  [(set_attr "type" "sseiadd1")])
10870
;; vphadduwd: horizontal add of unsigned words into doublewords.  Element i of
;; the V4SI result is zext (w[2*i]) + zext (w[2*i+1]), where w is the V8HI
;; operand 1 (register or memory).  Only enabled for TARGET_XOP.
10871(define_insn "xop_phadduwd"
10872  [(set (match_operand:V4SI 0 "register_operand" "=x")
10873	(plus:V4SI
10874	 (zero_extend:V4SI
10875	  (vec_select:V4HI
10876	   (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10877	   (parallel [(const_int 0)
10878		      (const_int 2)
10879		      (const_int 4)
10880		      (const_int 6)])))
10881	 (zero_extend:V4SI
10882	  (vec_select:V4HI
10883	   (match_dup 1)
10884	   (parallel [(const_int 1)
10885		      (const_int 3)
10886		      (const_int 5)
10887		      (const_int 7)])))))]
10888  "TARGET_XOP"
10889  "vphadduwd\t{%1, %0|%0, %1}"
10890  [(set_attr "type" "sseiadd1")])
10891
;; vphadduwq: horizontal add of unsigned words into quadwords.  Element i of
;; the V2DI result is the sum of the four zero-extended words 4*i .. 4*i+3 of
;; the V8HI operand 1 (register or memory).  Only enabled for TARGET_XOP.
10892(define_insn "xop_phadduwq"
10893  [(set (match_operand:V2DI 0 "register_operand" "=x")
10894	(plus:V2DI
10895	 (plus:V2DI
10896	  (zero_extend:V2DI
10897	   (vec_select:V2HI
10898	    (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10899	    (parallel [(const_int 0)
10900		       (const_int 4)])))
10901	  (zero_extend:V2DI
10902	   (vec_select:V2HI
10903	    (match_dup 1)
10904	    (parallel [(const_int 1)
10905		       (const_int 5)]))))
10906	 (plus:V2DI
10907	  (zero_extend:V2DI
10908	   (vec_select:V2HI
10909	    (match_dup 1)
10910	    (parallel [(const_int 2)
10911		       (const_int 6)])))
10912	  (zero_extend:V2DI
10913	   (vec_select:V2HI
10914	    (match_dup 1)
10915	    (parallel [(const_int 3)
10916		       (const_int 7)]))))))]
10917  "TARGET_XOP"
10918  "vphadduwq\t{%1, %0|%0, %1}"
10919  [(set_attr "type" "sseiadd1")])
10920
;; vphaddudq: horizontal add of unsigned doublewords into quadwords.  Element
;; i of the V2DI result is zext (d[2*i]) + zext (d[2*i+1]), where d is the
;; V4SI operand 1 (register or memory).  Only enabled for TARGET_XOP.
10921(define_insn "xop_phaddudq"
10922  [(set (match_operand:V2DI 0 "register_operand" "=x")
10923	(plus:V2DI
10924	 (zero_extend:V2DI
10925	  (vec_select:V2SI
10926	   (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10927	   (parallel [(const_int 0)
10928		      (const_int 2)])))
10929	 (zero_extend:V2DI
10930	  (vec_select:V2SI
10931	   (match_dup 1)
10932	   (parallel [(const_int 1)
10933		      (const_int 3)])))))]
10934  "TARGET_XOP"
10935  "vphaddudq\t{%1, %0|%0, %1}"
10936  [(set_attr "type" "sseiadd1")])
10937
;; vphsubbw: horizontal subtract of signed bytes into words.  Element i of the
;; V8HI result is sext (b[2*i]) - sext (b[2*i+1]) (even-indexed byte minus the
;; following odd-indexed byte), where b is the V16QI operand 1 (register or
;; memory).  Only enabled for TARGET_XOP.
10938(define_insn "xop_phsubbw"
10939  [(set (match_operand:V8HI 0 "register_operand" "=x")
10940	(minus:V8HI
10941	 (sign_extend:V8HI
10942	  (vec_select:V8QI
10943	   (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10944	   (parallel [(const_int 0)
10945		      (const_int 2)
10946		      (const_int 4)
10947		      (const_int 6)
10948		      (const_int 8)
10949		      (const_int 10)
10950		      (const_int 12)
10951		      (const_int 14)])))
10952	 (sign_extend:V8HI
10953	  (vec_select:V8QI
10954	   (match_dup 1)
10955	   (parallel [(const_int 1)
10956		      (const_int 3)
10957		      (const_int 5)
10958		      (const_int 7)
10959		      (const_int 9)
10960		      (const_int 11)
10961		      (const_int 13)
10962		      (const_int 15)])))))]
10963  "TARGET_XOP"
10964  "vphsubbw\t{%1, %0|%0, %1}"
10965  [(set_attr "type" "sseiadd1")])
10966
;; vphsubwd: horizontal subtract of signed words into doublewords.  Element i
;; of the V4SI result is sext (w[2*i]) - sext (w[2*i+1]), where w is the V8HI
;; operand 1 (register or memory).  Only enabled for TARGET_XOP.
10967(define_insn "xop_phsubwd"
10968  [(set (match_operand:V4SI 0 "register_operand" "=x")
10969	(minus:V4SI
10970	 (sign_extend:V4SI
10971	  (vec_select:V4HI
10972	   (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10973	   (parallel [(const_int 0)
10974		      (const_int 2)
10975		      (const_int 4)
10976		      (const_int 6)])))
10977	 (sign_extend:V4SI
10978	  (vec_select:V4HI
10979	   (match_dup 1)
10980	   (parallel [(const_int 1)
10981		      (const_int 3)
10982		      (const_int 5)
10983		      (const_int 7)])))))]
10984  "TARGET_XOP"
10985  "vphsubwd\t{%1, %0|%0, %1}"
10986  [(set_attr "type" "sseiadd1")])
10987
;; vphsubdq: horizontal subtract of signed doublewords into quadwords.
;; Element i of the V2DI result is sext (d[2*i]) - sext (d[2*i+1]), where d is
;; the V4SI operand 1 (register or memory).  Only enabled for TARGET_XOP.
10988(define_insn "xop_phsubdq"
10989  [(set (match_operand:V2DI 0 "register_operand" "=x")
10990	(minus:V2DI
10991	 (sign_extend:V2DI
10992	  (vec_select:V2SI
10993	   (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10994	   (parallel [(const_int 0)
10995		      (const_int 2)])))
10996	 (sign_extend:V2DI
10997	  (vec_select:V2SI
10998	   (match_dup 1)
10999	   (parallel [(const_int 1)
11000		      (const_int 3)])))))]
11001  "TARGET_XOP"
11002  "vphsubdq\t{%1, %0|%0, %1}"
11003  [(set_attr "type" "sseiadd1")])
11004
11005;; XOP permute instructions
;; vpperm on three V16QI inputs, modelled as an opaque UNSPEC_XOP_PERMUTE so
;; the optimizers do not reason about its value.  Two alternatives: either
;; operand 3 or operand 2 may come from memory, and the insn condition rejects
;; the case where both are MEMs.  Operand 1 must be a register.
11006(define_insn "xop_pperm"
11007  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11008	(unspec:V16QI
11009	  [(match_operand:V16QI 1 "register_operand" "x,x")
11010	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
11011	   (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
11012	  UNSPEC_XOP_PERMUTE))]
11013  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11014  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11015  [(set_attr "type" "sse4arg")
11016   (set_attr "mode" "TI")])
11017
11018;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI vectors into one V4SI.  The RTL describes operand 0 as the
;; concatenation of operands 1 and 2, each truncated element-wise to V2SI.
;; Operand 3 appears only in a (use ...) and is printed as the first AT&T
;; operand of vpperm.
;; NOTE(review): nothing in this pattern checks that operand 3 encodes the
;; truncation described by the RTL -- presumably the caller builds a matching
;; selector; confirm against the expander that generates this insn.
;; Operands 2 and 3 may not both be memory.
11019(define_insn "xop_pperm_pack_v2di_v4si"
11020  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11021	(vec_concat:V4SI
11022	 (truncate:V2SI
11023	  (match_operand:V2DI 1 "register_operand" "x,x"))
11024	 (truncate:V2SI
11025	  (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
11026   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11027  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11028  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11029  [(set_attr "type" "sse4arg")
11030   (set_attr "mode" "TI")])
11031
;; Pack two V4SI vectors into one V8HI.  The RTL describes operand 0 as the
;; concatenation of operands 1 and 2, each truncated element-wise to V4HI.
;; Operand 3 appears only in a (use ...) and is printed as the first AT&T
;; operand of vpperm.
;; NOTE(review): the selector contents are not validated here -- presumably
;; the caller supplies one that performs this truncation; confirm.
;; Operands 2 and 3 may not both be memory.
11032(define_insn "xop_pperm_pack_v4si_v8hi"
11033  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11034	(vec_concat:V8HI
11035	 (truncate:V4HI
11036	  (match_operand:V4SI 1 "register_operand" "x,x"))
11037	 (truncate:V4HI
11038	  (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
11039   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11040  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11041  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11042  [(set_attr "type" "sse4arg")
11043   (set_attr "mode" "TI")])
11044
;; Pack two V8HI vectors into one V16QI.  The RTL describes operand 0 as the
;; concatenation of operands 1 and 2, each truncated element-wise to V8QI.
;; Operand 3 appears only in a (use ...) and is printed as the first AT&T
;; operand of vpperm.
;; NOTE(review): the selector contents are not validated here -- presumably
;; the caller supplies one that performs this truncation; confirm.
;; Operands 2 and 3 may not both be memory.
11045(define_insn "xop_pperm_pack_v8hi_v16qi"
11046  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11047	(vec_concat:V16QI
11048	 (truncate:V8QI
11049	  (match_operand:V8HI 1 "register_operand" "x,x"))
11050	 (truncate:V8QI
11051	  (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
11052   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11053  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11054  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11055  [(set_attr "type" "sse4arg")
11056   (set_attr "mode" "TI")])
11057
11058;; XOP packed rotate instructions
;; Rotate-left expander for the 128-bit integer vector modes (VI_128) with a
;; scalar SImode count in operand 2.
;;  * If operand 2 satisfies const_0_to_<sserotatemax>_operand, the C body
;;    does nothing and the (rotate ...) template itself is emitted.
;;  * Otherwise the count is converted to the vector's element mode when its
;;    mode differs, the same rtx is stored in every slot of a PARALLEL,
;;    vec_init<mode> builds a vector register from it, and the variable-count
;;    xop_vrotl<mode>3 is emitted in place of the template (DONE).
11059(define_expand "rotl<mode>3"
11060  [(set (match_operand:VI_128 0 "register_operand" "")
11061	(rotate:VI_128
11062	 (match_operand:VI_128 1 "nonimmediate_operand" "")
11063	 (match_operand:SI 2 "general_operand")))]
11064  "TARGET_XOP"
11065{
11066  /* If we were given a scalar, convert it to parallel */
11067  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11068    {
11069      rtvec vs = rtvec_alloc (<ssescalarnum>);
11070      rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11071      rtx reg = gen_reg_rtx (<MODE>mode);
11072      rtx op2 = operands[2];
11073      int i;
11074
11075      if (GET_MODE (op2) != <ssescalarmode>mode)
11076	{
11077	  op2 = gen_reg_rtx (<ssescalarmode>mode);
11078	  convert_move (op2, operands[2], false);
11079	}
11080
11081      for (i = 0; i < <ssescalarnum>; i++)
11082	RTVEC_ELT (vs, i) = op2;
11083
11084      emit_insn (gen_vec_init<mode> (reg, par));
11085      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
11086      DONE;
11087    }
11088})
11089
;; Rotate-right expander for the 128-bit integer vector modes (VI_128) with a
;; scalar SImode count in operand 2.
;;  * If operand 2 satisfies const_0_to_<sserotatemax>_operand, the C body
;;    does nothing and the (rotatert ...) template itself is emitted.
;;  * Otherwise the count is converted to the element mode when needed and
;;    replicated into a vector register with vec_init<mode>; that vector is
;;    negated with neg<mode>2 and passed to xop_vrotl<mode>3, whose RTL
;;    defines a negative per-element count as a rotate right.
11090(define_expand "rotr<mode>3"
11091  [(set (match_operand:VI_128 0 "register_operand" "")
11092	(rotatert:VI_128
11093	 (match_operand:VI_128 1 "nonimmediate_operand" "")
11094	 (match_operand:SI 2 "general_operand")))]
11095  "TARGET_XOP"
11096{
11097  /* If we were given a scalar, convert it to parallel */
11098  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11099    {
11100      rtvec vs = rtvec_alloc (<ssescalarnum>);
11101      rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11102      rtx neg = gen_reg_rtx (<MODE>mode);
11103      rtx reg = gen_reg_rtx (<MODE>mode);
11104      rtx op2 = operands[2];
11105      int i;
11106
11107      if (GET_MODE (op2) != <ssescalarmode>mode)
11108	{
11109	  op2 = gen_reg_rtx (<ssescalarmode>mode);
11110	  convert_move (op2, operands[2], false);
11111	}
11112
11113      for (i = 0; i < <ssescalarnum>; i++)
11114	RTVEC_ELT (vs, i) = op2;
11115
11116      emit_insn (gen_vec_init<mode> (reg, par));
11117      emit_insn (gen_neg<mode>2 (neg, reg));
11118      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
11119      DONE;
11120    }
11121})
11122
;; Rotate every element of operand 1 (register or memory) left by the
;; immediate count in operand 2, which must satisfy
;; const_0_to_<sserotatemax>_operand.  Emits vprot<suffix> with the count as
;; an immediate (length_immediate is 1).
11123(define_insn "xop_rotl<mode>3"
11124  [(set (match_operand:VI_128 0 "register_operand" "=x")
11125	(rotate:VI_128
11126	 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11127	 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11128  "TARGET_XOP"
11129  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11130  [(set_attr "type" "sseishft")
11131   (set_attr "length_immediate" "1")
11132   (set_attr "mode" "TI")])
11133
;; Rotate every element of operand 1 (register or memory) right by the
;; immediate count N in operand 2.  The insn is output as a vprot left rotate
;; by (element width in bits - N), which is the same rotation.
;; The width must be that of one element, i.e. the bit size of
;; <ssescalarmode> (8/16/32/64).  <ssescalarnum> * 8 is the element *count*
;; times eight and only coincides with the element width for V4SI, so it must
;; not be used here.
11134(define_insn "xop_rotr<mode>3"
11135  [(set (match_operand:VI_128 0 "register_operand" "=x")
11136	(rotatert:VI_128
11137	 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11138	 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11139  "TARGET_XOP"
11140{
  /* Right rotate by N == left rotate by (bits per element - N).  */
11141  operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11142  return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
11143}
11144  [(set_attr "type" "sseishft")
11145   (set_attr "length_immediate" "1")
11146   (set_attr "mode" "TI")])
11147
;; Per-element variable rotate right: operand 2 is a vector of counts.  The
;; counts are negated into a fresh register and handed to xop_vrotl<mode>3,
;; whose RTL treats a negative count as a rotate right by its magnitude.
;; The expander has no RTL template of its own; it always finishes with DONE.
11148(define_expand "vrotr<mode>3"
11149  [(match_operand:VI_128 0 "register_operand" "")
11150   (match_operand:VI_128 1 "register_operand" "")
11151   (match_operand:VI_128 2 "register_operand" "")]
11152  "TARGET_XOP"
11153{
11154  rtx reg = gen_reg_rtx (<MODE>mode);
11155  emit_insn (gen_neg<mode>2 (reg, operands[2]));
11156  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
11157  DONE;
11158})
11159
;; Per-element variable rotate left: operand 2 is a vector of counts.  This is
;; a thin wrapper that forwards all three operands unchanged to
;; xop_vrotl<mode>3 and finishes with DONE.
11160(define_expand "vrotl<mode>3"
11161  [(match_operand:VI_128 0 "register_operand" "")
11162   (match_operand:VI_128 1 "register_operand" "")
11163   (match_operand:VI_128 2 "register_operand" "")]
11164  "TARGET_XOP"
11165{
11166  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
11167  DONE;
11168})
11169
;; Variable-count vprot.  Operand 2 holds the counts and selects the
;; direction: where it is >= 0 the result is operand 1 rotated left by
;; operand 2, otherwise operand 1 rotated right by -operand 2.
;; Two alternatives let either operand 1 or operand 2 be in memory; the insn
;; condition rejects both being MEMs.
11170(define_insn "xop_vrotl<mode>3"
11171  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11172	(if_then_else:VI_128
11173	 (ge:VI_128
11174	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11175	  (const_int 0))
11176	 (rotate:VI_128
11177	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11178	  (match_dup 2))
11179	 (rotatert:VI_128
11180	  (match_dup 1)
11181	  (neg:VI_128 (match_dup 2)))))]
11182  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11183  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11184  [(set_attr "type" "sseishft")
11185   (set_attr "prefix_data16" "0")
11186   (set_attr "prefix_extra" "2")
11187   (set_attr "mode" "TI")])
11188
11189;; XOP packed shift instructions.
;; Per-element logical shift right for the VI12_128 modes, XOP only.  The
;; count vector is negated and passed to xop_shl<mode>3, whose RTL defines a
;; negative count as a logical right shift by its magnitude.  Always DONE, so
;; the (lshiftrt ...) template is never emitted directly.
11190(define_expand "vlshr<mode>3"
11191  [(set (match_operand:VI12_128 0 "register_operand" "")
11192	(lshiftrt:VI12_128
11193	  (match_operand:VI12_128 1 "register_operand" "")
11194	  (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
11195  "TARGET_XOP"
11196{
11197  rtx neg = gen_reg_rtx (<MODE>mode);
11198  emit_insn (gen_neg<mode>2 (neg, operands[2]));
11199  emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
11200  DONE;
11201})
11202
;; Per-element logical shift right for the VI48_128 modes, available with
;; either AVX2 or XOP.  With AVX2 the C body does nothing and the
;; (lshiftrt ...) template is emitted for another pattern to match.  Without
;; AVX2 (XOP only) the counts are negated and xop_shl<mode>3 is used, as in
;; the VI12_128 variant.
11203(define_expand "vlshr<mode>3"
11204  [(set (match_operand:VI48_128 0 "register_operand" "")
11205	(lshiftrt:VI48_128
11206	  (match_operand:VI48_128 1 "register_operand" "")
11207	  (match_operand:VI48_128 2 "nonimmediate_operand" "")))]
11208  "TARGET_AVX2 || TARGET_XOP"
11209{
11210  if (!TARGET_AVX2)
11211    {
11212      rtx neg = gen_reg_rtx (<MODE>mode);
11213      emit_insn (gen_neg<mode>2 (neg, operands[2]));
11214      emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
11215      DONE;
11216    }
11217})
11218
;; Per-element logical shift right for the 256-bit VI48_256 modes.  AVX2 only;
;; there is no C body, so the (lshiftrt ...) template is emitted as written.
11219(define_expand "vlshr<mode>3"
11220  [(set (match_operand:VI48_256 0 "register_operand" "")
11221	(lshiftrt:VI48_256
11222	  (match_operand:VI48_256 1 "register_operand" "")
11223	  (match_operand:VI48_256 2 "nonimmediate_operand" "")))]
11224  "TARGET_AVX2")
11225
;; Per-element arithmetic shift right for the VI128_128 modes, XOP only.  The
;; count vector is negated and passed to xop_sha<mode>3, whose RTL defines a
;; negative count as an arithmetic right shift by its magnitude.  Always DONE.
11226(define_expand "vashr<mode>3"
11227  [(set (match_operand:VI128_128 0 "register_operand" "")
11228	(ashiftrt:VI128_128
11229	  (match_operand:VI128_128 1 "register_operand" "")
11230	  (match_operand:VI128_128 2 "nonimmediate_operand" "")))]
11231  "TARGET_XOP"
11232{
11233  rtx neg = gen_reg_rtx (<MODE>mode);
11234  emit_insn (gen_neg<mode>2 (neg, operands[2]));
11235  emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
11236  DONE;
11237})
11238
;; Per-element arithmetic shift right for V4SI, available with AVX2 or XOP.
;; With AVX2 the C body does nothing and the (ashiftrt ...) template is
;; emitted.  Without AVX2 the counts are negated and xop_shav4si3 is used.
11239(define_expand "vashrv4si3"
11240  [(set (match_operand:V4SI 0 "register_operand" "")
11241	(ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "")
11242		       (match_operand:V4SI 2 "nonimmediate_operand" "")))]
11243  "TARGET_AVX2 || TARGET_XOP"
11244{
11245  if (!TARGET_AVX2)
11246    {
11247      rtx neg = gen_reg_rtx (V4SImode);
11248      emit_insn (gen_negv4si2 (neg, operands[2]));
11249      emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
11250      DONE;
11251    }
11252})
11253
;; Per-element arithmetic shift right for V8SI.  AVX2 only; there is no C
;; body, so the (ashiftrt ...) template is emitted as written.
11254(define_expand "vashrv8si3"
11255  [(set (match_operand:V8SI 0 "register_operand" "")
11256	(ashiftrt:V8SI (match_operand:V8SI 1 "register_operand" "")
11257		       (match_operand:V8SI 2 "nonimmediate_operand" "")))]
11258  "TARGET_AVX2")
11259
;; Per-element shift left for the VI12_128 modes, XOP only.  Operands are
;; forwarded unchanged to xop_sha<mode>3 (a non-negative count there means a
;; left shift).  Always DONE.
11260(define_expand "vashl<mode>3"
11261  [(set (match_operand:VI12_128 0 "register_operand" "")
11262	(ashift:VI12_128
11263	  (match_operand:VI12_128 1 "register_operand" "")
11264	  (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
11265  "TARGET_XOP"
11266{
11267  emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
11268  DONE;
11269})
11270
;; Per-element shift left for the VI48_128 modes, available with AVX2 or XOP.
;; With AVX2 the C body does nothing and the (ashift ...) template is emitted.
;; Without AVX2 the count vector is first forced into a register and then
;; passed to xop_sha<mode>3.
11271(define_expand "vashl<mode>3"
11272  [(set (match_operand:VI48_128 0 "register_operand" "")
11273	(ashift:VI48_128
11274	  (match_operand:VI48_128 1 "register_operand" "")
11275	  (match_operand:VI48_128 2 "nonimmediate_operand" "")))]
11276  "TARGET_AVX2 || TARGET_XOP"
11277{
11278  if (!TARGET_AVX2)
11279    {
11280      operands[2] = force_reg (<MODE>mode, operands[2]);
11281      emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
11282      DONE;
11283    }
11284})
11285
;; Per-element shift left for the 256-bit VI48_256 modes.  AVX2 only; there is
;; no C body, so the (ashift ...) template is emitted as written.
11286(define_expand "vashl<mode>3"
11287  [(set (match_operand:VI48_256 0 "register_operand" "")
11288	(ashift:VI48_256
11289	  (match_operand:VI48_256 1 "register_operand" "")
11290	  (match_operand:VI48_256 2 "nonimmediate_operand" "")))]
11291  "TARGET_AVX2")
11292
;; Variable-count arithmetic shift (vpsha).  Operand 2 holds the counts and
;; selects the direction: where it is >= 0 the result is operand 1 shifted
;; left by operand 2, otherwise operand 1 arithmetically shifted right by
;; -operand 2.  Either operand 1 or operand 2 may be in memory, but the insn
;; condition rejects both being MEMs.
11293(define_insn "xop_sha<mode>3"
11294  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11295	(if_then_else:VI_128
11296	 (ge:VI_128
11297	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11298	  (const_int 0))
11299	 (ashift:VI_128
11300	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11301	  (match_dup 2))
11302	 (ashiftrt:VI_128
11303	  (match_dup 1)
11304	  (neg:VI_128 (match_dup 2)))))]
11305  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11306  "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11307  [(set_attr "type" "sseishft")
11308   (set_attr "prefix_data16" "0")
11309   (set_attr "prefix_extra" "2")
11310   (set_attr "mode" "TI")])
11311
;; Variable-count logical shift (vpshl).  Operand 2 holds the counts and
;; selects the direction: where it is >= 0 the result is operand 1 shifted
;; left by operand 2, otherwise operand 1 logically shifted right by
;; -operand 2.  Either operand 1 or operand 2 may be in memory, but the insn
;; condition rejects both being MEMs.
11312(define_insn "xop_shl<mode>3"
11313  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11314	(if_then_else:VI_128
11315	 (ge:VI_128
11316	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11317	  (const_int 0))
11318	 (ashift:VI_128
11319	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11320	  (match_dup 2))
11321	 (lshiftrt:VI_128
11322	  (match_dup 1)
11323	  (neg:VI_128 (match_dup 2)))))]
11324  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11325  "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11326  [(set_attr "type" "sseishft")
11327   (set_attr "prefix_data16" "0")
11328   (set_attr "prefix_extra" "2")
11329   (set_attr "mode" "TI")])
11330
11331;; SSE2 doesn't have some shift variants, so define versions for XOP
;; Shift every byte of a V16QI left by one scalar count (operand 2, register
;; or constant).  The count rtx is stored in all 16 slots of a PARALLEL,
;; vec_initv16qi turns that into a vector register, and the per-element
;; xop_shav16qi3 performs the shift.  Always DONE.
11332(define_expand "ashlv16qi3"
11333  [(set (match_operand:V16QI 0 "register_operand" "")
11334	(ashift:V16QI
11335	  (match_operand:V16QI 1 "register_operand" "")
11336	  (match_operand:SI 2 "nonmemory_operand" "")))]
11337  "TARGET_XOP"
11338{
11339  rtx reg = gen_reg_rtx (V16QImode);
11340  rtx par;
11341  int i;
11342
11343  par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
11344  for (i = 0; i < 16; i++)
11345    XVECEXP (par, 0, i) = operands[2];
11346
11347  emit_insn (gen_vec_initv16qi (reg, par));
11348  emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], reg));
11349  DONE;
11350})
11351
;; Right shifts (iterated over any_shiftrt) of every byte of a V16QI by one
;; scalar count.  The XOP variable shifts take a negative count to mean
;; "shift right", so the count must be negated:
;;  * a CONST_INT count is negated at expand time, before being broadcast;
;;  * any other count is broadcast first and the resulting vector is negated
;;    with negv16qi2.
;; The final insn is xop_shlv16qi3 when <CODE> is LSHIFTRT and xop_shav16qi3
;; otherwise.  Always DONE.
11352(define_expand "<shift_insn>v16qi3"
11353  [(set (match_operand:V16QI 0 "register_operand" "")
11354	(any_shiftrt:V16QI
11355	  (match_operand:V16QI 1 "register_operand" "")
11356	  (match_operand:SI 2 "nonmemory_operand" "")))]
11357  "TARGET_XOP"
11358{
11359  rtx reg = gen_reg_rtx (V16QImode);
11360  rtx par;
11361  bool negate = false;
11362  rtx (*shift_insn)(rtx, rtx, rtx);
11363  int i;
11364
11365  if (CONST_INT_P (operands[2]))
11366    operands[2] = GEN_INT (-INTVAL (operands[2]));
11367  else
11368    negate = true;
11369
11370  par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
11371  for (i = 0; i < 16; i++)
11372    XVECEXP (par, 0, i) = operands[2];
11373
11374  emit_insn (gen_vec_initv16qi (reg, par));
11375
11376  if (negate)
11377    emit_insn (gen_negv16qi2 (reg, reg));
11378
11379  if (<CODE> == LSHIFTRT)
11380    shift_insn = gen_xop_shlv16qi3;
11381  else
11382    shift_insn = gen_xop_shav16qi3;
11383
11384  emit_insn (shift_insn (operands[0], operands[1], reg));
11385  DONE;
11386})
11387
;; Arithmetic shift right of both V2DI elements by one scalar DImode count.
;; The count is negated because xop_shav2di3 treats a negative count as a
;; right shift: a CONST_INT is negated at expand time, any other count is
;; broadcast with vec_initv2di and the vector is then negated with negv2di2.
;; Always DONE.
11388(define_expand "ashrv2di3"
11389  [(set (match_operand:V2DI 0 "register_operand" "")
11390	(ashiftrt:V2DI
11391	  (match_operand:V2DI 1 "register_operand" "")
11392	  (match_operand:DI 2 "nonmemory_operand" "")))]
11393  "TARGET_XOP"
11394{
11395  rtx reg = gen_reg_rtx (V2DImode);
11396  rtx par;
11397  bool negate = false;
11398  int i;
11399
11400  if (CONST_INT_P (operands[2]))
11401    operands[2] = GEN_INT (-INTVAL (operands[2]));
11402  else
11403    negate = true;
11404
11405  par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
11406  for (i = 0; i < 2; i++)
11407    XVECEXP (par, 0, i) = operands[2];
11408
11409  emit_insn (gen_vec_initv2di (reg, par));
11410
11411  if (negate)
11412    emit_insn (gen_negv2di2 (reg, reg));
11413
11414  emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
11415  DONE;
11416})
11417
11418;; XOP FRCZ support
;; Packed vfrcz over the FMAMODE iterator, modelled as an opaque UNSPEC_FRCZ
;; of operand 1 (register or memory).  Only enabled for TARGET_XOP.
11419(define_insn "xop_frcz<mode>2"
11420  [(set (match_operand:FMAMODE 0 "register_operand" "=x")
11421	(unspec:FMAMODE
11422	 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
11423	 UNSPEC_FRCZ))]
11424  "TARGET_XOP"
11425  "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11426  [(set_attr "type" "ssecvt1")
11427   (set_attr "mode" "<MODE>")])
11428
11429;; scalar insns
;; Expander for the scalar vfrcz form.  The result is a vec_merge with mask 1:
;; element 0 comes from UNSPEC_FRCZ of operand 1 and the remaining elements
;; from operand 3, which the C body sets to the all-zero constant of the
;; vector mode.
11430(define_expand "xop_vmfrcz<mode>2"
11431  [(set (match_operand:VF_128 0 "register_operand")
11432	(vec_merge:VF_128
11433	  (unspec:VF_128
11434	   [(match_operand:VF_128 1 "nonimmediate_operand")]
11435	   UNSPEC_FRCZ)
11436	  (match_dup 3)
11437	  (const_int 1)))]
11438  "TARGET_XOP"
11439{
11440  operands[3] = CONST0_RTX (<MODE>mode);
11441})
11442
;; Matcher for the scalar vfrcz form: vec_merge (mask 1) of UNSPEC_FRCZ of
;; operand 1 with operand 2, which must satisfy const0_operand.  Operand 2
;; does not appear in the output template.
11443(define_insn "*xop_vmfrcz_<mode>"
11444  [(set (match_operand:VF_128 0 "register_operand" "=x")
11445	(vec_merge:VF_128
11446	  (unspec:VF_128
11447	   [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
11448	   UNSPEC_FRCZ)
11449	  (match_operand:VF_128 2 "const0_operand")
11450	  (const_int 1)))]
11451  "TARGET_XOP"
11452  "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11453  [(set_attr "type" "ssecvt1")
11454   (set_attr "mode" "<MODE>")])
11455
;; Integer vector compare producing a VI_128 result, for any operator
;; accepted by ix86_comparison_int_operator.  Operand 2 must be a register;
;; operand 3 may be register or memory.
;; NOTE(review): %Y1 presumably prints the condition mnemonic derived from
;; operator 1 (giving vpcom<cond><suffix>) -- confirm in the i386 operand
;; printer.
11456(define_insn "xop_maskcmp<mode>3"
11457  [(set (match_operand:VI_128 0 "register_operand" "=x")
11458	(match_operator:VI_128 1 "ix86_comparison_int_operator"
11459	 [(match_operand:VI_128 2 "register_operand" "x")
11460	  (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11461  "TARGET_XOP"
11462  "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11463  [(set_attr "type" "sse4arg")
11464   (set_attr "prefix_data16" "0")
11465   (set_attr "prefix_rep" "0")
11466   (set_attr "prefix_extra" "2")
11467   (set_attr "length_immediate" "1")
11468   (set_attr "mode" "TI")])
11469
;; Unsigned counterpart of xop_maskcmp<mode>3: matches operators accepted by
;; ix86_comparison_uns_operator and appends "u" after the condition in the
;; mnemonic (vpcom<cond>u<suffix>).  Operand 2 must be a register; operand 3
;; may be register or memory.
11470(define_insn "xop_maskcmp_uns<mode>3"
11471  [(set (match_operand:VI_128 0 "register_operand" "=x")
11472	(match_operator:VI_128 1 "ix86_comparison_uns_operator"
11473	 [(match_operand:VI_128 2 "register_operand" "x")
11474	  (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11475  "TARGET_XOP"
11476  "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11477  [(set_attr "type" "ssecmp")
11478   (set_attr "prefix_data16" "0")
11479   (set_attr "prefix_rep" "0")
11480   (set_attr "prefix_extra" "2")
11481   (set_attr "length_immediate" "1")
11482   (set_attr "mode" "TI")])
11483
11484;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11485;; and pcomneu* not to be converted to the signed ones in case somebody needs
11486;; the exact instruction generated for the intrinsic.
;; Same output template as xop_maskcmp_uns<mode>3, but the comparison is
;; wrapped in UNSPEC_XOP_UNSIGNED_CMP, which keeps the RTL opaque so the
;; comparison operator is not rewritten before output.
11487(define_insn "xop_maskcmp_uns2<mode>3"
11488  [(set (match_operand:VI_128 0 "register_operand" "=x")
11489	(unspec:VI_128
11490	 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
11491	  [(match_operand:VI_128 2 "register_operand" "x")
11492	   (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
11493	 UNSPEC_XOP_UNSIGNED_CMP))]
11494  "TARGET_XOP"
11495  "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11496  [(set_attr "type" "ssecmp")
11497   (set_attr "prefix_data16" "0")
11498   (set_attr "prefix_extra" "2")
11499   (set_attr "length_immediate" "1")
11500   (set_attr "mode" "TI")])
11501
11502;; Pcomtrue and pcomfalse support.  These are useless instructions, but are
11503;; being added here to be complete.
;; Operand 3 is a compile-time integer selector that only chooses the
;; mnemonic: non-zero emits vpcomtrue<suffix>, zero emits vpcomfalse<suffix>.
;; It is not printed as an instruction operand.  Operands 1 and 2 are the
;; (register, register-or-memory) sources.
11504(define_insn "xop_pcom_tf<mode>3"
11505  [(set (match_operand:VI_128 0 "register_operand" "=x")
11506	(unspec:VI_128
11507	  [(match_operand:VI_128 1 "register_operand" "x")
11508	   (match_operand:VI_128 2 "nonimmediate_operand" "xm")
11509	   (match_operand:SI 3 "const_int_operand" "n")]
11510	  UNSPEC_XOP_TRUEFALSE))]
11511  "TARGET_XOP"
11512{
11513  return ((INTVAL (operands[3]) != 0)
11514	  ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11515	  : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
11516}
11517  [(set_attr "type" "ssecmp")
11518   (set_attr "prefix_data16" "0")
11519   (set_attr "prefix_extra" "2")
11520   (set_attr "length_immediate" "1")
11521   (set_attr "mode" "TI")])
11522
;; vpermil2ps/vpermil2pd, modelled as an opaque UNSPEC_VPERMIL2.  Operands 1
;; and 2 are the two floating-point source vectors, operand 3 is the integer
;; selector vector (<sseintvecmode>, register or memory) and operand 4 is an
;; immediate that must satisfy const_0_to_3_operand.
;; Operand 2 deliberately carries no '%' modifier: '%' would declare it
;; commutative with operand 3, and the selector is neither of the same mode
;; nor interchangeable with a data source.
11523(define_insn "xop_vpermil2<mode>3"
11524  [(set (match_operand:VF 0 "register_operand" "=x")
11525	(unspec:VF
11526	  [(match_operand:VF 1 "register_operand" "x")
11527	   (match_operand:VF 2 "nonimmediate_operand" "x")
11528	   (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
11529	   (match_operand:SI 4 "const_0_to_3_operand" "n")]
11530	  UNSPEC_VPERMIL2))]
11531  "TARGET_XOP"
11532  "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11533  [(set_attr "type" "sse4arg")
11534   (set_attr "length_immediate" "1")
11535   (set_attr "mode" "<MODE>")])
11536
11537;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11538
;; AES encryption round, modelled as UNSPEC_AESENC of operands 1 and 2.
;; Alternative 0 (isa noavx) ties operand 1 to the destination and emits the
;; two-operand aesenc; alternative 1 (isa avx) emits the three-operand VEX
;; form vaesenc.  Operand 2 may be register or memory.
11539(define_insn "aesenc"
11540  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11541	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11542		       (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11543		      UNSPEC_AESENC))]
11544  "TARGET_AES"
11545  "@
11546   aesenc\t{%2, %0|%0, %2}
11547   vaesenc\t{%2, %1, %0|%0, %1, %2}"
11548  [(set_attr "isa" "noavx,avx")
11549   (set_attr "type" "sselog1")
11550   (set_attr "prefix_extra" "1")
11551   (set_attr "prefix" "orig,vex")
11552   (set_attr "mode" "TI")])
11553
;; Final AES encryption round, modelled as UNSPEC_AESENCLAST of operands 1
;; and 2.  Alternative 0 (isa noavx) ties operand 1 to the destination and
;; emits aesenclast; alternative 1 (isa avx) emits the three-operand
;; vaesenclast.  Operand 2 may be register or memory.
11554(define_insn "aesenclast"
11555  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11556	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11557		       (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11558		      UNSPEC_AESENCLAST))]
11559  "TARGET_AES"
11560  "@
11561   aesenclast\t{%2, %0|%0, %2}
11562   vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11563  [(set_attr "isa" "noavx,avx")
11564   (set_attr "type" "sselog1")
11565   (set_attr "prefix_extra" "1")
11566   (set_attr "prefix" "orig,vex")
11567   (set_attr "mode" "TI")])
11568
;; AES decryption round, modelled as UNSPEC_AESDEC of operands 1 and 2.
;; Alternative 0 (isa noavx) ties operand 1 to the destination and emits
;; aesdec; alternative 1 (isa avx) emits the three-operand vaesdec.
;; Operand 2 may be register or memory.
11569(define_insn "aesdec"
11570  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11571	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11572		       (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11573		      UNSPEC_AESDEC))]
11574  "TARGET_AES"
11575  "@
11576   aesdec\t{%2, %0|%0, %2}
11577   vaesdec\t{%2, %1, %0|%0, %1, %2}"
11578  [(set_attr "isa" "noavx,avx")
11579   (set_attr "type" "sselog1")
11580   (set_attr "prefix_extra" "1")
11581   (set_attr "prefix" "orig,vex")
11582   (set_attr "mode" "TI")])
11583
;; Final AES decryption round, modelled as UNSPEC_AESDECLAST of operands 1
;; and 2.  Alternative 0 (isa noavx) ties operand 1 to the destination and
;; emits aesdeclast; alternative 1 (isa avx) emits the three-operand
;; vaesdeclast.  Operand 2 may be register or memory.
11584(define_insn "aesdeclast"
11585  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11586	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11587		       (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11588		      UNSPEC_AESDECLAST))]
11589  "TARGET_AES"
11590  "@
11591   aesdeclast\t{%2, %0|%0, %2}
11592   vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11593  [(set_attr "isa" "noavx,avx")
11594   (set_attr "type" "sselog1")
11595   (set_attr "prefix_extra" "1")
11596   (set_attr "prefix" "orig,vex")
11597   (set_attr "mode" "TI")])
11598
;; AESIMC: single-source AES operation (UNSPEC_AESIMC) on a register or
;; memory operand.  There is only one alternative; the "%v" at the start
;; of the template together with the "maybe_vex" prefix attribute lets the
;; same pattern serve both the legacy and the VEX mnemonic.
11599(define_insn "aesimc"
11600  [(set (match_operand:V2DI 0 "register_operand" "=x")
11601	(unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11602		      UNSPEC_AESIMC))]
11603  "TARGET_AES"
11604  "%vaesimc\t{%1, %0|%0, %1}"
11605  [(set_attr "type" "sselog1")
11606   (set_attr "prefix_extra" "1")
11607   (set_attr "prefix" "maybe_vex")
11608   (set_attr "mode" "TI")])
11609
;; AESKEYGENASSIST (UNSPEC_AESKEYGENASSIST).  Operand 1 is the register or
;; memory source and operand 2 is an immediate restricted to 0..255 by its
;; predicate; length_immediate accounts for that one immediate byte.
;; As with aesimc, "%v" plus "maybe_vex" covers legacy and VEX encodings.
11610(define_insn "aeskeygenassist"
11611  [(set (match_operand:V2DI 0 "register_operand" "=x")
11612	(unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11613		      (match_operand:SI 2 "const_0_to_255_operand" "n")]
11614		     UNSPEC_AESKEYGENASSIST))]
11615  "TARGET_AES"
11616  "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11617  [(set_attr "type" "sselog1")
11618   (set_attr "prefix_extra" "1")
11619   (set_attr "length_immediate" "1")
11620   (set_attr "prefix" "maybe_vex")
11621   (set_attr "mode" "TI")])
11622
;; PCLMULQDQ (UNSPEC_PCLMUL), enabled by TARGET_PCLMUL.
;;   Operand 1: first source (tied to the destination in the non-AVX
;;              alternative).
;;   Operand 2: second source, register or memory.
;;   Operand 3: immediate in 0..255, passed through to the instruction.
;; Alternative 0 is the legacy two-operand form, alternative 1 the VEX
;; three-operand form.
11623(define_insn "pclmulqdq"
11624  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11625	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11626		      (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
11627		      (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11628		     UNSPEC_PCLMUL))]
11629  "TARGET_PCLMUL"
11630  "@
11631   pclmulqdq\t{%3, %2, %0|%0, %2, %3}
11632   vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11633  [(set_attr "isa" "noavx,avx")
11634   (set_attr "type" "sselog1")
11635   (set_attr "prefix_extra" "1")
11636   (set_attr "length_immediate" "1")
11637   (set_attr "prefix" "orig,vex")
11638   (set_attr "mode" "TI")])
11639
;; Expander for vzeroall.  The preparation code replaces operand 0 with a
;; PARALLEL of nregs + 1 elements: element 0 is the UNSPECV_VZEROALL
;; unspec_volatile, and elements 1..nregs are explicit SETs of each SSE
;; register (in V8SImode) to zero, so the clobber of every vector register
;; is visible in the RTL.  nregs is 16 when TARGET_64BIT, otherwise 8.
11640(define_expand "avx_vzeroall"
11641  [(match_par_dup 0 [(const_int 0)])]
11642  "TARGET_AVX"
11643{
11644  int nregs = TARGET_64BIT ? 16 : 8;
11645  int regno;
11646
11647  operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11648
11649  XVECEXP (operands[0], 0, 0)
11650    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11651			       UNSPECV_VZEROALL);
11652
11653  for (regno = 0; regno < nregs; regno++)
11654    XVECEXP (operands[0], 0, regno + 1)
11655      = gen_rtx_SET (VOIDmode,
11656		     gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11657		     CONST0_RTX (V8SImode));
11658})
11659
;; Matcher for the PARALLEL built by the avx_vzeroall expander.  Only the
;; first element (the UNSPECV_VZEROALL marker) is spelled out here; the
;; shape of the remaining elements is checked by the vzeroall_operation
;; predicate.  The instruction takes no operands, hence modrm "0" and
;; memory "none".
11660(define_insn "*avx_vzeroall"
11661  [(match_parallel 0 "vzeroall_operation"
11662    [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11663  "TARGET_AVX"
11664  "vzeroall"
11665  [(set_attr "type" "sse")
11666   (set_attr "modrm" "0")
11667   (set_attr "memory" "none")
11668   (set_attr "prefix" "vex")
11669   (set_attr "mode" "OI")])
11670
11671;; Clear the upper 128 bits of the AVX registers; equivalent to a NOP
11672;; if the upper 128 bits are unused.
;; Operand 0 is a const_int carried inside the unspec_volatile; it is not
;; referenced by the output template, so it only serves as information for
;; other passes -- NOTE(review): its exact meaning is defined by the code
;; that emits/consumes this insn, confirm there.
11673(define_insn "avx_vzeroupper"
11674  [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
11675		    UNSPECV_VZEROUPPER)]
11676  "TARGET_AVX"
11677  "vzeroupper"
11678  [(set_attr "type" "sse")
11679   (set_attr "modrm" "0")
11680   (set_attr "memory" "none")
11681   (set_attr "prefix" "vex")
11682   (set_attr "mode" "OI")])
11683
;; Map an integer vector mode to the 128-bit integer vector mode with the
;; same element type: each 256-bit mode maps to its half-width mode, and
;; each 128-bit mode maps to itself.
11684(define_mode_attr AVXTOSSEMODE
11685  [(V4DI "V2DI") (V2DI "V2DI")
11686   (V8SI "V4SI") (V4SI "V4SI")
11687   (V16HI "V8HI") (V8HI "V8HI")
11688   (V32QI "V16QI") (V16QI "V16QI")])
11689
;; Integer broadcast: select element 0 of the 128-bit source (operand 1,
;; mode given by AVXTOSSEMODE, register or memory) and duplicate it into
;; every element of the destination.  Instantiated for each mode of the VI
;; iterator; the mnemonic suffix comes from <ssemodesuffix>.
11690(define_insn "avx2_pbroadcast<mode>"
11691  [(set (match_operand:VI 0 "register_operand" "=x")
11692	(vec_duplicate:VI
11693	  (vec_select:<ssescalarmode>
11694	    (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
11695	    (parallel [(const_int 0)]))))]
11696  "TARGET_AVX2"
11697  "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
11698  [(set_attr "type" "ssemov")
11699   (set_attr "prefix_extra" "1")
11700   (set_attr "prefix" "vex")
11701   (set_attr "mode" "<sseinsnmode>")])
11702
;; Variable permute of eight SImode elements (vpermd), modelled as
;; UNSPEC_VPERMSI.  In Intel operand order the template emits operand 2
;; (register only) as the first source and operand 1 (register or memory)
;; as the second source.
;; NOTE(review): per the Intel SDM the first VPERMD source holds the
;; indices and the second the data -- confirm this matches the operand
;; order used by the builtin that expands to this pattern.
11703(define_insn "avx2_permvarv8si"
11704  [(set (match_operand:V8SI 0 "register_operand" "=x")
11705	(unspec:V8SI
11706	  [(match_operand:V8SI 1 "nonimmediate_operand" "xm")
11707	   (match_operand:V8SI 2 "register_operand" "x")]
11708	  UNSPEC_VPERMSI))]
11709  "TARGET_AVX2"
11710  "vpermd\t{%1, %2, %0|%0, %2, %1}"
11711  [(set_attr "type" "sselog")
11712   (set_attr "prefix" "vex")
11713   (set_attr "mode" "OI")])
11714
;; Immediate-controlled permute of four DFmode elements (vpermpd),
;; modelled as UNSPEC_VPERMDF.
;;   Operand 1: source vector, register or memory.  The predicate is
;;              nonimmediate_operand so that it agrees with the "xm"
;;              constraint (and with the sibling avx2_permvar* patterns);
;;              with register_operand a memory source could only appear
;;              via reload, never directly from expand or combine.
;;   Operand 2: immediate selector in 0..255.
11715(define_insn "avx2_permv4df"
11716  [(set (match_operand:V4DF 0 "register_operand" "=x")
11717	(unspec:V4DF
11718	  [(match_operand:V4DF 1 "nonimmediate_operand" "xm")
11719	   (match_operand:SI 2 "const_0_to_255_operand" "n")]
11720	  UNSPEC_VPERMDF))]
11721  "TARGET_AVX2"
11722  "vpermpd\t{%2, %1, %0|%0, %1, %2}"
11723  [(set_attr "type" "sselog")
11724   (set_attr "prefix_extra" "1")
11725   (set_attr "prefix" "vex")
11726   (set_attr "mode" "OI")])
11727
;; Variable permute of eight SFmode elements (vpermps), modelled as
;; UNSPEC_VPERMSF.  Operand 1 (V8SF, register or memory) and operand 2
;; (V8SI, register only) are emitted so that operand 2 is the first source
;; in Intel operand order.
;; NOTE(review): per the Intel SDM the first VPERMPS source holds the
;; indices -- confirm against the builtin that expands to this pattern.
11728(define_insn "avx2_permvarv8sf"
11729  [(set (match_operand:V8SF 0 "register_operand" "=x")
11730	(unspec:V8SF
11731	  [(match_operand:V8SF 1 "nonimmediate_operand" "xm")
11732	   (match_operand:V8SI 2 "register_operand" "x")]
11733	  UNSPEC_VPERMSF))]
11734  "TARGET_AVX2"
11735  "vpermps\t{%1, %2, %0|%0, %2, %1}"
11736  [(set_attr "type" "sselog")
11737   (set_attr "prefix" "vex")
11738   (set_attr "mode" "OI")])
11739
;; Expander for the immediate form of the V4DI permute.  The 8-bit
;; immediate (operand 2) is split into four 2-bit element selectors
;; (bits 1:0, 3:2, 5:4, 7:6) which are passed as separate operands to
;; avx2_permv4di_1, where the permutation is expressed as a vec_select.
11740(define_expand "avx2_permv4di"
11741  [(match_operand:V4DI 0 "register_operand" "")
11742   (match_operand:V4DI 1 "nonimmediate_operand" "")
11743   (match_operand:SI 2 "const_0_to_255_operand" "")]
11744  "TARGET_AVX2"
11745{
11746  int mask = INTVAL (operands[2]);
11747  emit_insn (gen_avx2_permv4di_1 (operands[0], operands[1],
11748				  GEN_INT ((mask >> 0) & 3),
11749				  GEN_INT ((mask >> 2) & 3),
11750				  GEN_INT ((mask >> 4) & 3),
11751				  GEN_INT ((mask >> 6) & 3)));
11752  DONE;
11753})
11754
;; V4DI permute expressed as a vec_select with four independent element
;; indices (operands 2..5, each restricted to 0..3).  The output code packs
;; the four indices back into a single 8-bit immediate, two bits per
;; element starting at bit 0, overwrites operands[2] with it and emits
;; vpermq.
11755(define_insn "avx2_permv4di_1"
11756  [(set (match_operand:V4DI 0 "register_operand" "=x")
11757	(vec_select:V4DI
11758	  (match_operand:V4DI 1 "nonimmediate_operand" "xm")
11759	  (parallel [(match_operand 2 "const_0_to_3_operand" "")
11760		     (match_operand 3 "const_0_to_3_operand" "")
11761		     (match_operand 4 "const_0_to_3_operand" "")
11762		     (match_operand 5 "const_0_to_3_operand" "")])))]
11763  "TARGET_AVX2"
11764{
11765  int mask = 0;
11766  mask |= INTVAL (operands[2]) << 0;
11767  mask |= INTVAL (operands[3]) << 2;
11768  mask |= INTVAL (operands[4]) << 4;
11769  mask |= INTVAL (operands[5]) << 6;
11770  operands[2] = GEN_INT (mask);
11771  return "vpermq\t{%2, %1, %0|%0, %1, %2}";
11772}
11773  [(set_attr "type" "sselog")
11774   (set_attr "prefix" "vex")
11775   (set_attr "mode" "OI")])
11776
;; vperm2i128, modelled as UNSPEC_VPERMTI on V4DI values.
;;   Operand 1: first source, register only.
;;   Operand 2: second source, register or memory.
;;   Operand 3: immediate selector in 0..255.
11777(define_insn "avx2_permv2ti"
11778  [(set (match_operand:V4DI 0 "register_operand" "=x")
11779	(unspec:V4DI
11780	  [(match_operand:V4DI 1 "register_operand" "x")
11781	   (match_operand:V4DI 2 "nonimmediate_operand" "xm")
11782	   (match_operand:SI 3 "const_0_to_255_operand" "n")]
11783	  UNSPEC_VPERMTI))]
11784  "TARGET_AVX2"
11785  "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11786  [(set_attr "type" "sselog")
11787   (set_attr "prefix" "vex")
11788   (set_attr "mode" "OI")])
11789
;; Broadcast element 0 of a V2DF register into all four elements of a V4DF
;; destination using vbroadcastsd.  The source is register-only here and
;; the pattern is gated on TARGET_AVX2.
11790(define_insn "avx2_vec_dupv4df"
11791  [(set (match_operand:V4DF 0 "register_operand" "=x")
11792	(vec_duplicate:V4DF
11793	  (vec_select:DF
11794	    (match_operand:V2DF 1 "register_operand" "x")
11795	    (parallel [(const_int 0)]))))]
11796  "TARGET_AVX2"
11797  "vbroadcastsd\t{%1, %0|%0, %1}"
11798  [(set_attr "type" "sselog1")
11799   (set_attr "prefix" "vex")
11800   (set_attr "mode" "V4DF")])
11801
11802;; Modes handled by AVX vec_dup patterns.
;; All four are 256-bit vectors with 32-bit or 64-bit elements.
11803(define_mode_iterator AVX_VEC_DUP_MODE
11804  [V8SI V8SF V4DI V4DF])
11805
;; Duplicate a scalar into every element of a 256-bit vector.
;;   Alternative 0: scalar in memory -> a single vbroadcast instruction.
;;   Alternative 1: scalar in a register (discouraged by "?") -> "#", i.e.
;;     no instruction is emitted directly; the insn must be split (see the
;;     define_split on vec_duplicate:AVX_VEC_DUP_MODE in this file).
11806(define_insn "vec_dup<mode>"
11807  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
11808	(vec_duplicate:AVX_VEC_DUP_MODE
11809	  (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
11810  "TARGET_AVX"
11811  "@
11812   vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11813   #"
11814  [(set_attr "type" "ssemov")
11815   (set_attr "prefix_extra" "1")
11816   (set_attr "prefix" "vex")
11817   (set_attr "mode" "V8SF")])
11818
;; Broadcast a 128-bit integer vector from memory into both halves of a
;; 256-bit destination: the RTL is a vec_concat of the memory operand with
;; itself (match_dup 1).  Memory source only.
11819(define_insn "avx2_vbroadcasti128_<mode>"
11820  [(set (match_operand:VI_256 0 "register_operand" "=x")
11821	(vec_concat:VI_256
11822	  (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
11823	  (match_dup 1)))]
11824  "TARGET_AVX2"
11825  "vbroadcasti128\t{%1, %0|%0, %1}"
11826  [(set_attr "type" "ssemov")
11827   (set_attr "prefix_extra" "1")
11828   (set_attr "prefix" "vex")
11829   (set_attr "mode" "OI")])
11830
;; Post-reload split of a 256-bit vec_duplicate whose scalar source is in
;; a register.  Two steps:
;;   1. duplicate the scalar across the half-width vector that occupies
;;      the same hard register as the destination (operand 2);
;;   2. concatenate that half with itself to fill the full destination.
11831(define_split
11832  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "")
11833	(vec_duplicate:AVX_VEC_DUP_MODE
11834	  (match_operand:<ssescalarmode> 1 "register_operand" "")))]
11835  "TARGET_AVX && reload_completed"
11836  [(set (match_dup 2)
11837	(vec_duplicate:<ssehalfvecmode> (match_dup 1)))
11838   (set (match_dup 0)
11839	(vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
11840  "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
11841
;; Fill both halves of a 256-bit register with the same 128-bit value.
;;   Alternative 0: source in memory -> vbroadcast.
;;   Alternative 1: source already in the destination register ("0") ->
;;     vinsert with immediate 1, inserting operand 1 into the destination.
;;   Alternative 2: source in another register (discouraged by "?") ->
;;     vperm2 with immediate 0 and operand 1 used for both sources.
;; <i128> is a mode attribute supplying the mnemonic suffix -- presumably
;; "f128"/"i128" depending on the mode; see its definition.
11842(define_insn "avx_vbroadcastf128_<mode>"
11843  [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
11844	(vec_concat:V_256
11845	  (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11846	  (match_dup 1)))]
11847  "TARGET_AVX"
11848  "@
11849   vbroadcast<i128>\t{%1, %0|%0, %1}
11850   vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
11851   vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11852  [(set_attr "type" "ssemov,sselog1,sselog1")
11853   (set_attr "prefix_extra" "1")
11854   (set_attr "length_immediate" "0,1,1")
11855   (set_attr "prefix" "vex")
11856   (set_attr "mode" "<sseinsnmode>")])
11857
11858;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11859;; If it so happens that the input is in memory, use vbroadcast.
11860;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;;
;; V4SF variant.  Operand 3 is the first index of the selection parallel,
;; i.e. the element being broadcast (the parallel itself is validated by
;; avx_vbroadcast_operand).
;;   Alternatives 0/1 (memory source): the address is advanced by
;;     elt * 4 bytes to point at the chosen SFmode element, which is then
;;     loaded with vbroadcastss.  Alternative 1 requires an offsettable
;;     address ("o"); alternative 0 accepts any memory but constrains the
;;     index with "C" -- presumably the zero constant, so no offset is
;;     needed; confirm against the constraint definition.
;;   Alternative 2 (register source): vpermilps with immediate
;;     elt * 0x55, which repeats the 2-bit index in all four selector
;;     fields.
11861(define_insn "*avx_vperm_broadcast_v4sf"
11862  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11863	(vec_select:V4SF
11864	  (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11865	  (match_parallel 2 "avx_vbroadcast_operand"
11866	    [(match_operand 3 "const_int_operand" "C,n,n")])))]
11867  "TARGET_AVX"
11868{
11869  int elt = INTVAL (operands[3]);
11870  switch (which_alternative)
11871    {
11872    case 0:
11873    case 1:
11874      operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11875      return "vbroadcastss\t{%1, %0|%0, %1}";
11876    case 2:
11877      operands[2] = GEN_INT (elt * 0x55);
11878      return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11879    default:
11880      gcc_unreachable ();
11881    }
11882}
11883  [(set_attr "type" "ssemov,ssemov,sselog1")
11884   (set_attr "prefix_extra" "1")
11885   (set_attr "length_immediate" "0,0,1")
11886   (set_attr "prefix" "vex")
11887   (set_attr "mode" "SF,SF,V4SF")])
11888
;; 256-bit counterpart of *avx_vperm_broadcast_v4sf.  The insn itself
;; never emits code ("#"); it is always split after reload.
;;   Register source: emit two instructions and finish (DONE):
;;     1. an in-lane vpermil that replicates the wanted element within its
;;        128-bit lane (immediate 15 or 0 for V4DF depending on the low
;;        bit of the index, (elt & 3) * 0x55 otherwise);
;;     2. a vperm2f128 with immediate lane * 0x11, where
;;        lane = elt / (<ssescalarnum> / 2), copying that lane into both
;;        halves of the destination.
;;   Memory source: operand 1 is narrowed to the scalar at byte offset
;;     elt * GET_MODE_SIZE (scalar mode) and the replacement template
;;     (a vec_duplicate of that scalar) is emitted.
11889(define_insn_and_split "*avx_vperm_broadcast_<mode>"
11890  [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
11891	(vec_select:VF_256
11892	  (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
11893	  (match_parallel 2 "avx_vbroadcast_operand"
11894	    [(match_operand 3 "const_int_operand" "C,n,n")])))]
11895  "TARGET_AVX"
11896  "#"
11897  "&& reload_completed"
11898  [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
11899{
11900  rtx op0 = operands[0], op1 = operands[1];
11901  int elt = INTVAL (operands[3]);
11902
11903  if (REG_P (op1))
11904    {
11905      int mask;
11906
11907      /* Shuffle element we care about into all elements of the 128-bit lane.
11908	 The other lane gets shuffled too, but we don't care.  */
11909      if (<MODE>mode == V4DFmode)
11910	mask = (elt & 1 ? 15 : 0);
11911      else
11912	mask = (elt & 3) * 0x55;
11913      emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11914
11915      /* Shuffle the lane we care about into both lanes of the dest.  */
11916      mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11917      emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11918      DONE;
11919    }
11920
11921  operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
11922				   elt * GET_MODE_SIZE (<ssescalarmode>mode));
11923})
11924
;; Expander for the immediate form of vpermilpd (VF2 modes).  The 8-bit
;; immediate in operand 2 is converted to an explicit selection PARALLEL,
;; one mask bit per element:
;;   elements 0 and 1 take bits 0 and 1 directly (indices 0..1);
;;   for V4DF, elements 2 and 3 take bits 2 and 3 plus 2 (indices 2..3),
;;   so each element is chosen from within its own 128-bit half.
;; Operand 2 is then replaced by that PARALLEL and the vec_select in the
;; template is emitted.
11925(define_expand "avx_vpermil<mode>"
11926  [(set (match_operand:VF2 0 "register_operand" "")
11927	(vec_select:VF2
11928	  (match_operand:VF2 1 "nonimmediate_operand" "")
11929	  (match_operand:SI 2 "const_0_to_255_operand" "")))]
11930  "TARGET_AVX"
11931{
11932  int mask = INTVAL (operands[2]);
11933  rtx perm[<ssescalarnum>];
11934
11935  perm[0] = GEN_INT (mask & 1);
11936  perm[1] = GEN_INT ((mask >> 1) & 1);
11937  if (<MODE>mode == V4DFmode)
11938    {
11939      perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11940      perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11941    }
11942
11943  operands[2]
11944    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
11945})
11946
;; Expander for the immediate form of vpermilps (VF1 modes).  The 8-bit
;; immediate in operand 2 is converted to an explicit selection PARALLEL,
;; two mask bits per element:
;;   elements 0..3 take bit pairs 1:0, 3:2, 5:4, 7:6 (indices 0..3);
;;   for V8SF, elements 4..7 reuse the same four bit pairs with 4 added
;;   (indices 4..7), i.e. the same in-lane shuffle is applied to the
;;   upper 128-bit half.
;; Operand 2 is then replaced by that PARALLEL and the vec_select in the
;; template is emitted.
11947(define_expand "avx_vpermil<mode>"
11948  [(set (match_operand:VF1 0 "register_operand" "")
11949	(vec_select:VF1
11950	  (match_operand:VF1 1 "nonimmediate_operand" "")
11951	  (match_operand:SI 2 "const_0_to_255_operand" "")))]
11952  "TARGET_AVX"
11953{
11954  int mask = INTVAL (operands[2]);
11955  rtx perm[<ssescalarnum>];
11956
11957  perm[0] = GEN_INT (mask & 3);
11958  perm[1] = GEN_INT ((mask >> 2) & 3);
11959  perm[2] = GEN_INT ((mask >> 4) & 3);
11960  perm[3] = GEN_INT ((mask >> 6) & 3);
11961  if (<MODE>mode == V8SFmode)
11962    {
11963      perm[4] = GEN_INT ((mask & 3) + 4);
11964      perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11965      perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11966      perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11967    }
11968
11969  operands[2]
11970    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
11971})
11972
;; Matcher for a vec_select that is expressible as an immediate vpermilp.
;; avx_vpermilp_parallel is used twice: in the insn condition, where a
;; nonzero result means the selection PARALLEL is acceptable, and in the
;; output code, where (result - 1) is the immediate to emit -- so the
;; helper returns the immediate plus one.  operands[2] is overwritten with
;; that immediate before the template is printed.
11973(define_insn "*avx_vpermilp<mode>"
11974  [(set (match_operand:VF 0 "register_operand" "=x")
11975	(vec_select:VF
11976	  (match_operand:VF 1 "nonimmediate_operand" "xm")
11977	  (match_parallel 2 ""
11978	    [(match_operand 3 "const_int_operand" "")])))]
11979  "TARGET_AVX
11980   && avx_vpermilp_parallel (operands[2], <MODE>mode)"
11981{
11982  int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11983  operands[2] = GEN_INT (mask);
11984  return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
11985}
11986  [(set_attr "type" "sselog")
11987   (set_attr "prefix_extra" "1")
11988   (set_attr "length_immediate" "1")
11989   (set_attr "prefix" "vex")
11990   (set_attr "mode" "<MODE>")])
11991
;; Variable-control form of vpermilp, modelled as UNSPEC_VPERMIL.
;;   Operand 1: floating-point source vector, register only.
;;   Operand 2: control vector in the matching integer vector mode
;;              (<sseintvecmode>), register or memory.
;; No immediate is involved, hence no length_immediate attribute.
11992(define_insn "avx_vpermilvar<mode>3"
11993  [(set (match_operand:VF 0 "register_operand" "=x")
11994	(unspec:VF
11995	  [(match_operand:VF 1 "register_operand" "x")
11996	   (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
11997	  UNSPEC_VPERMIL))]
11998  "TARGET_AVX"
11999  "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12000  [(set_attr "type" "sselog")
12001   (set_attr "prefix_extra" "1")
12002   (set_attr "prefix" "vex")
12003   (set_attr "mode" "<MODE>")])
12004
;; Expander for vperm2f128 with an immediate selector (operand 3).
;; If neither bit 3 nor bit 7 of the immediate is set (mask & 0x88 == 0),
;; the operation is emitted as a vec_select from the vec_concat of
;; operands 1 and 2: the low half of the result takes the nelt/2
;; consecutive elements starting at (mask & 3) * nelt/2, the high half
;; those starting at ((mask >> 4) & 3) * nelt/2, and expansion ends with
;; DONE.  Otherwise control falls out of the preparation code and the
;; UNSPEC_VPERMIL2F128 template above it is emitted unchanged.
12005(define_expand "avx_vperm2f128<mode>3"
12006  [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
12007	(unspec:AVX256MODE2P
12008	  [(match_operand:AVX256MODE2P 1 "register_operand" "")
12009	   (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
12010	   (match_operand:SI 3 "const_0_to_255_operand" "")]
12011	  UNSPEC_VPERMIL2F128))]
12012  "TARGET_AVX"
12013{
12014  int mask = INTVAL (operands[3]);
12015  if ((mask & 0x88) == 0)
12016    {
12017      rtx perm[<ssescalarnum>], t1, t2;
12018      int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
12019
12020      base = (mask & 3) * nelt2;
12021      for (i = 0; i < nelt2; ++i)
12022	perm[i] = GEN_INT (base + i);
12023
12024      base = ((mask >> 4) & 3) * nelt2;
12025      for (i = 0; i < nelt2; ++i)
12026	perm[i + nelt2] = GEN_INT (base + i);
12027
12028      t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
12029			       operands[1], operands[2]);
12030      t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
12031      t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
12032      t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
12033      emit_insn (t2);
12034      DONE;
12035    }
12036})
12037
12038;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
12039;; means that in order to represent this properly in rtl we'd have to
12040;; nest *another* vec_concat with a zero operand and do the select from
12041;; a 4x wide vector.  That doesn't seem very nice.
;; Hence this fully general form stays an unspec (UNSPEC_VPERMIL2F128) and
;; simply passes the immediate (operand 3) through to the instruction.
12042(define_insn "*avx_vperm2f128<mode>_full"
12043  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12044	(unspec:AVX256MODE2P
12045	  [(match_operand:AVX256MODE2P 1 "register_operand" "x")
12046	   (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
12047	   (match_operand:SI 3 "const_0_to_255_operand" "n")]
12048	  UNSPEC_VPERMIL2F128))]
12049  "TARGET_AVX"
12050  "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12051  [(set_attr "type" "sselog")
12052   (set_attr "prefix_extra" "1")
12053   (set_attr "length_immediate" "1")
12054   (set_attr "prefix" "vex")
12055   (set_attr "mode" "<sseinsnmode>")])
12056
;; vperm2f128 expressed as a vec_select from the concatenation of
;; operands 1 and 2 (the non-zeroing case).  avx_vperm2f128_parallel
;; validates the selection PARALLEL in the insn condition (nonzero means
;; acceptable) and its result minus one is the equivalent immediate.
;; Two immediates are emitted as the shorter-semantics vinsert instead:
;;   0x12 -> vinsert with immediate 0 of the low half of operand 2 (%x2)
;;           into operand 1;
;;   0x20 -> vinsert with immediate 1 of the low half of operand 2 into
;;           operand 1.
;; Any other immediate is stored in operands[3] and emitted as vperm2.
12057(define_insn "*avx_vperm2f128<mode>_nozero"
12058  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12059	(vec_select:AVX256MODE2P
12060	  (vec_concat:<ssedoublevecmode>
12061	    (match_operand:AVX256MODE2P 1 "register_operand" "x")
12062	    (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
12063	  (match_parallel 3 ""
12064	    [(match_operand 4 "const_int_operand" "")])))]
12065  "TARGET_AVX
12066   && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
12067{
12068  int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
12069  if (mask == 0x12)
12070    return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
12071  if (mask == 0x20)
12072    return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
12073  operands[3] = GEN_INT (mask);
12074  return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12075}
12076  [(set_attr "type" "sselog")
12077   (set_attr "prefix_extra" "1")
12078   (set_attr "length_immediate" "1")
12079   (set_attr "prefix" "vex")
12080   (set_attr "mode" "<sseinsnmode>")])
12081
;; Expander for the 128-bit insert builtin on any 256-bit mode.
;; Operand 3 (0 or 1, enforced by const_0_to_1_operand) picks which half
;; of operand 1 is replaced by operand 2; the work is delegated to
;; vec_set_lo_<mode> for 0 and vec_set_hi_<mode> for 1, which describe the
;; result as a vec_concat.
12082(define_expand "avx_vinsertf128<mode>"
12083  [(match_operand:V_256 0 "register_operand" "")
12084   (match_operand:V_256 1 "register_operand" "")
12085   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
12086   (match_operand:SI 3 "const_0_to_1_operand" "")]
12087  "TARGET_AVX"
12088{
12089  rtx (*insn)(rtx, rtx, rtx);
12090
12091  switch (INTVAL (operands[3]))
12092    {
12093    case 0:
12094      insn = gen_vec_set_lo_<mode>;
12095      break;
12096    case 1:
12097      insn = gen_vec_set_hi_<mode>;
12098      break;
12099    default:
12100      gcc_unreachable ();
12101    }
12102
12103  emit_insn (insn (operands[0], operands[1], operands[2]));
12104  DONE;
12105})
12106
;; AVX2 integer insert into the low half of a V4DI: the result is
;; operand 2 (V2DI, register or memory) concatenated with elements 2..3
;; of operand 1.  Emitted as vinserti128 with immediate 0.
12107(define_insn "avx2_vec_set_lo_v4di"
12108  [(set (match_operand:V4DI 0 "register_operand" "=x")
12109	(vec_concat:V4DI
12110	  (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12111	  (vec_select:V2DI
12112	    (match_operand:V4DI 1 "register_operand" "x")
12113	    (parallel [(const_int 2) (const_int 3)]))))]
12114  "TARGET_AVX2"
12115  "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12116  [(set_attr "type" "sselog")
12117   (set_attr "prefix_extra" "1")
12118   (set_attr "length_immediate" "1")
12119   (set_attr "prefix" "vex")
12120   (set_attr "mode" "OI")])
12121
;; AVX2 integer insert into the high half of a V4DI: the result is
;; elements 0..1 of operand 1 concatenated with operand 2 (V2DI, register
;; or memory).  Emitted as vinserti128 with immediate 1.
12122(define_insn "avx2_vec_set_hi_v4di"
12123  [(set (match_operand:V4DI 0 "register_operand" "=x")
12124	(vec_concat:V4DI
12125	  (vec_select:V2DI
12126	    (match_operand:V4DI 1 "register_operand" "x")
12127	    (parallel [(const_int 0) (const_int 1)]))
12128	  (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
12129  "TARGET_AVX2"
12130  "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12131  [(set_attr "type" "sselog")
12132   (set_attr "prefix_extra" "1")
12133   (set_attr "length_immediate" "1")
12134   (set_attr "prefix" "vex")
12135   (set_attr "mode" "OI")])
12136
;; Replace the low 128 bits of a 256-bit vector with 64-bit elements
;; (VI8F_256 modes): result = operand 2 concatenated with elements 2..3
;; of operand 1.  Emitted as vinsert<i128> with immediate 0.
12137(define_insn "vec_set_lo_<mode>"
12138  [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12139	(vec_concat:VI8F_256
12140	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12141	  (vec_select:<ssehalfvecmode>
12142	    (match_operand:VI8F_256 1 "register_operand" "x")
12143	    (parallel [(const_int 2) (const_int 3)]))))]
12144  "TARGET_AVX"
12145  "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12146  [(set_attr "type" "sselog")
12147   (set_attr "prefix_extra" "1")
12148   (set_attr "length_immediate" "1")
12149   (set_attr "prefix" "vex")
12150   (set_attr "mode" "<sseinsnmode>")])
12151
;; Replace the high 128 bits of a 256-bit vector with 64-bit elements
;; (VI8F_256 modes): result = elements 0..1 of operand 1 concatenated
;; with operand 2.  Emitted as vinsert<i128> with immediate 1.
12152(define_insn "vec_set_hi_<mode>"
12153  [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12154	(vec_concat:VI8F_256
12155	  (vec_select:<ssehalfvecmode>
12156	    (match_operand:VI8F_256 1 "register_operand" "x")
12157	    (parallel [(const_int 0) (const_int 1)]))
12158	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12159  "TARGET_AVX"
12160  "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12161  [(set_attr "type" "sselog")
12162   (set_attr "prefix_extra" "1")
12163   (set_attr "length_immediate" "1")
12164   (set_attr "prefix" "vex")
12165   (set_attr "mode" "<sseinsnmode>")])
12166
;; Replace the low 128 bits of a 256-bit vector with 32-bit elements
;; (VI4F_256 modes): result = operand 2 concatenated with elements 4..7
;; of operand 1.  Emitted as vinsert<i128> with immediate 0.
12167(define_insn "vec_set_lo_<mode>"
12168  [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12169	(vec_concat:VI4F_256
12170	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12171	  (vec_select:<ssehalfvecmode>
12172	    (match_operand:VI4F_256 1 "register_operand" "x")
12173	    (parallel [(const_int 4) (const_int 5)
12174		       (const_int 6) (const_int 7)]))))]
12175  "TARGET_AVX"
12176  "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12177  [(set_attr "type" "sselog")
12178   (set_attr "prefix_extra" "1")
12179   (set_attr "length_immediate" "1")
12180   (set_attr "prefix" "vex")
12181   (set_attr "mode" "<sseinsnmode>")])
12182
;; Replace the high 128 bits of a 256-bit vector with 32-bit elements
;; (VI4F_256 modes): result = elements 0..3 of operand 1 concatenated
;; with operand 2.  Emitted as vinsert<i128> with immediate 1.
12183(define_insn "vec_set_hi_<mode>"
12184  [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12185	(vec_concat:VI4F_256
12186	  (vec_select:<ssehalfvecmode>
12187	    (match_operand:VI4F_256 1 "register_operand" "x")
12188	    (parallel [(const_int 0) (const_int 1)
12189		       (const_int 2) (const_int 3)]))
12190	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12191  "TARGET_AVX"
12192  "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12193  [(set_attr "type" "sselog")
12194   (set_attr "prefix_extra" "1")
12195   (set_attr "length_immediate" "1")
12196   (set_attr "prefix" "vex")
12197   (set_attr "mode" "<sseinsnmode>")])
12198
;; Replace the low 128 bits of a V16HI: result = operand 2 (V8HI)
;; concatenated with elements 8..15 of operand 1.  Emitted as a 128-bit
;; vinsert with immediate 0.
;; The "%~" in the mnemonic is expanded by the i386 operand printer --
;; presumably to "i" or "f" depending on AVX2 availability, since the
;; pattern is only gated on TARGET_AVX; confirm in i386.c.
12199(define_insn "vec_set_lo_v16hi"
12200  [(set (match_operand:V16HI 0 "register_operand" "=x")
12201	(vec_concat:V16HI
12202	  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12203	  (vec_select:V8HI
12204	    (match_operand:V16HI 1 "register_operand" "x")
12205	    (parallel [(const_int 8) (const_int 9)
12206		       (const_int 10) (const_int 11)
12207		       (const_int 12) (const_int 13)
12208		       (const_int 14) (const_int 15)]))))]
12209  "TARGET_AVX"
12210  "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12211  [(set_attr "type" "sselog")
12212   (set_attr "prefix_extra" "1")
12213   (set_attr "length_immediate" "1")
12214   (set_attr "prefix" "vex")
12215   (set_attr "mode" "OI")])
12216
;; Replace the high 128 bits of a V16HI: result = elements 0..7 of
;; operand 1 concatenated with operand 2 (V8HI).  Emitted as a 128-bit
;; vinsert with immediate 1.
;; The "%~" in the mnemonic is expanded by the i386 operand printer --
;; presumably to "i" or "f" depending on AVX2 availability; confirm in
;; i386.c.
12217(define_insn "vec_set_hi_v16hi"
12218  [(set (match_operand:V16HI 0 "register_operand" "=x")
12219	(vec_concat:V16HI
12220	  (vec_select:V8HI
12221	    (match_operand:V16HI 1 "register_operand" "x")
12222	    (parallel [(const_int 0) (const_int 1)
12223		       (const_int 2) (const_int 3)
12224		       (const_int 4) (const_int 5)
12225		       (const_int 6) (const_int 7)]))
12226	  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12227  "TARGET_AVX"
12228  "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12229  [(set_attr "type" "sselog")
12230   (set_attr "prefix_extra" "1")
12231   (set_attr "length_immediate" "1")
12232   (set_attr "prefix" "vex")
12233   (set_attr "mode" "OI")])
12234
;; Replace the low 128 bits of a V32QI: result = operand 2 (V16QI)
;; concatenated with elements 16..31 of operand 1.  Emitted as a 128-bit
;; vinsert with immediate 0.
;; The "%~" in the mnemonic is expanded by the i386 operand printer --
;; presumably to "i" or "f" depending on AVX2 availability; confirm in
;; i386.c.
12235(define_insn "vec_set_lo_v32qi"
12236  [(set (match_operand:V32QI 0 "register_operand" "=x")
12237	(vec_concat:V32QI
12238	  (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12239	  (vec_select:V16QI
12240	    (match_operand:V32QI 1 "register_operand" "x")
12241	    (parallel [(const_int 16) (const_int 17)
12242		       (const_int 18) (const_int 19)
12243		       (const_int 20) (const_int 21)
12244		       (const_int 22) (const_int 23)
12245		       (const_int 24) (const_int 25)
12246		       (const_int 26) (const_int 27)
12247		       (const_int 28) (const_int 29)
12248		       (const_int 30) (const_int 31)]))))]
12249  "TARGET_AVX"
12250  "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12251  [(set_attr "type" "sselog")
12252   (set_attr "prefix_extra" "1")
12253   (set_attr "length_immediate" "1")
12254   (set_attr "prefix" "vex")
12255   (set_attr "mode" "OI")])
12256
;; Replace the high 128 bits of a V32QI: result = elements 0..15 of
;; operand 1 concatenated with operand 2 (V16QI).  Emitted as a 128-bit
;; vinsert with immediate 1.
;; The "%~" in the mnemonic is expanded by the i386 operand printer --
;; presumably to "i" or "f" depending on AVX2 availability; confirm in
;; i386.c.
12257(define_insn "vec_set_hi_v32qi"
12258  [(set (match_operand:V32QI 0 "register_operand" "=x")
12259	(vec_concat:V32QI
12260	  (vec_select:V16QI
12261	    (match_operand:V32QI 1 "register_operand" "x")
12262	    (parallel [(const_int 0) (const_int 1)
12263		       (const_int 2) (const_int 3)
12264		       (const_int 4) (const_int 5)
12265		       (const_int 6) (const_int 7)
12266		       (const_int 8) (const_int 9)
12267		       (const_int 10) (const_int 11)
12268		       (const_int 12) (const_int 13)
12269		       (const_int 14) (const_int 15)]))
12270	  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12271  "TARGET_AVX"
12272  "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12273  [(set_attr "type" "sselog")
12274   (set_attr "prefix_extra" "1")
12275   (set_attr "length_immediate" "1")
12276   (set_attr "prefix" "vex")
12277   (set_attr "mode" "OI")])
12278
;; Masked vector load (UNSPEC_MASKMOV).
;;   Operand 0: destination register.
;;   Operand 1: memory source.
;;   Operand 2: mask, a register in the matching integer vector mode.
;; Note the operand numbering: the mask is operand 2 but appears first in
;; the unspec vector and as the middle operand of the instruction.
;; The mnemonic is assembled from <sseintprefix> and <ssemodesuffix>.
12279(define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
12280  [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
12281	(unspec:V48_AVX2
12282	  [(match_operand:<sseintvecmode> 2 "register_operand" "x")
12283	   (match_operand:V48_AVX2 1 "memory_operand" "m")]
12284	  UNSPEC_MASKMOV))]
12285  "TARGET_AVX"
12286  "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
12287  [(set_attr "type" "sselog1")
12288   (set_attr "prefix_extra" "1")
12289   (set_attr "prefix" "vex")
12290   (set_attr "mode" "<sseinsnmode>")])
12291
;; Masked vector store (UNSPEC_MASKMOV).
;;   Operand 0: memory destination.
;;   Operand 1: mask, a register in the matching integer vector mode.
;;   Operand 2: data register.
;; The (match_dup 0) inside the unspec makes the previous memory contents
;; an input of the operation, so the store is not treated as overwriting
;; the whole destination.
12292(define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
12293  [(set (match_operand:V48_AVX2 0 "memory_operand" "=m")
12294	(unspec:V48_AVX2
12295	  [(match_operand:<sseintvecmode> 1 "register_operand" "x")
12296	   (match_operand:V48_AVX2 2 "register_operand" "x")
12297	   (match_dup 0)]
12298	  UNSPEC_MASKMOV))]
12299  "TARGET_AVX"
12300  "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12301  [(set_attr "type" "sselog1")
12302   (set_attr "prefix_extra" "1")
12303   (set_attr "prefix" "vex")
12304   (set_attr "mode" "<sseinsnmode>")])
12305
;; Cast of a 128-bit vector to the corresponding 256-bit mode
;; (UNSPEC_CAST).  The insn never emits code itself ("#"); after reload it
;; is split into a single plain move, with one side re-expressed so both
;; sides have the same mode:
;;   - register destination: the destination is narrowed to the half-width
;;     mode and operand 1 is moved into it;
;;   - memory destination: the source register is widened to the full
;;     mode and stored as a whole.
;; The replacement template "(const_int 0)" is a placeholder; the C code
;; emits the move and ends with DONE.
12306(define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
12307  [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12308	(unspec:AVX256MODE2P
12309	  [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
12310	  UNSPEC_CAST))]
12311  "TARGET_AVX"
12312  "#"
12313  "&& reload_completed"
12314  [(const_int 0)]
12315{
12316  rtx op0 = operands[0];
12317  rtx op1 = operands[1];
12318  if (REG_P (op0))
12319    op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
12320  else
12321    op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12322  emit_move_insn (op0, op1);
12323  DONE;
12324})
12325
;; Standard-named vec_init expander for every 256-bit mode (V_256).
;; All of the work is done by ix86_expand_vector_init, called with
;; `false' as its first argument, operand 0 as the target and operand 1
;; as the initializer.
12326(define_expand "vec_init<mode>"
12327  [(match_operand:V_256 0 "register_operand" "")
12328   (match_operand 1 "" "")]
12329  "TARGET_AVX"
12330{
12331  ix86_expand_vector_init (false, operands[0], operands[1]);
12332  DONE;
12333})
12334
;; Expander for the 128-bit integer extract on V4DI.  Operand 2 (0 or 1,
;; enforced by const_0_to_1_operand) selects the half to extract; the
;; work is delegated to vec_extract_lo_v4di for 0 and vec_extract_hi_v4di
;; for 1.  Operand 0 may be a register or memory.
12335(define_expand "avx2_extracti128"
12336  [(match_operand:V2DI 0 "nonimmediate_operand" "")
12337   (match_operand:V4DI 1 "register_operand" "")
12338   (match_operand:SI 2 "const_0_to_1_operand" "")]
12339  "TARGET_AVX2"
12340{
12341  rtx (*insn)(rtx, rtx);
12342
12343  switch (INTVAL (operands[2]))
12344    {
12345    case 0:
12346      insn = gen_vec_extract_lo_v4di;
12347      break;
12348    case 1:
12349      insn = gen_vec_extract_hi_v4di;
12350      break;
12351    default:
12352      gcc_unreachable ();
12353    }
12354
12355  emit_insn (insn (operands[0], operands[1]));
12356  DONE;
12357})
12358
;; Expander for the 128-bit integer insert on V4DI.  Operand 3 (0 or 1,
;; enforced by const_0_to_1_operand) selects which half of operand 1 is
;; replaced by operand 2; the work is delegated to avx2_vec_set_lo_v4di
;; for 0 and avx2_vec_set_hi_v4di for 1.
12359(define_expand "avx2_inserti128"
12360  [(match_operand:V4DI 0 "register_operand" "")
12361   (match_operand:V4DI 1 "register_operand" "")
12362   (match_operand:V2DI 2 "nonimmediate_operand" "")
12363   (match_operand:SI 3 "const_0_to_1_operand" "")]
12364  "TARGET_AVX2"
12365{
12366  rtx (*insn)(rtx, rtx, rtx);
12367
12368  switch (INTVAL (operands[3]))
12369    {
12370    case 0:
12371      insn = gen_avx2_vec_set_lo_v4di;
12372      break;
12373    case 1:
12374      insn = gen_avx2_vec_set_hi_v4di;
12375      break;
12376    default:
12377      gcc_unreachable ();
12378    }
12379
12380  emit_insn (insn (operands[0], operands[1], operands[2]));
12381  DONE;
12382})
12383
;; Per-element arithmetic right shift with a vector of shift counts
;; (vpsravd), for the 32-bit-element modes in VI4_AVX2.  Operand 1 is the
;; value being shifted (register); operand 2 holds the counts (register or
;; memory).  The mnemonic is fixed because only dword elements are
;; handled by this pattern.
12384(define_insn "avx2_ashrv<mode>"
12385  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
12386	(ashiftrt:VI4_AVX2
12387	  (match_operand:VI4_AVX2 1 "register_operand" "x")
12388	  (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")))]
12389  "TARGET_AVX2"
12390  "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12391  [(set_attr "type" "sseishft")
12392   (set_attr "prefix" "vex")
12393   (set_attr "mode" "<sseinsnmode>")])
12394
;; Per-element shifts with a vector of shift counts for the codes in the
;; any_lshift iterator, over the VI48_AVX2 modes.  Operand 1 is the value
;; being shifted (register); operand 2 holds the counts (register or
;; memory).  The mnemonic is built from <vshift> and the element-size
;; suffix <ssemodesuffix>.
12395(define_insn "avx2_<shift_insn>v<mode>"
12396  [(set (match_operand:VI48_AVX2 0 "register_operand" "=x")
12397	(any_lshift:VI48_AVX2
12398	  (match_operand:VI48_AVX2 1 "register_operand" "x")
12399	  (match_operand:VI48_AVX2 2 "nonimmediate_operand" "xm")))]
12400  "TARGET_AVX2"
12401  "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12402  [(set_attr "type" "sseishft")
12403   (set_attr "prefix" "vex")
12404   (set_attr "mode" "<sseinsnmode>")])
12405
;; Build a 256-bit vector from two 128-bit halves (operand 1 = low half,
;; operand 2 = high half).
;;   Alternative 0: high half in a register or memory -> vinsert with
;;     immediate 1; operand 1 is printed with the %t modifier.
;;   Alternative 1: operand 2 matches constraint "C" -> a single 128-bit
;;     move of operand 1 into the destination printed with %x; the move
;;     mnemonic (vmovaps / vmovapd / vmovdqa) follows the insn's "mode"
;;     attribute.
;; NOTE(review): alternative 1 presumably relies on "C" meaning the zero
;; vector and on VEX-encoded 128-bit moves clearing the upper half of the
;; destination -- confirm against the constraint definition and the ISA
;; manual.
12406(define_insn "avx_vec_concat<mode>"
12407  [(set (match_operand:V_256 0 "register_operand" "=x,x")
12408	(vec_concat:V_256
12409	  (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
12410	  (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
12411  "TARGET_AVX"
12412{
12413  switch (which_alternative)
12414    {
12415    case 0:
12416      return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12417    case 1:
12418      switch (get_attr_mode (insn))
12419	{
12420	case MODE_V8SF:
12421	  return "vmovaps\t{%1, %x0|%x0, %1}";
12422	case MODE_V4DF:
12423	  return "vmovapd\t{%1, %x0|%x0, %1}";
12424	default:
12425	  return "vmovdqa\t{%1, %x0|%x0, %1}";
12426	}
12427    default:
12428      gcc_unreachable ();
12429    }
12430}
12431  [(set_attr "type" "sselog,ssemov")
12432   (set_attr "prefix_extra" "1,*")
12433   (set_attr "length_immediate" "1,*")
12434   (set_attr "prefix" "vex")
12435   (set_attr "mode" "<sseinsnmode>")])
12436
;; Half-precision to single-precision conversion, 128-bit register form
;; (TARGET_F16C).  The RTL models a conversion of the whole V8HI register
;; to V8SF (UNSPEC_VCVTPH2PS) followed by a vec_select of elements 0..3,
;; i.e. only the low four results are kept.  Register source only; the
;; memory form is the separate *vcvtph2ps_load pattern.
12437(define_insn "vcvtph2ps"
12438  [(set (match_operand:V4SF 0 "register_operand" "=x")
12439	(vec_select:V4SF
12440	  (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12441		       UNSPEC_VCVTPH2PS)
12442	  (parallel [(const_int 0) (const_int 1)
12443		     (const_int 2) (const_int 3)])))]
12444  "TARGET_F16C"
12445  "vcvtph2ps\t{%1, %0|%0, %1}"
12446  [(set_attr "type" "ssecvt")
12447   (set_attr "prefix" "vex")
12448   (set_attr "mode" "V4SF")])
12449
;; Memory-source form of the 128-bit half-to-single conversion: four
;; half-precision values (V4HI in memory) are converted to V4SF.
;; NOTE(review): the "mode" attribute is V8SF although the destination is
;; V4SF and the register form (vcvtph2ps) uses V4SF -- looks like an
;; inconsistency affecting only insn attributes; confirm before changing.
12450(define_insn "*vcvtph2ps_load"
12451  [(set (match_operand:V4SF 0 "register_operand" "=x")
12452	(unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12453		     UNSPEC_VCVTPH2PS))]
12454  "TARGET_F16C"
12455  "vcvtph2ps\t{%1, %0|%0, %1}"
12456  [(set_attr "type" "ssecvt")
12457   (set_attr "prefix" "vex")
12458   (set_attr "mode" "V8SF")])
12459
;; 256-bit half-to-single conversion: eight half-precision values (V8HI,
;; register or memory) are converted to V8SF (UNSPEC_VCVTPH2PS).
12460(define_insn "vcvtph2ps256"
12461  [(set (match_operand:V8SF 0 "register_operand" "=x")
12462	(unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12463		     UNSPEC_VCVTPH2PS))]
12464  "TARGET_F16C"
12465  "vcvtph2ps\t{%1, %0|%0, %1}"
12466  [(set_attr "type" "ssecvt")
12467   (set_attr "prefix" "vex")
12468   (set_attr "mode" "V8SF")])
12469
;; Expander for the 128-bit vcvtps2ph (enabled by TARGET_F16C).
;; The V8HI destination is the concatenation of
;;   - the V4HI conversion of V4SF operand 1 under immediate operand 2
;;     (an SImode constant accepted by const_0_to_255_operand), and
;;   - operand 3, which the preparation code sets to the V4HI zero vector.
(define_expand "vcvtps2ph"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (vec_concat:V8HI
          (unspec:V4HI
            [(match_operand:V4SF 1 "register_operand" "")
             (match_operand:SI 2 "const_0_to_255_operand" "")]
            UNSPEC_VCVTPS2PH)
          (match_dup 3)))]
  "TARGET_F16C"
{
  /* Second half of the concatenation is the all-zero V4HI vector.  */
  operands[3] = CONST0_RTX (V4HImode);
})
12479
;; vcvtps2ph, 128-bit register-destination form (enabled by TARGET_F16C).
;; Matches a V8HI register set to the V4HI conversion result concatenated
;; with operand 3, which must satisfy const0_operand.  Operand 3 does not
;; appear in the output template.
(define_insn "*vcvtps2ph"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          (unspec:V4HI
            [(match_operand:V4SF 1 "register_operand" "x")
             (match_operand:SI 2 "const_0_to_255_operand" "N")]
            UNSPEC_VCVTPS2PH)
          (match_operand:V4HI 3 "const0_operand" "")))]
  "TARGET_F16C"
  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])
12492
;; vcvtps2ph, 128-bit form storing straight to memory
;; (enabled by TARGET_F16C).  The destination is a V4HI memory operand, so
;; no zero half is concatenated here.
(define_insn "*vcvtps2ph_store"
  [(set (match_operand:V4HI 0 "memory_operand" "=m")
        (unspec:V4HI
          [(match_operand:V4SF 1 "register_operand" "x")
           (match_operand:SI 2 "const_0_to_255_operand" "N")]
          UNSPEC_VCVTPS2PH))]
  "TARGET_F16C"
  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])
12503
;; vcvtps2ph, 256-bit source form (enabled by TARGET_F16C).
;; A V8SF register is converted under immediate operand 2 into a V8HI
;; result that may live in a register or in memory.
(define_insn "vcvtps2ph256"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
        (unspec:V8HI
          [(match_operand:V8SF 1 "register_operand" "x")
           (match_operand:SI 2 "const_0_to_255_operand" "N")]
          UNSPEC_VCVTPS2PH))]
  "TARGET_F16C"
  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
12514
;; For gather* insn patterns: the vector modes a gather result can have.
(define_mode_iterator VEC_GATHER_MODE
  [V2DI V2DF
   V4DI V4DF
   V4SI V4SF
   V8SI V8SF])
;; Mode of the index vector operand in the avx2_gathersi patterns, keyed
;; by the gather mode.  Only the V8SI/V8SF gathers map to V8SI; every
;; other mode maps to V4SI.
(define_mode_attr VEC_GATHER_IDXSI
  [(V2DI "V4SI") (V2DF "V4SI")
   (V4DI "V4SI") (V4DF "V4SI")
   (V4SI "V4SI") (V4SF "V4SI")
   (V8SI "V8SI") (V8SF "V8SI")])
;; Mode of the index vector operand in the avx2_gatherdi patterns, keyed
;; by the gather mode.  128-bit gather modes map to V2DI and 256-bit
;; gather modes map to V4DI.
(define_mode_attr VEC_GATHER_IDXDI
  [(V2DI "V2DI") (V2DF "V2DI")
   (V4DI "V4DI") (V4DF "V4DI")
   (V4SI "V2DI") (V4SF "V2DI")
   (V8SI "V4DI") (V8SF "V4DI")])
;; Mode of the source and mask operands in the avx2_gatherdi patterns,
;; keyed by the gather mode.  It equals the gather mode except for
;; V8SI/V8SF, which map to the half-width V4SI/V4SF.
(define_mode_attr VEC_GATHER_SRCDI
  [(V2DI "V2DI") (V2DF "V2DF")
   (V4DI "V4DI") (V4DF "V4DF")
   (V4SI "V4SI") (V4SF "V4SF")
   (V8SI "V4SI") (V8SF "V4SF")])
12533
;; Expander for the AVX2 gathers whose index vector has mode
;; <VEC_GATHER_IDXSI> (enabled by TARGET_AVX2).
;;   operand 0: destination register
;;   operand 1: register operand in the first slot of the gather unspec
;;              (tied to the destination in the matching insn)
;;   operand 2: address operand accepted by vsib_address_operand
;;   operand 3: index vector register
;;   operand 4: register operand in the last slot of the gather unspec
;;              (tied to the scratch in the matching insn)
;;   operand 5: SImode scale accepted by const1248_operand
;;              (presumably 1, 2, 4 or 8, per the predicate name)
;;   operand 6: scratch clobbered by the instruction
;;   operand 7: created by the preparation code: an UNSPEC_VSIBADDR in
;;              Pmode wrapping operands 2, 3 and 5, used as the address
;;              of the element-mode memory reference.
(define_expand "avx2_gathersi<mode>"
  [(parallel
     [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
           (unspec:VEC_GATHER_MODE
             [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
              (mem:<ssescalarmode>
                (match_par_dup 7
                  [(match_operand 2 "vsib_address_operand" "")
                   (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "")
                   (match_operand:SI 5 "const1248_operand" "")]))
              (mem:BLK (scratch))
              (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
             UNSPEC_GATHER))
      (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
  "TARGET_AVX2"
{
  /* Bundle base, index and scale into the VSIB address unspec.  */
  operands[7]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
                                        operands[5]), UNSPEC_VSIBADDR);
})
12554
;; AVX2 gather with a <VEC_GATHER_IDXSI> index vector, form with a real
;; first unspec operand (enabled by TARGET_AVX2).
;;   operand 0: destination, early-clobbered
;;   operand 1: scratch, early-clobbered; operand 5 is tied to it
;;   operand 2: tied to operand 0 by its "0" constraint
;;   operands 3, 4, 6: address, index vector and scale inside UNSPEC_VSIBADDR
;;   operand 7: the memory reference (vsib_mem_operator) around that address
;; The template prints the scratch, the memory reference and the destination.
(define_insn "*avx2_gathersi<mode>"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
          [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
           (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 3 "vsib_address_operand" "p")
                 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
                 (match_operand:SI 6 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])
           (mem:BLK (scratch))
           (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
          UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
12574
;; Variant of the <VEC_GATHER_IDXSI>-index gather in which the first unspec
;; slot is (pc) instead of a register tied to the destination
;; (enabled by TARGET_AVX2).  Operand numbers after the scratch are
;; therefore one lower than in the form with a real first operand:
;;   operand 0: destination, early-clobbered
;;   operand 1: scratch, early-clobbered; operand 4 is tied to it
;;   operands 2, 3, 5: address, index vector and scale inside UNSPEC_VSIBADDR
;;   operand 6: the memory reference (vsib_mem_operator) around that address
(define_insn "*avx2_gathersi<mode>_2"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
          [(pc)
           (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 2 "vsib_address_operand" "p")
                 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
                 (match_operand:SI 5 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])
           (mem:BLK (scratch))
           (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
          UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
12594
;; Expander for the AVX2 gathers whose index vector has mode
;; <VEC_GATHER_IDXDI> (enabled by TARGET_AVX2).
;;   operand 0: destination register, full gather mode
;;   operand 1: register operand in the first slot of the gather unspec,
;;              mode <VEC_GATHER_SRCDI>
;;   operand 2: address operand accepted by vsib_address_operand
;;   operand 3: index vector register
;;   operand 4: register operand in the last slot of the gather unspec,
;;              mode <VEC_GATHER_SRCDI>
;;   operand 5: SImode scale accepted by const1248_operand
;;              (presumably 1, 2, 4 or 8, per the predicate name)
;;   operand 6: scratch clobbered by the instruction
;;   operand 7: created by the preparation code: an UNSPEC_VSIBADDR in
;;              Pmode wrapping operands 2, 3 and 5, used as the address
;;              of the element-mode memory reference.
(define_expand "avx2_gatherdi<mode>"
  [(parallel
     [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
           (unspec:VEC_GATHER_MODE
             [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "")
              (mem:<ssescalarmode>
                (match_par_dup 7
                  [(match_operand 2 "vsib_address_operand" "")
                   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "")
                   (match_operand:SI 5 "const1248_operand" "")]))
              (mem:BLK (scratch))
              (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "")]
             UNSPEC_GATHER))
      (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
  "TARGET_AVX2"
{
  /* Bundle base, index and scale into the VSIB address unspec.  */
  operands[7]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
                                        operands[5]), UNSPEC_VSIBADDR);
})
12616
;; AVX2 gather with a <VEC_GATHER_IDXDI> index vector, form with a real
;; first unspec operand (enabled by TARGET_AVX2).
;;   operand 0: destination, early-clobbered, full gather mode
;;   operand 1: scratch, early-clobbered, full gather mode
;;   operand 2: mode <VEC_GATHER_SRCDI>, tied to operand 0
;;   operand 5: mode <VEC_GATHER_SRCDI>, tied to operand 1
;;   operands 3, 4, 6: address, index vector and scale inside UNSPEC_VSIBADDR
;;   operand 7: the memory reference (vsib_mem_operator) around that address
;; The template prints operands 2 and 5 rather than 0 and 1.  They name the
;; same registers but carry the <VEC_GATHER_SRCDI> mode -- presumably so the
;; narrower register name is emitted when that mode differs from the gather
;; mode; confirm against the operand printer.
(define_insn "*avx2_gatherdi<mode>"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
          [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
           (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 3 "vsib_address_operand" "p")
                 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
                 (match_operand:SI 6 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])
           (mem:BLK (scratch))
           (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
          UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
12636
;; Variant of the <VEC_GATHER_IDXDI>-index gather in which the first unspec
;; slot is (pc) instead of a register tied to the destination
;; (enabled by TARGET_AVX2).
;;   operand 0: destination, early-clobbered, full gather mode
;;   operand 1: scratch, early-clobbered, full gather mode
;;   operand 4: mode <VEC_GATHER_SRCDI>, tied to operand 1
;;   operands 2, 3, 5: address, index vector and scale inside UNSPEC_VSIBADDR
;;   operand 6: the memory reference (vsib_mem_operator) around that address
;; No operand of mode <VEC_GATHER_SRCDI> names the destination here, so the
;; output code selects the %x0 spelling of operand 0 when the two modes
;; differ and plain %0 when they are the same.
(define_insn "*avx2_gatherdi<mode>_2"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
          [(pc)
           (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 2 "vsib_address_operand" "p")
                 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
                 (match_operand:SI 5 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])
           (mem:BLK (scratch))
           (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
          UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
{
  /* Same mode on both sides: print the destination unmodified.  */
  if (<MODE>mode == <VEC_GATHER_SRCDI>mode)
    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
  else
    /* Modes differ: use the x-modified spelling of the destination.  */
    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
12660
;; <VEC_GATHER_IDXDI>-index gather over the VI4F_256 modes (iterator defined
;; outside this chunk) where only lanes 0..3 of the gather unspec are kept,
;; so the destination has mode <VEC_GATHER_SRCDI> (enabled by TARGET_AVX2).
;;   operand 0: destination, early-clobbered
;;   operand 1: scratch of the full VI4F_256 mode, early-clobbered
;;   operand 2: tied to operand 0
;;   operand 5: tied to operand 1
;;   operands 3, 4, 6: address, index vector and scale inside UNSPEC_VSIBADDR
;;   operand 7: the memory reference (vsib_mem_operator) around that address
(define_insn "*avx2_gatherdi<mode>_3"
  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
        (vec_select:<VEC_GATHER_SRCDI>
          (unspec:VI4F_256
            [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
             (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
               [(unspec:P
                  [(match_operand:P 3 "vsib_address_operand" "p")
                   (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
                   (match_operand:SI 6 "const1248_operand" "n")]
                  UNSPEC_VSIBADDR)])
             (mem:BLK (scratch))
             (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
            UNSPEC_GATHER)
          (parallel
            [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))
   (clobber (match_scratch:VI4F_256 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
12683
;; Same low-four-lanes <VEC_GATHER_IDXDI>-index gather over the VI4F_256
;; modes, but with (pc) in the first unspec slot instead of a register tied
;; to the destination (enabled by TARGET_AVX2).
;;   operand 0: destination of mode <VEC_GATHER_SRCDI>, early-clobbered
;;   operand 1: scratch of the full VI4F_256 mode, early-clobbered
;;   operand 4: tied to operand 1
;;   operands 2, 3, 5: address, index vector and scale inside UNSPEC_VSIBADDR
;;   operand 6: the memory reference (vsib_mem_operator) around that address
(define_insn "*avx2_gatherdi<mode>_4"
  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
        (vec_select:<VEC_GATHER_SRCDI>
          (unspec:VI4F_256
            [(pc)
             (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
               [(unspec:P
                  [(match_operand:P 2 "vsib_address_operand" "p")
                   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
                   (match_operand:SI 5 "const1248_operand" "n")]
                  UNSPEC_VSIBADDR)])
             (mem:BLK (scratch))
             (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
            UNSPEC_GATHER)
          (parallel
            [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))
   (clobber (match_scratch:VI4F_256 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
12706