xref: /dragonfly/contrib/gcc-4.7/gcc/config/i386/sse.md (revision 95d28233)
1;; GCC machine description for SSE instructions
2;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3;; Free Software Foundation, Inc.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify
8;; it under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 3, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful,
13;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15;; GNU General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3.  If not see
19;; <http://www.gnu.org/licenses/>.
20
;; UNSPEC codes referenced by the patterns of this file.  The entries are
;; grouped by the instruction-set extension whose patterns use them.
21(define_c_enum "unspec" [
22  ;; SSE
23  UNSPEC_MOVNT
24  UNSPEC_LOADU
25  UNSPEC_STOREU
26
27  ;; SSE3
28  UNSPEC_LDDQU
29
30  ;; SSSE3
31  UNSPEC_PSHUFB
32  UNSPEC_PSIGN
33  UNSPEC_PALIGNR
34
35  ;; For SSE4A support
36  UNSPEC_EXTRQI
37  UNSPEC_EXTRQ
38  UNSPEC_INSERTQI
39  UNSPEC_INSERTQ
40
41  ;; For SSE4.1 support
42  UNSPEC_BLENDV
43  UNSPEC_INSERTPS
44  UNSPEC_DP
45  UNSPEC_MOVNTDQA
46  UNSPEC_MPSADBW
47  UNSPEC_PHMINPOSUW
48  UNSPEC_PTEST
49
50  ;; For SSE4.2 support
51  UNSPEC_PCMPESTR
52  UNSPEC_PCMPISTR
53
54  ;; For FMA4 and XOP support
55  UNSPEC_FMADDSUB
56  UNSPEC_XOP_UNSIGNED_CMP
57  UNSPEC_XOP_TRUEFALSE
58  UNSPEC_XOP_PERMUTE
59  UNSPEC_FRCZ
60
61  ;; For AES support
62  UNSPEC_AESENC
63  UNSPEC_AESENCLAST
64  UNSPEC_AESDEC
65  UNSPEC_AESDECLAST
66  UNSPEC_AESIMC
67  UNSPEC_AESKEYGENASSIST
68
69  ;; For PCLMUL support
70  UNSPEC_PCLMUL
71
72  ;; For AVX support
73  UNSPEC_PCMP
74  UNSPEC_VPERMIL
75  UNSPEC_VPERMIL2
76  UNSPEC_VPERMIL2F128
77  UNSPEC_CAST
78  UNSPEC_VTESTP
79  UNSPEC_VCVTPH2PS
80  UNSPEC_VCVTPS2PH
81
82  ;; For AVX2 support
83  UNSPEC_VPERMSI
84  UNSPEC_VPERMDF
85  UNSPEC_VPERMSF
86  UNSPEC_VPERMTI
87  UNSPEC_GATHER
88  UNSPEC_VSIBADDR
89])
90
;; Members of the "unspecv" enum.
;; NOTE(review): by GCC convention these number unspec_volatile operations;
;; the patterns that use them are not in this part of the file -- confirm.
91(define_c_enum "unspecv" [
92  UNSPECV_LDMXCSR
93  UNSPECV_STMXCSR
94  UNSPECV_CLFLUSH
95  UNSPECV_MONITOR
96  UNSPECV_MWAIT
97  UNSPECV_VZEROALL
98  UNSPECV_VZEROUPPER
99])
100
101;; All vector modes including V?TImode, used in move patterns.
;; Each row pairs a 256-bit mode, enabled only under TARGET_AVX, with its
;; unconditional 128-bit counterpart.  Compared with the V iterator this
;; one also covers V2TI/V1TI and puts no TARGET_SSE2 condition on V2DF.
102(define_mode_iterator V16
103  [(V32QI "TARGET_AVX") V16QI
104   (V16HI "TARGET_AVX") V8HI
105   (V8SI "TARGET_AVX") V4SI
106   (V4DI "TARGET_AVX") V2DI
107   (V2TI "TARGET_AVX") V1TI
108   (V8SF "TARGET_AVX") V4SF
109   (V4DF "TARGET_AVX") V2DF])
110
111;; All vector modes
;; 256-bit modes require TARGET_AVX and V2DF requires TARGET_SSE2; the
;; remaining 128-bit modes carry no per-mode condition.  No TImode
;; vectors are included here.
112(define_mode_iterator V
113  [(V32QI "TARGET_AVX") V16QI
114   (V16HI "TARGET_AVX") V8HI
115   (V8SI "TARGET_AVX") V4SI
116   (V4DI "TARGET_AVX") V2DI
117   (V8SF "TARGET_AVX") V4SF
118   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
119
120;; All 128bit vector modes
;; Integer and float 128-bit modes; only V2DF is conditional (TARGET_SSE2).
121(define_mode_iterator V_128
122  [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
123
124;; All 256bit vector modes
;; No per-mode conditions are attached to the entries of this iterator.
125(define_mode_iterator V_256
126  [V32QI V16HI V8SI V4DI V8SF V4DF])
127
128;; All vector float modes
;; V8SF/V4DF need TARGET_AVX, V2DF needs TARGET_SSE2, V4SF is unconditional.
129(define_mode_iterator VF
130  [(V8SF "TARGET_AVX") V4SF
131   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
132
133;; All SFmode vector float modes
;; V8SF needs TARGET_AVX; V4SF is unconditional.
134(define_mode_iterator VF1
135  [(V8SF "TARGET_AVX") V4SF])
136
137;; All DFmode vector float modes
;; V4DF needs TARGET_AVX.  V2DF has no condition here, unlike in VF, so
;; patterns using VF2 supply the SSE2 test themselves (the div/sqrt
;; expanders over VF2 in this file use "TARGET_SSE2").
138(define_mode_iterator VF2
139  [(V4DF "TARGET_AVX") V2DF])
140
141;; All 128bit vector float modes
;; V2DF needs TARGET_SSE2.
142(define_mode_iterator VF_128
143  [V4SF (V2DF "TARGET_SSE2")])
144
145;; All 256bit vector float modes
;; No per-mode conditions.
146(define_mode_iterator VF_256
147  [V8SF V4DF])
148
149;; All vector integer modes
;; 256-bit integer modes are enabled under TARGET_AVX; the 128-bit ones
;; are unconditional.
150(define_mode_iterator VI
151  [(V32QI "TARGET_AVX") V16QI
152   (V16HI "TARGET_AVX") V8HI
153   (V8SI "TARGET_AVX") V4SI
154   (V4DI "TARGET_AVX") V2DI])
155
;; Same modes as VI, but the 256-bit modes are gated on TARGET_AVX2
;; instead of TARGET_AVX.
156(define_mode_iterator VI_AVX2
157  [(V32QI "TARGET_AVX2") V16QI
158   (V16HI "TARGET_AVX2") V8HI
159   (V8SI "TARGET_AVX2") V4SI
160   (V4DI "TARGET_AVX2") V2DI])
161
162;; All QImode vector integer modes
;; V32QI needs TARGET_AVX; V16QI is unconditional.
163(define_mode_iterator VI1
164  [(V32QI "TARGET_AVX") V16QI])
165
166;; All DImode vector integer modes
;; V4DI needs TARGET_AVX; V2DI is unconditional.
167(define_mode_iterator VI8
168  [(V4DI "TARGET_AVX") V2DI])
169
;; The four iterators below select one element width each (QI, HI, SI,
;; DI).  In each, the 256-bit mode needs TARGET_AVX2 and the 128-bit mode
;; is unconditional.
170(define_mode_iterator VI1_AVX2
171  [(V32QI "TARGET_AVX2") V16QI])
172
173(define_mode_iterator VI2_AVX2
174  [(V16HI "TARGET_AVX2") V8HI])
175
176(define_mode_iterator VI4_AVX2
177  [(V8SI "TARGET_AVX2") V4SI])
178
179(define_mode_iterator VI8_AVX2
180  [(V4DI "TARGET_AVX2") V2DI])
181
182;; ??? We should probably use TImode instead.
;; TImode-element vectors: V2TI under TARGET_AVX2, V1TI unconditionally.
183(define_mode_iterator VIMAX_AVX2
184  [(V2TI "TARGET_AVX2") V1TI])
185
186;; ??? This should probably be dropped in favor of VIMAX_AVX2.
;; Differs from VIMAX_AVX2 only in using scalar TI, not V1TI, for the
;; 128-bit entry.
187(define_mode_iterator SSESCALARMODE
188  [(V2TI "TARGET_AVX2") TI])
189
;; Combinations of integer element widths.  The digits in each name list
;; the element sizes in bytes (1 = QI, 2 = HI, 4 = SI, 8 = DI); 256-bit
;; modes need TARGET_AVX2 and 128-bit modes are unconditional.
190(define_mode_iterator VI12_AVX2
191  [(V32QI "TARGET_AVX2") V16QI
192   (V16HI "TARGET_AVX2") V8HI])
193
194(define_mode_iterator VI24_AVX2
195  [(V16HI "TARGET_AVX2") V8HI
196   (V8SI "TARGET_AVX2") V4SI])
197
198(define_mode_iterator VI124_AVX2
199  [(V32QI "TARGET_AVX2") V16QI
200   (V16HI "TARGET_AVX2") V8HI
201   (V8SI "TARGET_AVX2") V4SI])
202
203(define_mode_iterator VI248_AVX2
204  [(V16HI "TARGET_AVX2") V8HI
205   (V8SI "TARGET_AVX2") V4SI
206   (V4DI "TARGET_AVX2") V2DI])
207
208(define_mode_iterator VI48_AVX2
209  [(V8SI "TARGET_AVX2") V4SI
210   (V4DI "TARGET_AVX2") V2DI])
211
;; 4- and 8-byte element modes, float and integer.  The float modes carry
;; no condition; every integer mode, including the 128-bit ones, needs
;; TARGET_AVX2.
212(define_mode_iterator V48_AVX2
213  [V4SF V2DF
214   V8SF V4DF
215   (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
216   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
217
;; The attributes below map a vector mode to an ISA-name string: the
;; 128-bit mode gets the base ISA name and the 256-bit mode gets "avx2"
;; (or "avx").  NOTE(review): presumably substituted into pattern names,
;; as <sse>, <sse2> and <sse3> are elsewhere in this file -- confirm at
;; the use sites.

;; 128-bit -> "sse2", 256-bit -> "avx2"; keyed on V1TI for the TI case.
218(define_mode_attr sse2_avx2
219  [(V16QI "sse2") (V32QI "avx2")
220   (V8HI "sse2") (V16HI "avx2")
221   (V4SI "sse2") (V8SI "avx2")
222   (V2DI "sse2") (V4DI "avx2")
223   (V1TI "sse2") (V2TI "avx2")])
224
;; 128-bit -> "ssse3", 256-bit -> "avx2"; keyed on scalar TI (not V1TI)
;; for the TI case, matching the SSESCALARMODE iterator.
225(define_mode_attr ssse3_avx2
226   [(V16QI "ssse3") (V32QI "avx2")
227    (V8HI "ssse3") (V16HI "avx2")
228    (V4SI "ssse3") (V8SI "avx2")
229    (V2DI "ssse3") (V4DI "avx2")
230    (TI "ssse3") (V2TI "avx2")])
231
;; 128-bit -> "sse4_1", 256-bit -> "avx2".
232(define_mode_attr sse4_1_avx2
233   [(V16QI "sse4_1") (V32QI "avx2")
234    (V8HI "sse4_1") (V16HI "avx2")
235    (V4SI "sse4_1") (V8SI "avx2")
236    (V2DI "sse4_1") (V4DI "avx2")])
237
;; Float modes -> "avx", integer modes -> "avx2", regardless of width.
238(define_mode_attr avx_avx2
239  [(V4SF "avx") (V2DF "avx")
240   (V8SF "avx") (V4DF "avx")
241   (V4SI "avx2") (V2DI "avx2")
242   (V8SI "avx2") (V4DI "avx2")])
243
;; 128-bit -> "vec", 256-bit -> "avx2".
244(define_mode_attr vec_avx2
245  [(V16QI "vec") (V32QI "avx2")
246   (V8HI "vec") (V16HI "avx2")
247   (V4SI "vec") (V8SI "avx2")
248   (V2DI "vec") (V4DI "avx2")])
249
;; HImode vector -> vector with the same element count but SImode
;; elements.
250(define_mode_attr ssedoublemode
251  [(V16HI "V16SI") (V8HI "V8SI")])
252
;; DImode vector -> QImode vector of the same total size.
253(define_mode_attr ssebytemode
254  [(V4DI "V32QI") (V2DI "V16QI")])
255
256;; All 128bit vector integer modes
;; None of the fixed-width iterators below attaches a per-mode condition.
257(define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
258
259;; All 256bit vector integer modes
260(define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
261
262;; Random 128bit vector integer mode combinations
;; The digits in each name list the element sizes in bytes (1 = QI,
;; 2 = HI, 4 = SI, 8 = DI).
263(define_mode_iterator VI12_128 [V16QI V8HI])
264(define_mode_iterator VI14_128 [V16QI V4SI])
265(define_mode_iterator VI124_128 [V16QI V8HI V4SI])
266(define_mode_iterator VI128_128 [V16QI V8HI V2DI])
267(define_mode_iterator VI24_128 [V8HI V4SI])
268(define_mode_iterator VI248_128 [V8HI V4SI V2DI])
269(define_mode_iterator VI48_128 [V4SI V2DI])
270
271;; Random 256bit vector integer mode combinations
272(define_mode_iterator VI124_256 [V32QI V16HI V8SI])
273(define_mode_iterator VI48_256 [V8SI V4DI])
274
275;; Int-float size matches
;; Each pairs the integer and float vector modes that share an element
;; size (4 or 8 bytes) and a total width (128 or 256 bits).
276(define_mode_iterator VI4F_128 [V4SI V4SF])
277(define_mode_iterator VI8F_128 [V2DI V2DF])
278(define_mode_iterator VI4F_256 [V8SI V8SF])
279(define_mode_iterator VI8F_256 [V4DI V4DF])
280
281;; Mapping from float mode to required SSE level
;; SF-based modes -> "sse", DF-based modes -> "sse2", 256-bit modes ->
;; "avx".  Used as <sse> in pattern names in this file, e.g.
;; "<sse>_div<mode>3".
282(define_mode_attr sse
283  [(SF "sse") (DF "sse2")
284   (V4SF "sse") (V2DF "sse2")
285   (V8SF "avx") (V4DF "avx")])
286
;; Integer counterpart for QI/DI vectors: 128-bit -> "sse2", 256-bit ->
;; "avx".  Used as <sse2> in e.g. "<sse2>_loaddqu<avxsizesuffix>".
287(define_mode_attr sse2
288  [(V16QI "sse2") (V32QI "avx")
289   (V2DI "sse2") (V4DI "avx")])
290
;; QI vectors only: 128-bit -> "sse3", 256-bit -> "avx".  Used as <sse3>
;; in "<sse3>_lddqu<avxsizesuffix>".
291(define_mode_attr sse3
292  [(V16QI "sse3") (V32QI "avx")])
293
;; Float vectors: 128-bit -> "sse4_1", 256-bit -> "avx".
294(define_mode_attr sse4_1
295  [(V4SF "sse4_1") (V2DF "sse4_1")
296   (V8SF "avx") (V4DF "avx")])
297
;; Suffix appended to pattern names: "256" for 256-bit modes and the
;; empty string for 128-bit modes.
298(define_mode_attr avxsizesuffix
299  [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
300   (V16QI "") (V8HI "") (V4SI "") (V2DI "")
301   (V8SF "256") (V4DF "256")
302   (V4SF "") (V2DF "")])
303
304;; SSE instruction mode
;; Value for the insn "mode" attribute: integer vectors map to OI
;; (256-bit) or TI (128-bit), float vectors map to themselves, and
;; scalar TI maps to TI.
305(define_mode_attr sseinsnmode
306  [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
307   (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
308   (V8SF "V8SF") (V4DF "V4DF")
309   (V4SF "V4SF") (V2DF "V2DF")
310   (TI "TI")])
311
312;; Mapping of vector float modes to an integer mode of the same size
;; Float modes map to the integer mode with the same element size and
;; count; integer modes map to themselves.
313(define_mode_attr sseintvecmode
314  [(V8SF "V8SI") (V4DF "V4DI")
315   (V4SF "V4SI") (V2DF "V2DI")
316   (V8SI "V8SI") (V4DI "V4DI")
317   (V4SI "V4SI") (V2DI "V2DI")
318   (V16HI "V16HI") (V8HI "V8HI")
319   (V32QI "V32QI") (V16QI "V16QI")])
320
;; Same mapping as sseintvecmode, with the result spelled in lower case.
321(define_mode_attr sseintvecmodelower
322  [(V8SF "v8si") (V4DF "v4di")
323   (V4SF "v4si") (V2DF "v2di")
324   (V8SI "v8si") (V4DI "v4di")
325   (V4SI "v4si") (V2DI "v2di")
326   (V16HI "v16hi") (V8HI "v8hi")
327   (V32QI "v32qi") (V16QI "v16qi")])
328
329;; Mapping of vector modes to a vector mode of double size
;; Same element mode, twice the element count.
330(define_mode_attr ssedoublevecmode
331  [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
332   (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
333   (V8SF "V16SF") (V4DF "V8DF")
334   (V4SF "V8SF") (V2DF "V4DF")])
335
336;; Mapping of vector modes to a vector mode of half size
;; Same element mode, half the element count.  There are no entries for
;; V2DI or V2DF.
337(define_mode_attr ssehalfvecmode
338  [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
339   (V16QI  "V8QI") (V8HI  "V4HI") (V4SI "V2SI")
340   (V8SF "V4SF") (V4DF "V2DF")
341   (V4SF "V2SF")])
342
343;; Mapping of vector modes back to the scalar modes
;; Vector mode -> mode of a single element.
344(define_mode_attr ssescalarmode
345  [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
346   (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
347   (V8SF "SF") (V4DF "DF")
348   (V4SF "SF") (V2DF "DF")])
349
350;; Number of scalar elements in each vector type
;; The value is the element count as a decimal string.
351(define_mode_attr ssescalarnum
352  [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
353   (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
354   (V8SF "8") (V4DF "4")
355   (V4SF "4") (V2DF "2")])
356
357;; SSE prefix for integer vector modes
;; "p" for integer vector modes, empty string for float vector modes.
358(define_mode_attr sseintprefix
359  [(V2DI "p") (V2DF "")
360   (V4DI "p") (V4DF "")
361   (V4SI "p") (V4SF "")
362   (V8SI "p") (V8SF "")])
363
364;; SSE scalar suffix for vector modes
;; SF-based modes -> "ss", DF-based modes -> "sd".  The integer entries
;; are irregular: V8SI -> "ss" and V4DI -> "sd", but V4SI -> "d".
;; Used as <ssescalarmodesuffix> in the scalar (vm*) insn templates.
365(define_mode_attr ssescalarmodesuffix
366  [(SF "ss") (DF "sd")
367   (V8SF "ss") (V4DF "sd")
368   (V4SF "ss") (V2DF "sd")
369   (V8SI "ss") (V4DI "sd")
370   (V4SI "d")])
371
372;; Pack/unpack vector modes
;; Unpack: same total width, elements twice as wide, half as many of them.
373(define_mode_attr sseunpackmode
374  [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
375   (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
376
;; Pack: the inverse mapping of sseunpackmode (elements half as wide,
;; twice as many of them).
377(define_mode_attr ssepackmode
378  [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
379   (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
380
381;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one.
382(define_mode_attr sserotatemax
383  [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
384
385;; Mapping of mode to cast intrinsic name
386(define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
387
388;; Instruction suffix for sign and zero extensions.
389(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
390
391;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
;; Float modes get the literal "f128".  Integer modes get "%~128".
;; NOTE(review): "%~" is presumably an output-template escape resolved to
;; "i" or "f" when the instruction is printed -- confirm in i386.c.
392(define_mode_attr i128
393  [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
394   (V8SI "%~128") (V4DI "%~128")])
395
396;; Mix-n-match
397(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
398
399;; Mapping of immediate bits for blend instructions
;; Each value is 2^n - 1 for a mode with n elements, i.e. one set bit
;; per element.
400(define_mode_attr blendbits
401  [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
402
403;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
404
405;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
406;;
407;; Move patterns
408;;
409;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
410
411;; All of these patterns are enabled for SSE1 as well as SSE2.
412;; This is essential for maintaining stable calling conventions.
413
;; Named move expander for every mode of the V16 iterator.  All work is
;; delegated to ix86_expand_vector_move; DONE means the RTL template
;; itself is never emitted.
414(define_expand "mov<mode>"
415  [(set (match_operand:V16 0 "nonimmediate_operand" "")
416	(match_operand:V16 1 "nonimmediate_operand" ""))]
417  "TARGET_SSE"
418{
419  ix86_expand_vector_move (<MODE>mode, operands);
420  DONE;
421})
422
;; Move insn for every V16 mode.  The condition requires at least one of
;; the two operands to be a register.
;;
;; Alternatives:
;;   0: constant matching constraint "C" into a register; the mnemonic
;;      comes from standard_sse_constant_opcode.
;;   1: register or memory into a register.
;;   2: register into memory.
;;
;; For alternatives 1 and 2 the mnemonic follows the insn "mode"
;; attribute:
;;   V8SF/V4SF: vmovups when TARGET_AVX and either operand is misaligned,
;;              otherwise %vmovaps.
;;   V4DF/V2DF: vmovupd under the same AVX/misaligned test; else %vmovaps
;;              if TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, else %vmovapd.
;;   OI/TI:     vmovdqu under the same AVX/misaligned test; else %vmovaps
;;              if TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, else %vmovdqa.
;;
;; The "mode" attribute is computed as follows:
;;   - under TARGET_AVX, <sseinsnmode> of the operand mode;
;;   - otherwise V4SF when optimizing the function for size, when
;;     TARGET_SSE2 is off, or for the store alternative (2) when
;;     TARGET_SSE_TYPELESS_STORES is set;
;;   - otherwise V4SF for V4SFmode, V2DF for V2DFmode, and TI for all
;;     remaining modes.
423(define_insn "*mov<mode>_internal"
424  [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
425	(match_operand:V16 1 "nonimmediate_or_sse_const_operand"  "C ,xm,x"))]
426  "TARGET_SSE
427   && (register_operand (operands[0], <MODE>mode)
428       || register_operand (operands[1], <MODE>mode))"
429{
430  switch (which_alternative)
431    {
432    case 0:
433      return standard_sse_constant_opcode (insn, operands[1]);
434    case 1:
435    case 2:
436      switch (get_attr_mode (insn))
437	{
438	case MODE_V8SF:
439	case MODE_V4SF:
440	  if (TARGET_AVX
441	      && (misaligned_operand (operands[0], <MODE>mode)
442		  || misaligned_operand (operands[1], <MODE>mode)))
443	    return "vmovups\t{%1, %0|%0, %1}";
444	  else
445	    return "%vmovaps\t{%1, %0|%0, %1}";
446
447	case MODE_V4DF:
448	case MODE_V2DF:
449	  if (TARGET_AVX
450	      && (misaligned_operand (operands[0], <MODE>mode)
451		  || misaligned_operand (operands[1], <MODE>mode)))
452	    return "vmovupd\t{%1, %0|%0, %1}";
453	  else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
454	    return "%vmovaps\t{%1, %0|%0, %1}";
455	  else
456	    return "%vmovapd\t{%1, %0|%0, %1}";
457
458	case MODE_OI:
459	case MODE_TI:
460	  if (TARGET_AVX
461	      && (misaligned_operand (operands[0], <MODE>mode)
462		  || misaligned_operand (operands[1], <MODE>mode)))
463	    return "vmovdqu\t{%1, %0|%0, %1}";
464	  else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
465	    return "%vmovaps\t{%1, %0|%0, %1}";
466	  else
467	    return "%vmovdqa\t{%1, %0|%0, %1}";
468
469	default:
470	  gcc_unreachable ();
471	}
472    default:
473      gcc_unreachable ();
474    }
475}
476  [(set_attr "type" "sselog1,ssemov,ssemov")
477   (set_attr "prefix" "maybe_vex")
478   (set (attr "mode")
479	(cond [(match_test "TARGET_AVX")
480		 (const_string "<sseinsnmode>")
481	       (ior (ior (match_test "optimize_function_for_size_p (cfun)")
482			 (not (match_test "TARGET_SSE2")))
483		    (and (eq_attr "alternative" "2")
484			 (match_test "TARGET_SSE_TYPELESS_STORES")))
485		 (const_string "V4SF")
486	       (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
487		 (const_string "V4SF")
488	       (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
489		 (const_string "V2DF")
490	      ]
491	  (const_string "TI")))])
492
;; Builds a V2DI whose element 0 is element 0 of operand 1 (register or
;; memory) and whose element 1 is zero.  Emitted as %vmovq; requires
;; TARGET_SSE2.
493(define_insn "sse2_movq128"
494  [(set (match_operand:V2DI 0 "register_operand" "=x")
495	(vec_concat:V2DI
496	  (vec_select:DI
497	    (match_operand:V2DI 1 "nonimmediate_operand" "xm")
498	    (parallel [(const_int 0)]))
499	  (const_int 0)))]
500  "TARGET_SSE2"
501  "%vmovq\t{%1, %0|%0, %1}"
502  [(set_attr "type" "ssemov")
503   (set_attr "prefix" "maybe_vex")
504   (set_attr "mode" "TI")])
505
506;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
507;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
508;; from memory, we'd prefer to load the memory directly into the %xmm
509;; register.  To facilitate this happy circumstance, this pattern won't
510;; split until after register allocation.  If the 64-bit value didn't
511;; come from memory, this is the best we can do.  This is much better
512;; than storing %edx:%eax into a stack temporary and loading an %xmm
513;; from there.
514
;; Operands:
;;   0  V4SI destination xmm register.
;;   1  DImode source: alternative 0 takes it from general registers
;;      ("r"), alternative 1 from memory ("m").
;;   2  V4SI scratch; an early-clobbered xmm register for alternative 0,
;;      unused ("X") for alternative 1.
;;
;; The insn is only enabled for !TARGET_64BIT with SSE2 and inter-unit
;; moves, has no assembler output of its own ("#"), and is split once
;; reload has completed:
;;   - register source: load the SImode word at byte offset 0 into
;;     operand 0 and the word at byte offset 4 into the scratch, then
;;     interleave the low elements of the two registers;
;;   - memory source: a single vec_concat of the DImode memory with zero,
;;     viewing operand 0 as V2DI.
;;
;; The replacement template is only the placeholder (const_int 0), so
;; the preparation code must end with DONE.  Without it the placeholder
;; template would still be generated after the insns emitted here.
(define_insn_and_split "movdi_to_sse"
  [(parallel
    [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
	  (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
     (clobber (match_scratch:V4SI 2 "=&x,X"))])]
  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
 if (register_operand (operands[1], DImode))
   {
      /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
	 Assemble the 64-bit DImode value in an xmm register.  */
      emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
				  gen_rtx_SUBREG (SImode, operands[1], 0)));
      emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
				  gen_rtx_SUBREG (SImode, operands[1], 4)));
      emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
					     operands[2]));
    }
 else if (memory_operand (operands[1], DImode))
   emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
				  operands[1], const0_rtx));
 else
   gcc_unreachable ();

 /* Everything needed has been emitted above; suppress the placeholder
    (const_int 0) replacement template.  */
 DONE;
})
542
;; After reload, rewrite a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand.  The source is narrowed to its
;; SFmode subreg at byte 0, and the result becomes a vec_merge that takes
;; element 0 (mask 1) from the duplicated scalar and the remaining
;; elements from a V4SF zero vector.
543(define_split
544  [(set (match_operand:V4SF 0 "register_operand" "")
545	(match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
546  "TARGET_SSE && reload_completed"
547  [(set (match_dup 0)
548	(vec_merge:V4SF
549	  (vec_duplicate:V4SF (match_dup 1))
550	  (match_dup 2)
551	  (const_int 1)))]
552{
553  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
554  operands[2] = CONST0_RTX (V4SFmode);
555})
556
;; V2DF counterpart of the split above (TARGET_SSE2, after reload).  The
;; source is narrowed to its DFmode subreg at byte 0 and the result
;; becomes a vec_concat of that scalar with a DFmode zero.
557(define_split
558  [(set (match_operand:V2DF 0 "register_operand" "")
559	(match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
560  "TARGET_SSE2 && reload_completed"
561  [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
562{
563  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
564  operands[2] = CONST0_RTX (DFmode);
565})
566
;; Push expander for a V16-mode register operand.  All work is delegated
;; to ix86_expand_push; DONE means nothing is generated from the template.
567(define_expand "push<mode>1"
568  [(match_operand:V16 0 "register_operand" "")]
569  "TARGET_SSE"
570{
571  ix86_expand_push (<MODE>mode, operands[0]);
572  DONE;
573})
574
;; Misaligned-move expander for every V16 mode.  All work is delegated to
;; ix86_expand_vector_move_misalign; the template itself is not emitted.
575(define_expand "movmisalign<mode>"
576  [(set (match_operand:V16 0 "nonimmediate_operand" "")
577	(match_operand:V16 1 "nonimmediate_operand" ""))]
578  "TARGET_SSE"
579{
580  ix86_expand_vector_move_misalign (<MODE>mode, operands);
581  DONE;
582})
583
;; Float-vector load from memory into a register, wrapped in
;; UNSPEC_LOADU.  Emitted as %vmovu<ssemodesuffix> and tagged with the
;; "movu" attribute.
584(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>"
585  [(set (match_operand:VF 0 "register_operand" "=x")
586	(unspec:VF
587	  [(match_operand:VF 1 "memory_operand" "m")]
588	  UNSPEC_LOADU))]
589  "TARGET_SSE"
590  "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
591  [(set_attr "type" "ssemov")
592   (set_attr "movu" "1")
593   (set_attr "prefix" "maybe_vex")
594   (set_attr "mode" "<MODE>")])
595
;; Float-vector store from a register to memory, wrapped in
;; UNSPEC_STOREU.  Uses the same %vmovu<ssemodesuffix> mnemonic as the
;; matching load pattern, with the operand roles reversed.
596(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
597  [(set (match_operand:VF 0 "memory_operand" "=m")
598	(unspec:VF
599	  [(match_operand:VF 1 "register_operand" "x")]
600	  UNSPEC_STOREU))]
601  "TARGET_SSE"
602  "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
603  [(set_attr "type" "ssemov")
604   (set_attr "movu" "1")
605   (set_attr "prefix" "maybe_vex")
606   (set_attr "mode" "<MODE>")])
607
;; QImode-vector (VI1) load from memory, wrapped in UNSPEC_LOADU and
;; emitted as %vmovdqu.  The prefix_data16 attribute is "1" for the
;; non-AVX encoding and "*" under TARGET_AVX.
608(define_insn "<sse2>_loaddqu<avxsizesuffix>"
609  [(set (match_operand:VI1 0 "register_operand" "=x")
610	(unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
611		    UNSPEC_LOADU))]
612  "TARGET_SSE2"
613  "%vmovdqu\t{%1, %0|%0, %1}"
614  [(set_attr "type" "ssemov")
615   (set_attr "movu" "1")
616   (set (attr "prefix_data16")
617     (if_then_else
618       (match_test "TARGET_AVX")
619     (const_string "*")
620     (const_string "1")))
621   (set_attr "prefix" "maybe_vex")
622   (set_attr "mode" "<sseinsnmode>")])
623
;; QImode-vector (VI1) store from a register to memory, wrapped in
;; UNSPEC_STOREU and emitted as %vmovdqu.  Attributes are the same as for
;; the matching load pattern.
624(define_insn "<sse2>_storedqu<avxsizesuffix>"
625  [(set (match_operand:VI1 0 "memory_operand" "=m")
626	(unspec:VI1 [(match_operand:VI1 1 "register_operand" "x")]
627		    UNSPEC_STOREU))]
628  "TARGET_SSE2"
629  "%vmovdqu\t{%1, %0|%0, %1}"
630  [(set_attr "type" "ssemov")
631   (set_attr "movu" "1")
632   (set (attr "prefix_data16")
633     (if_then_else
634       (match_test "TARGET_AVX")
635     (const_string "*")
636     (const_string "1")))
637   (set_attr "prefix" "maybe_vex")
638   (set_attr "mode" "<sseinsnmode>")])
639
;; QImode-vector (VI1) load from memory, wrapped in UNSPEC_LDDQU and
;; emitted as %vlddqu; requires TARGET_SSE3.  For the non-AVX encoding
;; prefix_data16 is "0" and prefix_rep is "1"; both are "*" under
;; TARGET_AVX.
640(define_insn "<sse3>_lddqu<avxsizesuffix>"
641  [(set (match_operand:VI1 0 "register_operand" "=x")
642	(unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
643		    UNSPEC_LDDQU))]
644  "TARGET_SSE3"
645  "%vlddqu\t{%1, %0|%0, %1}"
646  [(set_attr "type" "ssemov")
647   (set_attr "movu" "1")
648   (set (attr "prefix_data16")
649     (if_then_else
650       (match_test "TARGET_AVX")
651     (const_string "*")
652     (const_string "0")))
653   (set (attr "prefix_rep")
654     (if_then_else
655       (match_test "TARGET_AVX")
656     (const_string "*")
657     (const_string "1")))
658   (set_attr "prefix" "maybe_vex")
659   (set_attr "mode" "<sseinsnmode>")])
660
;; UNSPEC_MOVNT store of a general register ("r") to memory for the
;; SWI48 scalar integer modes.  Emitted as movnti, with no VEX variant in
;; the template.
661(define_insn "sse2_movnti<mode>"
662  [(set (match_operand:SWI48 0 "memory_operand" "=m")
663	(unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
664		      UNSPEC_MOVNT))]
665  "TARGET_SSE2"
666  "movnti\t{%1, %0|%0, %1}"
667  [(set_attr "type" "ssemov")
668   (set_attr "prefix_data16" "0")
669   (set_attr "mode" "<MODE>")])
670
;; UNSPEC_MOVNT store of a float vector register to memory.  Emitted as
;; %vmovnt<ssemodesuffix>.
671(define_insn "<sse>_movnt<mode>"
672  [(set (match_operand:VF 0 "memory_operand" "=m")
673	(unspec:VF [(match_operand:VF 1 "register_operand" "x")]
674		   UNSPEC_MOVNT))]
675  "TARGET_SSE"
676  "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
677  [(set_attr "type" "ssemov")
678   (set_attr "prefix" "maybe_vex")
679   (set_attr "mode" "<MODE>")])
680
;; UNSPEC_MOVNT store of a DImode-element vector register (VI8) to
;; memory.  Emitted as %vmovntdq; prefix_data16 is "1" for the non-AVX
;; encoding.
;; NOTE(review): the "type" attribute here is "ssecvt", whereas the other
;; movnt patterns in this file use "ssemov" -- confirm this is intended.
681(define_insn "<sse2>_movnt<mode>"
682  [(set (match_operand:VI8 0 "memory_operand" "=m")
683	(unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
684		    UNSPEC_MOVNT))]
685  "TARGET_SSE2"
686  "%vmovntdq\t{%1, %0|%0, %1}"
687  [(set_attr "type" "ssecvt")
688   (set (attr "prefix_data16")
689     (if_then_else
690       (match_test "TARGET_AVX")
691     (const_string "*")
692     (const_string "1")))
693   (set_attr "prefix" "maybe_vex")
694   (set_attr "mode" "<sseinsnmode>")])
695
696; Expand patterns for non-temporal stores.  At the moment, only those
697; that directly map to insns are defined; it would be possible to
698; define patterns for other modes that would expand to several insns.
699
700;; Modes handled by storent patterns.
;; Per-mode enabling conditions: DI needs SSE2 on a 64-bit target, SI
;; needs SSE2, SF/DF need SSE4A, 256-bit vectors need AVX, V2DI/V2DF need
;; SSE2, and V4SF is unconditional.
701(define_mode_iterator STORENT_MODE
702  [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
703   (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
704   (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
705   (V8SF "TARGET_AVX") V4SF
706   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
707
;; Non-temporal store expander.  It has no preparation code and simply
;; generates the UNSPEC_MOVNT store from its template.  The movnt insn
;; patterns in this file cover the SI/DI, float-vector and DImode-vector
;; cases; insns for the scalar SF/DF modes are not in this part of the
;; file.
708(define_expand "storent<mode>"
709  [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
710	(unspec:STORENT_MODE
711	  [(match_operand:STORENT_MODE 1 "register_operand" "")]
712	  UNSPEC_MOVNT))]
713  "TARGET_SSE")
714
715;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
716;;
717;; Parallel floating point arithmetic
718;;
719;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
720
;; abs/neg expander for float vectors (the absneg code iterator).  All
;; work is delegated to ix86_expand_fp_absneg_operator; the template
;; itself is not emitted.
721(define_expand "<code><mode>2"
722  [(set (match_operand:VF 0 "register_operand" "")
723	(absneg:VF
724	  (match_operand:VF 1 "register_operand" "")))]
725  "TARGET_SSE"
726  "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
727
;; Float-vector abs/neg, kept as one insn until reload has completed and
;; then split into a single bitwise operation: XOR when operator 3 is
;; NEG, AND otherwise.
;;
;; Operands:
;;   0  destination register.
;;   1  value being negated or made absolute.
;;   2  second input of the bitwise operation, carried in a (use ...);
;;      presumably the mask prepared by the expander -- confirm there.
;;   3  the abs/neg operator itself.
;;
;; Alternatives 0-1 are the non-AVX forms, in which one input is tied to
;; the destination; alternatives 2-3 are the AVX forms.
(define_insn_and_split "*absneg<mode>2"
  [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
	(match_operator:VF 3 "absneg_operator"
	  [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
   (use (match_operand:VF 2 "nonimmediate_operand"    "xm,0, xm,x"))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  const enum rtx_code logic_code
    = (GET_CODE (operands[3]) == NEG) ? XOR : AND;
  rtx lhs, rhs;

  if (!TARGET_AVX)
    {
      /* Two-operand form: the destination doubles as the first input,
	 and the other input is whichever source is not tied to it.  */
      lhs = operands[0];
      rhs = rtx_equal_p (operands[0], operands[1]) ? operands[2]
						    : operands[1];
    }
  else if (MEM_P (operands[1]))
    {
      /* Three-operand form: keep a memory source in second position.  */
      lhs = operands[2];
      rhs = operands[1];
    }
  else
    {
      lhs = operands[1];
      rhs = operands[2];
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_fmt_ee (logic_code, <MODE>mode, lhs, rhs)));
  DONE;
}
  [(set_attr "isa" "noavx,noavx,avx,avx")])
765
;; Float-vector add/sub expander.  The operands are first passed through
;; ix86_fixup_binary_operands_no_copy; there is no DONE, so the template
;; is then generated from the adjusted operands.
766(define_expand "<plusminus_insn><mode>3"
767  [(set (match_operand:VF 0 "register_operand" "")
768	(plusminus:VF
769	  (match_operand:VF 1 "nonimmediate_operand" "")
770	  (match_operand:VF 2 "nonimmediate_operand" "")))]
771  "TARGET_SSE"
772  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
773
;; Float-vector add/sub insn.
;;   Alternative 0 (isa noavx): two-operand form, with operand 1 tied to
;;     the destination.
;;   Alternative 1 (isa avx): three-operand "v"-prefixed form.
;; The operands must also satisfy ix86_binary_operator_ok.
774(define_insn "*<plusminus_insn><mode>3"
775  [(set (match_operand:VF 0 "register_operand" "=x,x")
776	(plusminus:VF
777	  (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
778	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
779  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
780  "@
781   <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
782   v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
783  [(set_attr "isa" "noavx,avx")
784   (set_attr "type" "sseadd")
785   (set_attr "prefix" "orig,vex")
786   (set_attr "mode" "<MODE>")])
787
;; Scalar add/sub on 128-bit float vectors.  The vec_merge with mask 1
;; takes element 0 from the add/sub result and every other element from
;; operand 1 (match_dup 1).  Alternative 0 is the two-operand non-AVX
;; form; alternative 1 is the three-operand AVX form.
788(define_insn "<sse>_vm<plusminus_insn><mode>3"
789  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
790	(vec_merge:VF_128
791	  (plusminus:VF_128
792	    (match_operand:VF_128 1 "register_operand" "0,x")
793	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
794	  (match_dup 1)
795	  (const_int 1)))]
796  "TARGET_SSE"
797  "@
798   <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
799   v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
800  [(set_attr "isa" "noavx,avx")
801   (set_attr "type" "sseadd")
802   (set_attr "prefix" "orig,vex")
803   (set_attr "mode" "<ssescalarmode>")])
804
;; Float-vector multiply expander.  The operands are first passed through
;; ix86_fixup_binary_operands_no_copy; there is no DONE, so the template
;; is then generated from the adjusted operands.
805(define_expand "mul<mode>3"
806  [(set (match_operand:VF 0 "register_operand" "")
807	(mult:VF
808	  (match_operand:VF 1 "nonimmediate_operand" "")
809	  (match_operand:VF 2 "nonimmediate_operand" "")))]
810  "TARGET_SSE"
811  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
812
;; Float-vector multiply insn.  Operand 1 is marked commutative ("%").
;;   Alternative 0 (isa noavx): two-operand form, with operand 1 tied to
;;     the destination.
;;   Alternative 1 (isa avx): three-operand vmul form.
;; The operands must also satisfy ix86_binary_operator_ok.
813(define_insn "*mul<mode>3"
814  [(set (match_operand:VF 0 "register_operand" "=x,x")
815	(mult:VF
816	  (match_operand:VF 1 "nonimmediate_operand" "%0,x")
817	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
818  "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
819  "@
820   mul<ssemodesuffix>\t{%2, %0|%0, %2}
821   vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
822  [(set_attr "isa" "noavx,avx")
823   (set_attr "type" "ssemul")
824   (set_attr "prefix" "orig,vex")
825   (set_attr "mode" "<MODE>")])
826
;; Scalar multiply on 128-bit float vectors.  Element 0 comes from the
;; product and the remaining elements from operand 1 (vec_merge mask 1
;; with match_dup 1).
827(define_insn "<sse>_vmmul<mode>3"
828  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
829	(vec_merge:VF_128
830	  (mult:VF_128
831	    (match_operand:VF_128 1 "register_operand" "0,x")
832	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
833	  (match_dup 1)
834	  (const_int 1)))]
835  "TARGET_SSE"
836  "@
837   mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
838   vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
839  [(set_attr "isa" "noavx,avx")
840   (set_attr "type" "ssemul")
841   (set_attr "prefix" "orig,vex")
842   (set_attr "mode" "<ssescalarmode>")])
843
;; Divide expander for the DFmode vector modes (VF2); requires
;; TARGET_SSE2.  It only fixes up the operands and then generates the
;; template.
844(define_expand "div<mode>3"
845  [(set (match_operand:VF2 0 "register_operand" "")
846	(div:VF2 (match_operand:VF2 1 "register_operand" "")
847		 (match_operand:VF2 2 "nonimmediate_operand" "")))]
848  "TARGET_SSE2"
849  "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
850
;; Divide expander for the SFmode vector modes (VF1).  After the usual
;; operand fix-up, the division is handed to ix86_emit_swdivsf instead of
;; the RTL template when all of the following hold:
;;   - TARGET_SSE_MATH and TARGET_RECIP_VEC_DIV are set;
;;   - the insn is not being optimized for size;
;;   - -ffinite-math-only and -funsafe-math-optimizations are on and
;;     -ftrapping-math is off.
;; Otherwise the plain (div ...) template is generated.
(define_expand "div<mode>3"
  [(set (match_operand:VF1 0 "register_operand" "")
	(div:VF1 (match_operand:VF1 1 "register_operand" "")
		 (match_operand:VF1 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  bool use_sw_division;

  ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);

  use_sw_division = (TARGET_SSE_MATH
		     && TARGET_RECIP_VEC_DIV
		     && !optimize_insn_for_size_p ()
		     && flag_finite_math_only
		     && !flag_trapping_math
		     && flag_unsafe_math_optimizations);

  if (use_sw_division)
    {
      ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
      DONE;
    }
})
869
;; Float-vector divide insn.  Operand 1 must be a register.
;;   Alternative 0 (isa noavx): two-operand form, with operand 1 tied to
;;     the destination.
;;   Alternative 1 (isa avx): three-operand vdiv form.
870(define_insn "<sse>_div<mode>3"
871  [(set (match_operand:VF 0 "register_operand" "=x,x")
872	(div:VF
873	  (match_operand:VF 1 "register_operand" "0,x")
874	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
875  "TARGET_SSE"
876  "@
877   div<ssemodesuffix>\t{%2, %0|%0, %2}
878   vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
879  [(set_attr "isa" "noavx,avx")
880   (set_attr "type" "ssediv")
881   (set_attr "prefix" "orig,vex")
882   (set_attr "mode" "<MODE>")])
883
;; Scalar divide on 128-bit float vectors.  Element 0 comes from the
;; quotient and the remaining elements from operand 1 (vec_merge mask 1
;; with match_dup 1).
884(define_insn "<sse>_vmdiv<mode>3"
885  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
886	(vec_merge:VF_128
887	  (div:VF_128
888	    (match_operand:VF_128 1 "register_operand" "0,x")
889	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
890	  (match_dup 1)
891	  (const_int 1)))]
892  "TARGET_SSE"
893  "@
894   div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
895   vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
896  [(set_attr "isa" "noavx,avx")
897   (set_attr "type" "ssediv")
898   (set_attr "prefix" "orig,vex")
899   (set_attr "mode" "<ssescalarmode>")])
900
;; UNSPEC_RCP of an SFmode vector (VF1), emitted as %vrcpps.  There is a
;; single alternative, with maybe_vex encoding.
901(define_insn "<sse>_rcp<mode>2"
902  [(set (match_operand:VF1 0 "register_operand" "=x")
903	(unspec:VF1
904	  [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
905  "TARGET_SSE"
906  "%vrcpps\t{%1, %0|%0, %1}"
907  [(set_attr "type" "sse")
908   (set_attr "atom_sse_attr" "rcp")
909   (set_attr "prefix" "maybe_vex")
910   (set_attr "mode" "<MODE>")])
911
;; Scalar UNSPEC_RCP on V4SF.  Element 0 comes from the rcp of operand 1
;; and the remaining elements from operand 2 (vec_merge mask 1).  In the
;; non-AVX alternative operand 2 is tied to the destination; the AVX
;; alternative names it explicitly in the vrcpss template.
912(define_insn "sse_vmrcpv4sf2"
913  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
914	(vec_merge:V4SF
915	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
916		       UNSPEC_RCP)
917	  (match_operand:V4SF 2 "register_operand" "0,x")
918	  (const_int 1)))]
919  "TARGET_SSE"
920  "@
921   rcpss\t{%1, %0|%0, %1}
922   vrcpss\t{%1, %2, %0|%0, %2, %1}"
923  [(set_attr "isa" "noavx,avx")
924   (set_attr "type" "sse")
925   (set_attr "atom_sse_attr" "rcp")
926   (set_attr "prefix" "orig,vex")
927   (set_attr "mode" "SF")])
928
;; Square-root expander for the DFmode vector modes (VF2); requires
;; TARGET_SSE2.  It has no preparation code, so the template is always
;; generated.
929(define_expand "sqrt<mode>2"
930  [(set (match_operand:VF2 0 "register_operand" "")
931	(sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
932  "TARGET_SSE2")
933
;; Square-root expander for the SFmode vector modes (VF1).  The operation
;; is handed to ix86_emit_swsqrtsf (with its last argument false) instead
;; of the RTL template when all of the following hold:
;;   - TARGET_SSE_MATH and TARGET_RECIP_VEC_SQRT are set;
;;   - the insn is not being optimized for size;
;;   - -ffinite-math-only and -funsafe-math-optimizations are on and
;;     -ftrapping-math is off.
;; Otherwise the plain (sqrt ...) template is generated.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VF1 0 "register_operand" "")
	(sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  bool use_sw_sqrt = (TARGET_SSE_MATH
		      && TARGET_RECIP_VEC_SQRT
		      && !optimize_insn_for_size_p ()
		      && flag_finite_math_only
		      && !flag_trapping_math
		      && flag_unsafe_math_optimizations);

  if (use_sw_sqrt)
    {
      ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
      DONE;
    }
})
949
;; Float-vector square root insn, emitted as %vsqrt<ssemodesuffix>.
;; There is a single alternative, with maybe_vex encoding.
950(define_insn "<sse>_sqrt<mode>2"
951  [(set (match_operand:VF 0 "register_operand" "=x")
952	(sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
953  "TARGET_SSE"
954  "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
955  [(set_attr "type" "sse")
956   (set_attr "atom_sse_attr" "sqrt")
957   (set_attr "prefix" "maybe_vex")
958   (set_attr "mode" "<MODE>")])
959
;; Scalar square root on 128-bit float vectors.  Element 0 comes from the
;; sqrt of operand 1 and the remaining elements from operand 2 (vec_merge
;; mask 1).  In the non-AVX alternative operand 2 is tied to the
;; destination.
960(define_insn "<sse>_vmsqrt<mode>2"
961  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
962	(vec_merge:VF_128
963	  (sqrt:VF_128
964	    (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
965	  (match_operand:VF_128 2 "register_operand" "0,x")
966	  (const_int 1)))]
967  "TARGET_SSE"
968  "@
969   sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
970   vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
971  [(set_attr "isa" "noavx,avx")
972   (set_attr "type" "sse")
973   (set_attr "atom_sse_attr" "sqrt")
974   (set_attr "prefix" "orig,vex")
975   (set_attr "mode" "<ssescalarmode>")])
976
;; rsqrt expander for the SFmode vector modes; enabled under
;; TARGET_SSE_MATH.  It unconditionally calls ix86_emit_swsqrtsf with
;; true as the last argument; the sqrt expander for the same modes
;; passes false.  The UNSPEC_RSQRT template is never emitted (DONE).
;; NOTE(review): the flag presumably selects the reciprocal variant --
;; confirm in ix86_emit_swsqrtsf.
977(define_expand "rsqrt<mode>2"
978  [(set (match_operand:VF1 0 "register_operand" "")
979	(unspec:VF1
980	  [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
981  "TARGET_SSE_MATH"
982{
983  ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
984  DONE;
985})
986
;; UNSPEC_RSQRT of an SFmode vector (VF1), emitted as %vrsqrtps.  There
;; is a single alternative, with maybe_vex encoding.
987(define_insn "<sse>_rsqrt<mode>2"
988  [(set (match_operand:VF1 0 "register_operand" "=x")
989	(unspec:VF1
990	  [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
991  "TARGET_SSE"
992  "%vrsqrtps\t{%1, %0|%0, %1}"
993  [(set_attr "type" "sse")
994   (set_attr "prefix" "maybe_vex")
995   (set_attr "mode" "<MODE>")])
996
;; Scalar UNSPEC_RSQRT merged into a V4SF vector.  The vec_merge mask
;; (const_int 1) takes element 0 from the unspec applied to operand 1 and
;; elements 1-3 from operand 2.
;; Alternative 0 (isa noavx) ties operand 2 to the destination;
;; alternative 1 (isa avx) emits the three-operand vrsqrtss form.
997(define_insn "sse_vmrsqrtv4sf2"
998  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
999	(vec_merge:V4SF
1000	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1001		       UNSPEC_RSQRT)
1002	  (match_operand:V4SF 2 "register_operand" "0,x")
1003	  (const_int 1)))]
1004  "TARGET_SSE"
1005  "@
1006   rsqrtss\t{%1, %0|%0, %1}
1007   vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
1008  [(set_attr "isa" "noavx,avx")
1009   (set_attr "type" "sse")
1010   (set_attr "prefix" "orig,vex")
1011   (set_attr "mode" "SF")])
1012
1013;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1014;; isn't really correct, as those rtl operators aren't defined when
1015;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.
1016
;; Expander for the smaxmin code iterator over VF modes.
;; When flag_finite_math_only is clear, operand 1 is forced into a register
;; first; the non-finite insn "*<code><mode>3" in this file only accepts a
;; register_operand there.  ix86_fixup_binary_operands_no_copy is then
;; applied to the operand array in both cases.
1017(define_expand "<code><mode>3"
1018  [(set (match_operand:VF 0 "register_operand" "")
1019	(smaxmin:VF
1020	  (match_operand:VF 1 "nonimmediate_operand" "")
1021	  (match_operand:VF 2 "nonimmediate_operand" "")))]
1022  "TARGET_SSE"
1023{
1024  if (!flag_finite_math_only)
1025    operands[1] = force_reg (<MODE>mode, operands[1]);
1026  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
1027})
1028
;; Vector min/max insn used only when flag_finite_math_only is set.
;; The '%' on operand 1 marks operands 1 and 2 as commutative, and the
;; condition also requires ix86_binary_operator_ok to accept the operands.
;; Alternative 0 is the two-operand form (isa noavx), alternative 1 the
;; three-operand v-prefixed form (isa avx).
1029(define_insn "*<code><mode>3_finite"
1030  [(set (match_operand:VF 0 "register_operand" "=x,x")
1031	(smaxmin:VF
1032	  (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1033	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1034  "TARGET_SSE && flag_finite_math_only
1035   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1036  "@
1037   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1038   v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1039  [(set_attr "isa" "noavx,avx")
1040   (set_attr "type" "sseadd")
1041   (set_attr "prefix" "orig,vex")
1042   (set_attr "mode" "<MODE>")])
1043
;; Vector min/max insn used when flag_finite_math_only is clear.
;; Unlike the _finite variant, operand 1 has no '%' commutative marker and
;; must be a register, so the operand order given in the RTL is kept.
1044(define_insn "*<code><mode>3"
1045  [(set (match_operand:VF 0 "register_operand" "=x,x")
1046	(smaxmin:VF
1047	  (match_operand:VF 1 "register_operand" "0,x")
1048	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1049  "TARGET_SSE && !flag_finite_math_only"
1050  "@
1051   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1052   v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1053  [(set_attr "isa" "noavx,avx")
1054   (set_attr "type" "sseadd")
1055   (set_attr "prefix" "orig,vex")
1056   (set_attr "mode" "<MODE>")])
1057
;; Scalar min/max merged into a 128-bit vector (VF_128 modes).
;; The vec_merge mask (const_int 1) takes element 0 from the smaxmin of
;; operands 1 and 2; all other elements are copied from operand 1
;; (match_dup 1).
1058(define_insn "<sse>_vm<code><mode>3"
1059  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1060	(vec_merge:VF_128
1061	  (smaxmin:VF_128
1062	    (match_operand:VF_128 1 "register_operand" "0,x")
1063	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
1064	 (match_dup 1)
1065	 (const_int 1)))]
1066  "TARGET_SSE"
1067  "@
1068   <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1069   v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1070  [(set_attr "isa" "noavx,avx")
1071   (set_attr "type" "sse")
1072   (set_attr "prefix" "orig,vex")
1073   (set_attr "mode" "<ssescalarmode>")])
1074
1075;; These versions of the min/max patterns implement exactly the operations
1076;;   min = (op1 < op2 ? op1 : op2)
1077;;   max = (!(op1 < op2) ? op1 : op2)
1078;; Their operands are not commutative, and thus they may be used in the
1079;; presence of -0.0 and NaN.
1080
;; Order-sensitive vector minimum, represented as UNSPEC_IEEE_MIN rather
;; than the smin RTL code.  Operand 1 must be a register and carries no '%'
;; marker, so operands are never swapped.  Emits min/vmin with operand 2 as
;; the register-or-memory source.
1081(define_insn "*ieee_smin<mode>3"
1082  [(set (match_operand:VF 0 "register_operand" "=x,x")
1083	(unspec:VF
1084	  [(match_operand:VF 1 "register_operand" "0,x")
1085	   (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1086	 UNSPEC_IEEE_MIN))]
1087  "TARGET_SSE"
1088  "@
1089   min<ssemodesuffix>\t{%2, %0|%0, %2}
1090   vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1091  [(set_attr "isa" "noavx,avx")
1092   (set_attr "type" "sseadd")
1093   (set_attr "prefix" "orig,vex")
1094   (set_attr "mode" "<MODE>")])
1095
;; Order-sensitive vector maximum, represented as UNSPEC_IEEE_MAX rather
;; than the smax RTL code.  Operand 1 must be a register and carries no '%'
;; marker, so operands are never swapped.  Emits max/vmax with operand 2 as
;; the register-or-memory source.
1096(define_insn "*ieee_smax<mode>3"
1097  [(set (match_operand:VF 0 "register_operand" "=x,x")
1098	(unspec:VF
1099	  [(match_operand:VF 1 "register_operand" "0,x")
1100	   (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1101	 UNSPEC_IEEE_MAX))]
1102  "TARGET_SSE"
1103  "@
1104   max<ssemodesuffix>\t{%2, %0|%0, %2}
1105   vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1106  [(set_attr "isa" "noavx,avx")
1107   (set_attr "type" "sseadd")
1108   (set_attr "prefix" "orig,vex")
1109   (set_attr "mode" "<MODE>")])
1110
;; V4DF alternating add/subtract (AVX only, single three-operand form).
;; The vec_merge mask 10 (binary 1010) takes elements 1 and 3 from
;; (plus op1 op2) and elements 0 and 2 from (minus op1 op2).
1111(define_insn "avx_addsubv4df3"
1112  [(set (match_operand:V4DF 0 "register_operand" "=x")
1113	(vec_merge:V4DF
1114	  (plus:V4DF
1115	    (match_operand:V4DF 1 "register_operand" "x")
1116	    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1117	  (minus:V4DF (match_dup 1) (match_dup 2))
1118	  (const_int 10)))]
1119  "TARGET_AVX"
1120  "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1121  [(set_attr "type" "sseadd")
1122   (set_attr "prefix" "vex")
1123   (set_attr "mode" "V4DF")])
1124
;; V2DF alternating add/subtract (SSE3, with an AVX alternative).
;; The vec_merge mask 2 (binary 10) takes element 1 from (plus op1 op2)
;; and element 0 from (minus op1 op2).
1125(define_insn "sse3_addsubv2df3"
1126  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1127	(vec_merge:V2DF
1128	  (plus:V2DF
1129	    (match_operand:V2DF 1 "register_operand" "0,x")
1130	    (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1131	  (minus:V2DF (match_dup 1) (match_dup 2))
1132	  (const_int 2)))]
1133  "TARGET_SSE3"
1134  "@
1135   addsubpd\t{%2, %0|%0, %2}
1136   vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1137  [(set_attr "isa" "noavx,avx")
1138   (set_attr "type" "sseadd")
1139   (set_attr "atom_unit" "complex")
1140   (set_attr "prefix" "orig,vex")
1141   (set_attr "mode" "V2DF")])
1142
;; V8SF alternating add/subtract (AVX only, single three-operand form).
;; The vec_merge mask 170 (binary 10101010) takes the odd-numbered
;; elements from (plus op1 op2) and the even-numbered elements from
;; (minus op1 op2).
1143(define_insn "avx_addsubv8sf3"
1144  [(set (match_operand:V8SF 0 "register_operand" "=x")
1145	(vec_merge:V8SF
1146	  (plus:V8SF
1147	    (match_operand:V8SF 1 "register_operand" "x")
1148	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1149	  (minus:V8SF (match_dup 1) (match_dup 2))
1150	  (const_int 170)))]
1151  "TARGET_AVX"
1152  "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1153  [(set_attr "type" "sseadd")
1154   (set_attr "prefix" "vex")
1155   (set_attr "mode" "V8SF")])
1156
;; V4SF alternating add/subtract (SSE3, with an AVX alternative).
;; The vec_merge mask 10 (binary 1010) takes elements 1 and 3 from
;; (plus op1 op2) and elements 0 and 2 from (minus op1 op2).
;; prefix_rep is set to 1 for the legacy-encoded alternative only.
1157(define_insn "sse3_addsubv4sf3"
1158  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1159	(vec_merge:V4SF
1160	  (plus:V4SF
1161	    (match_operand:V4SF 1 "register_operand" "0,x")
1162	    (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1163	  (minus:V4SF (match_dup 1) (match_dup 2))
1164	  (const_int 10)))]
1165  "TARGET_SSE3"
1166  "@
1167   addsubps\t{%2, %0|%0, %2}
1168   vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1169  [(set_attr "isa" "noavx,avx")
1170   (set_attr "type" "sseadd")
1171   (set_attr "prefix" "orig,vex")
1172   (set_attr "prefix_rep" "1,*")
1173   (set_attr "mode" "V4SF")])
1174
;; V4DF horizontal add/subtract (plusminus code iterator), AVX only.
;; Writing "o" for the plus/minus operation, the result elements from
;; lowest to highest are:
;;   op1[0] o op1[1], op2[0] o op2[1], op1[2] o op1[3], op2[2] o op2[3]
;; i.e. operands 1 and 2 are interleaved per pair of input elements.
1175(define_insn "avx_h<plusminus_insn>v4df3"
1176  [(set (match_operand:V4DF 0 "register_operand" "=x")
1177	(vec_concat:V4DF
1178	  (vec_concat:V2DF
1179	    (plusminus:DF
1180	      (vec_select:DF
1181		(match_operand:V4DF 1 "register_operand" "x")
1182		(parallel [(const_int 0)]))
1183	      (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1184	    (plusminus:DF
1185	      (vec_select:DF
1186		(match_operand:V4DF 2 "nonimmediate_operand" "xm")
1187		(parallel [(const_int 0)]))
1188	      (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1189	  (vec_concat:V2DF
1190	    (plusminus:DF
1191	      (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1192	      (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1193	    (plusminus:DF
1194	      (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1195	      (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1196  "TARGET_AVX"
1197  "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1198  [(set_attr "type" "sseadd")
1199   (set_attr "prefix" "vex")
1200   (set_attr "mode" "V4DF")])
1201
;; V2DF horizontal add/subtract (plusminus code iterator), SSE3 with an
;; AVX alternative.  Writing "o" for the plus/minus operation:
;;   result[0] = op1[0] o op1[1]
;;   result[1] = op2[0] o op2[1]
1202(define_insn "sse3_h<plusminus_insn>v2df3"
1203  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1204	(vec_concat:V2DF
1205	  (plusminus:DF
1206	    (vec_select:DF
1207	      (match_operand:V2DF 1 "register_operand" "0,x")
1208	      (parallel [(const_int 0)]))
1209	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1210	  (plusminus:DF
1211	    (vec_select:DF
1212	      (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1213	      (parallel [(const_int 0)]))
1214	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1215  "TARGET_SSE3"
1216  "@
1217   h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1218   vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1219  [(set_attr "isa" "noavx,avx")
1220   (set_attr "type" "sseadd")
1221   (set_attr "prefix" "orig,vex")
1222   (set_attr "mode" "V2DF")])
1223
;; V8SF horizontal add/subtract (plusminus code iterator), AVX only.
;; Writing "o" for the plus/minus operation, the result elements from
;; lowest to highest are:
;;   op1[0] o op1[1], op1[2] o op1[3], op2[0] o op2[1], op2[2] o op2[3],
;;   op1[4] o op1[5], op1[6] o op1[7], op2[4] o op2[5], op2[6] o op2[7]
;; so each group of four results uses elements 0-3 or 4-7 of both inputs.
1224(define_insn "avx_h<plusminus_insn>v8sf3"
1225  [(set (match_operand:V8SF 0 "register_operand" "=x")
1226	(vec_concat:V8SF
1227	  (vec_concat:V4SF
1228	    (vec_concat:V2SF
1229	      (plusminus:SF
1230		(vec_select:SF
1231		  (match_operand:V8SF 1 "register_operand" "x")
1232		  (parallel [(const_int 0)]))
1233		(vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1234	      (plusminus:SF
1235		(vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1236		(vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1237	    (vec_concat:V2SF
1238	      (plusminus:SF
1239		(vec_select:SF
1240		  (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1241		  (parallel [(const_int 0)]))
1242		(vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1243	      (plusminus:SF
1244		(vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1245		(vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1246	  (vec_concat:V4SF
1247	    (vec_concat:V2SF
1248	      (plusminus:SF
1249		(vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1250		(vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1251	      (plusminus:SF
1252		(vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1253		(vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1254	    (vec_concat:V2SF
1255	      (plusminus:SF
1256		(vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1257		(vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1258	      (plusminus:SF
1259		(vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1260		(vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1261  "TARGET_AVX"
1262  "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1263  [(set_attr "type" "sseadd")
1264   (set_attr "prefix" "vex")
1265   (set_attr "mode" "V8SF")])
1266
;; V4SF horizontal add/subtract (plusminus code iterator), SSE3 with an
;; AVX alternative.  Writing "o" for the plus/minus operation, the result
;; elements from lowest to highest are:
;;   op1[0] o op1[1], op1[2] o op1[3], op2[0] o op2[1], op2[2] o op2[3]
;; prefix_rep is set to 1 for the legacy-encoded alternative only.
1267(define_insn "sse3_h<plusminus_insn>v4sf3"
1268  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1269	(vec_concat:V4SF
1270	  (vec_concat:V2SF
1271	    (plusminus:SF
1272	      (vec_select:SF
1273		(match_operand:V4SF 1 "register_operand" "0,x")
1274		(parallel [(const_int 0)]))
1275	      (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1276	    (plusminus:SF
1277	      (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1278	      (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1279	  (vec_concat:V2SF
1280	    (plusminus:SF
1281	      (vec_select:SF
1282		(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1283		(parallel [(const_int 0)]))
1284	      (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1285	    (plusminus:SF
1286	      (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1287	      (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1288  "TARGET_SSE3"
1289  "@
1290   h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1291   vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1292  [(set_attr "isa" "noavx,avx")
1293   (set_attr "type" "sseadd")
1294   (set_attr "atom_unit" "complex")
1295   (set_attr "prefix" "orig,vex")
1296   (set_attr "prefix_rep" "1,*")
1297   (set_attr "mode" "V4SF")])
1298
;; Sum-reduction expander for V4DF (AVX).  Emits three insns:
;;   1. tmp  = horizontal add of operand 1 with itself,
;;   2. tmp2 = vperm2f128 of tmp with tmp, immediate 1,
;;   3. operand 0 = tmp + tmp2 (element-wise add).
;; NOTE(review): immediate 1 presumably exchanges the two 128-bit halves
;; so the final add combines both partial sums -- confirm against the
;; avx_vperm2f128 pattern.
1299(define_expand "reduc_splus_v4df"
1300  [(match_operand:V4DF 0 "register_operand" "")
1301   (match_operand:V4DF 1 "register_operand" "")]
1302  "TARGET_AVX"
1303{
1304  rtx tmp = gen_reg_rtx (V4DFmode);
1305  rtx tmp2 = gen_reg_rtx (V4DFmode);
1306  emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1307  emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1308  emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
1309  DONE;
1310})
1311
;; Sum-reduction expander for V2DF (SSE3): a single horizontal add of
;; operand 1 with itself, written straight into operand 0.
1312(define_expand "reduc_splus_v2df"
1313  [(match_operand:V2DF 0 "register_operand" "")
1314   (match_operand:V2DF 1 "register_operand" "")]
1315  "TARGET_SSE3"
1316{
1317  emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1318  DONE;
1319})
1320
;; Sum-reduction expander for V8SF (AVX).  Emits four insns:
;;   1. tmp  = horizontal add of operand 1 with itself,
;;   2. tmp2 = horizontal add of tmp with itself,
;;   3. tmp  = vperm2f128 of tmp2 with tmp2, immediate 1
;;             (tmp is reused as the destination here),
;;   4. operand 0 = tmp + tmp2 (element-wise add).
1321(define_expand "reduc_splus_v8sf"
1322  [(match_operand:V8SF 0 "register_operand" "")
1323   (match_operand:V8SF 1 "register_operand" "")]
1324  "TARGET_AVX"
1325{
1326  rtx tmp = gen_reg_rtx (V8SFmode);
1327  rtx tmp2 = gen_reg_rtx (V8SFmode);
1328  emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1329  emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1330  emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1331  emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
1332  DONE;
1333})
1334
;; Sum-reduction expander for V4SF, available from plain SSE.
;; With TARGET_SSE3 it emits two chained horizontal adds (the second one
;; writes operand 0); without SSE3 it delegates to ix86_expand_reduc using
;; gen_addv4sf3 as the combining operation.
1335(define_expand "reduc_splus_v4sf"
1336  [(match_operand:V4SF 0 "register_operand" "")
1337   (match_operand:V4SF 1 "register_operand" "")]
1338  "TARGET_SSE"
1339{
1340  if (TARGET_SSE3)
1341    {
1342      rtx tmp = gen_reg_rtx (V4SFmode);
1343      emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1344      emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1345    }
1346  else
1347    ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1348  DONE;
1349})
1350
1351;; Modes handled by reduc_sm{in,ax}* patterns.
;; Each entry is enabled only under its own target condition: the four
;; integer modes require TARGET_AVX2, V8SF and V4DF require TARGET_AVX,
;; and V4SF requires TARGET_SSE.
1352(define_mode_iterator REDUC_SMINMAX_MODE
1353  [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1354   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1355   (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1356   (V4SF "TARGET_SSE")])
1357
;; Signed min/max reduction expander for every mode in REDUC_SMINMAX_MODE.
;; The insn condition is empty, so availability comes only from the
;; per-mode conditions of the iterator.  All work is delegated to
;; ix86_expand_reduc with the matching <code><mode>3 generator.
1358(define_expand "reduc_<code>_<mode>"
1359  [(smaxmin:REDUC_SMINMAX_MODE
1360     (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
1361     (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
1362  ""
1363{
1364  ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1365  DONE;
1366})
1367
;; Unsigned min/max reduction expander (umaxmin code iterator) for the
;; VI_256 modes, enabled under TARGET_AVX2.  All work is delegated to
;; ix86_expand_reduc with the matching <code><mode>3 generator.
1368(define_expand "reduc_<code>_<mode>"
1369  [(umaxmin:VI_256
1370     (match_operand:VI_256 0 "register_operand" "")
1371     (match_operand:VI_256 1 "register_operand" ""))]
1372  "TARGET_AVX2"
1373{
1374  ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
1375  DONE;
1376})
1377
;; Unsigned-minimum reduction expander for V8HI, enabled under
;; TARGET_SSE4_1.  Delegates to ix86_expand_reduc with gen_uminv8hi3.
1378(define_expand "reduc_umin_v8hi"
1379  [(umin:V8HI
1380     (match_operand:V8HI 0 "register_operand" "")
1381     (match_operand:V8HI 1 "register_operand" ""))]
1382  "TARGET_SSE4_1"
1383{
1384  ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1385  DONE;
1386})
1387
1388;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1389;;
1390;; Parallel floating point comparisons
1391;;
1392;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1393
;; AVX vector compare with an explicit predicate immediate, represented
;; as UNSPEC_PCMP.  Operand 3 must satisfy const_0_to_31_operand and is
;; printed as the immediate of vcmp<suffix>; length_immediate accounts for
;; that one immediate byte.
1394(define_insn "avx_cmp<mode>3"
1395  [(set (match_operand:VF 0 "register_operand" "=x")
1396	(unspec:VF
1397	  [(match_operand:VF 1 "register_operand" "x")
1398	   (match_operand:VF 2 "nonimmediate_operand" "xm")
1399	   (match_operand:SI 3 "const_0_to_31_operand" "n")]
1400	  UNSPEC_PCMP))]
1401  "TARGET_AVX"
1402  "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1403  [(set_attr "type" "ssecmp")
1404   (set_attr "length_immediate" "1")
1405   (set_attr "prefix" "vex")
1406   (set_attr "mode" "<MODE>")])
1407
;; Scalar form of the AVX predicate compare for VF_128 modes.
;; The vec_merge mask (const_int 1) takes element 0 from the UNSPEC_PCMP
;; result and all other elements from operand 1 (match_dup 1).
1408(define_insn "avx_vmcmp<mode>3"
1409  [(set (match_operand:VF_128 0 "register_operand" "=x")
1410	(vec_merge:VF_128
1411	  (unspec:VF_128
1412	    [(match_operand:VF_128 1 "register_operand" "x")
1413	     (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1414	     (match_operand:SI 3 "const_0_to_31_operand" "n")]
1415	    UNSPEC_PCMP)
1416	 (match_dup 1)
1417	 (const_int 1)))]
1418  "TARGET_AVX"
1419  "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1420  [(set_attr "type" "ssecmp")
1421   (set_attr "length_immediate" "1")
1422   (set_attr "prefix" "vex")
1423   (set_attr "mode" "<ssescalarmode>")])
1424
;; Mask-producing vector compare for commutative comparison codes.
;; The condition restricts operator 3 to RTX_COMM_COMPARE codes, which is
;; what makes the '%' (commutative) marker on operand 1 valid.
;; NOTE(review): %D3 presumably prints the predicate suffix derived from
;; operator 3 -- confirm against the i386 operand-printing code.
1425(define_insn "*<sse>_maskcmp<mode>3_comm"
1426  [(set (match_operand:VF 0 "register_operand" "=x,x")
1427	(match_operator:VF 3 "sse_comparison_operator"
1428	  [(match_operand:VF 1 "register_operand" "%0,x")
1429	   (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1430  "TARGET_SSE
1431   && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1432  "@
1433   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1434   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1435  [(set_attr "isa" "noavx,avx")
1436   (set_attr "type" "ssecmp")
1437   (set_attr "length_immediate" "1")
1438   (set_attr "prefix" "orig,vex")
1439   (set_attr "mode" "<MODE>")])
1440
;; Mask-producing vector compare for any code accepted by
;; sse_comparison_operator.  Operand 1 has no '%' marker here, so the
;; operand order from the RTL is kept.
1441(define_insn "<sse>_maskcmp<mode>3"
1442  [(set (match_operand:VF 0 "register_operand" "=x,x")
1443	(match_operator:VF 3 "sse_comparison_operator"
1444	  [(match_operand:VF 1 "register_operand" "0,x")
1445	   (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1446  "TARGET_SSE"
1447  "@
1448   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1449   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1450  [(set_attr "isa" "noavx,avx")
1451   (set_attr "type" "ssecmp")
1452   (set_attr "length_immediate" "1")
1453   (set_attr "prefix" "orig,vex")
1454   (set_attr "mode" "<MODE>")])
1455
;; Scalar mask-producing compare for VF_128 modes.  The vec_merge mask
;; (const_int 1) takes element 0 from the comparison result and all other
;; elements from operand 1 (match_dup 1).
;; Note that length_immediate is "1,*" here: it is given explicitly only
;; for the legacy-encoded alternative.
1456(define_insn "<sse>_vmmaskcmp<mode>3"
1457  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1458	(vec_merge:VF_128
1459	 (match_operator:VF_128 3 "sse_comparison_operator"
1460	   [(match_operand:VF_128 1 "register_operand" "0,x")
1461	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1462	 (match_dup 1)
1463	 (const_int 1)))]
1464  "TARGET_SSE"
1465  "@
1466   cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1467   vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1468  [(set_attr "isa" "noavx,avx")
1469   (set_attr "type" "ssecmp")
1470   (set_attr "length_immediate" "1,*")
1471   (set_attr "prefix" "orig,vex")
1472   (set_attr "mode" "<ssescalarmode>")])
1473
;; Scalar compare that sets FLAGS_REG in CCFP mode.  Element 0 of
;; operand 0 is compared with element 0 of operand 1; both operands are
;; passed in the vector mode <ssevecmode> of the scalar MODEF mode.
;; prefix_data16 is 1 when the insn mode is DF and 0 otherwise.
1474(define_insn "<sse>_comi"
1475  [(set (reg:CCFP FLAGS_REG)
1476	(compare:CCFP
1477	  (vec_select:MODEF
1478	    (match_operand:<ssevecmode> 0 "register_operand" "x")
1479	    (parallel [(const_int 0)]))
1480	  (vec_select:MODEF
1481	    (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1482	    (parallel [(const_int 0)]))))]
1483  "SSE_FLOAT_MODE_P (<MODE>mode)"
1484  "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1485  [(set_attr "type" "ssecomi")
1486   (set_attr "prefix" "maybe_vex")
1487   (set_attr "prefix_rep" "0")
1488   (set (attr "prefix_data16")
1489	(if_then_else (eq_attr "mode" "DF")
1490		      (const_string "1")
1491		      (const_string "0")))
1492   (set_attr "mode" "<MODE>")])
1493
;; Same shape as the comi pattern, but the flags are set in CCFPU mode
;; and the ucomi mnemonic is emitted.  Element 0 of operand 0 is compared
;; with element 0 of operand 1.
;; prefix_data16 is 1 when the insn mode is DF and 0 otherwise.
1494(define_insn "<sse>_ucomi"
1495  [(set (reg:CCFPU FLAGS_REG)
1496	(compare:CCFPU
1497	  (vec_select:MODEF
1498	    (match_operand:<ssevecmode> 0 "register_operand" "x")
1499	    (parallel [(const_int 0)]))
1500	  (vec_select:MODEF
1501	    (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1502	    (parallel [(const_int 0)]))))]
1503  "SSE_FLOAT_MODE_P (<MODE>mode)"
1504  "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1505  [(set_attr "type" "ssecomi")
1506   (set_attr "prefix" "maybe_vex")
1507   (set_attr "prefix_rep" "0")
1508   (set (attr "prefix_data16")
1509	(if_then_else (eq_attr "mode" "DF")
1510		      (const_string "1")
1511		      (const_string "0")))
1512   (set_attr "mode" "<MODE>")])
1513
;; Vector conditional select for V_256 data modes driven by a VF_256
;; floating-point comparison (operator 3 on operands 4 and 5).
;; The condition requires the data mode and the comparison mode to have
;; the same number of elements.  Expansion is delegated to
;; ix86_expand_fp_vcond, whose success is asserted.
1514(define_expand "vcond<V_256:mode><VF_256:mode>"
1515  [(set (match_operand:V_256 0 "register_operand" "")
1516	(if_then_else:V_256
1517	  (match_operator 3 ""
1518	    [(match_operand:VF_256 4 "nonimmediate_operand" "")
1519	     (match_operand:VF_256 5 "nonimmediate_operand" "")])
1520	  (match_operand:V_256 1 "general_operand" "")
1521	  (match_operand:V_256 2 "general_operand" "")))]
1522  "TARGET_AVX
1523   && (GET_MODE_NUNITS (<V_256:MODE>mode)
1524       == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1525{
1526  bool ok = ix86_expand_fp_vcond (operands);
1527  gcc_assert (ok);
1528  DONE;
1529})
1530
;; Vector conditional select for V_128 data modes driven by a VF_128
;; floating-point comparison (operator 3 on operands 4 and 5).
;; The condition requires the data mode and the comparison mode to have
;; the same number of elements.  Expansion is delegated to
;; ix86_expand_fp_vcond, whose success is asserted.
1531(define_expand "vcond<V_128:mode><VF_128:mode>"
1532  [(set (match_operand:V_128 0 "register_operand" "")
1533	(if_then_else:V_128
1534	  (match_operator 3 ""
1535	    [(match_operand:VF_128 4 "nonimmediate_operand" "")
1536	     (match_operand:VF_128 5 "nonimmediate_operand" "")])
1537	  (match_operand:V_128 1 "general_operand" "")
1538	  (match_operand:V_128 2 "general_operand" "")))]
1539  "TARGET_SSE
1540   && (GET_MODE_NUNITS (<V_128:MODE>mode)
1541       == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1542{
1543  bool ok = ix86_expand_fp_vcond (operands);
1544  gcc_assert (ok);
1545  DONE;
1546})
1547
1548;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1549;;
1550;; Parallel floating point logical operations
1551;;
1552;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1553
;; Vector and-not: operand 0 = (~operand 1) & operand 2 for VF modes.
;; The output is produced by C code rather than a fixed template because
;; the mnemonic suffix is chosen at output time: "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, else <ssemodesuffix>.
;; The template text is formatted with snprintf into a function-static
;; buffer whose address is returned.  "%%" in the format strings yields a
;; literal '%', so the operand references survive the snprintf pass.
1554(define_insn "<sse>_andnot<mode>3"
1555  [(set (match_operand:VF 0 "register_operand" "=x,x")
1556	(and:VF
1557	  (not:VF
1558	    (match_operand:VF 1 "register_operand" "0,x"))
1559	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1560  "TARGET_SSE"
1561{
1562  static char buf[32];
1563  const char *insn;
1564  const char *suffix
1565    = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1566
1567  switch (which_alternative)
1568    {
1569    case 0:
1570      insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1571      break;
1572    case 1:
1573      insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1574      break;
1575    default:
1576      gcc_unreachable ();
1577    }
1578
1579  snprintf (buf, sizeof (buf), insn, suffix);
1580  return buf;
1581}
1582  [(set_attr "isa" "noavx,avx")
1583   (set_attr "type" "sselog")
1584   (set_attr "prefix" "orig,vex")
1585   (set_attr "mode" "<MODE>")])
1586
;; Expander for the any_logic code iterator over VF modes.  Both inputs
;; may be memory at expand time; the one-statement preparation code runs
;; ix86_fixup_binary_operands_no_copy on the operand array before the
;; template is emitted.
1587(define_expand "<code><mode>3"
1588  [(set (match_operand:VF 0 "register_operand" "")
1589	(any_logic:VF
1590	  (match_operand:VF 1 "nonimmediate_operand" "")
1591	  (match_operand:VF 2 "nonimmediate_operand" "")))]
1592  "TARGET_SSE"
1593  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1594
;; Vector logical insn for the any_logic code iterator over VF modes.
;; Operands 1 and 2 are commutative ('%'), and the condition requires
;; ix86_binary_operator_ok.  As in the and-not pattern, the mnemonic
;; suffix is chosen at output time ("ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, else <ssemodesuffix>) and the
;; template is formatted into a function-static buffer; "%%" yields a
;; literal '%' so operand references survive snprintf.
1595(define_insn "*<code><mode>3"
1596  [(set (match_operand:VF 0 "register_operand" "=x,x")
1597	(any_logic:VF
1598	  (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1599	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1600  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1601{
1602  static char buf[32];
1603  const char *insn;
1604  const char *suffix
1605    = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1606
1607  switch (which_alternative)
1608    {
1609    case 0:
1610      insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1611      break;
1612    case 1:
1613      insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1614      break;
1615    default:
1616      gcc_unreachable ();
1617    }
1618
1619  snprintf (buf, sizeof (buf), insn, suffix);
1620  return buf;
1621}
1622  [(set_attr "isa" "noavx,avx")
1623   (set_attr "type" "sselog")
1624   (set_attr "prefix" "orig,vex")
1625   (set_attr "mode" "<MODE>")])
1626
;; Vector copysign expander for VF modes, emitted as three sets:
;;   operand 4 = (~operand 3) & operand 1
;;   operand 5 =   operand 3  & operand 2
;;   operand 0 =   operand 4  | operand 5
;; The preparation code fills in operand 3 with the value returned by
;; ix86_build_signbit_mask (<MODE>mode, 1, 0) and allocates fresh
;; pseudo registers for the two temporaries, operands 4 and 5.
1627(define_expand "copysign<mode>3"
1628  [(set (match_dup 4)
1629	(and:VF
1630	  (not:VF (match_dup 3))
1631	  (match_operand:VF 1 "nonimmediate_operand" "")))
1632   (set (match_dup 5)
1633	(and:VF (match_dup 3)
1634		(match_operand:VF 2 "nonimmediate_operand" "")))
1635   (set (match_operand:VF 0 "register_operand" "")
1636	(ior:VF (match_dup 4) (match_dup 5)))]
1637  "TARGET_SSE"
1638{
1639  operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1640
1641  operands[4] = gen_reg_rtx (<MODE>mode);
1642  operands[5] = gen_reg_rtx (<MODE>mode);
1643})
1644
1645;; Also define scalar versions.  These are used for abs, neg, and
1646;; conditional move.  Using subregs into vector modes causes register
1647;; allocation lossage.  These patterns do not allow memory operands
1648;; because the native instructions read the full 128 bits.
1649
;; Scalar (MODEF) and-not: operand 0 = (~operand 1) & operand 2.
;; Both inputs must be registers; there is no memory alternative.
;; The packed mnemonic is emitted with suffix "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, else <ssevecmodesuffix>,
;; and the insn mode attribute is the vector mode <ssevecmode>.
;; The template is formatted into a function-static buffer; "%%" yields a
;; literal '%' so operand references survive snprintf.
1650(define_insn "*andnot<mode>3"
1651  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1652	(and:MODEF
1653	  (not:MODEF
1654	    (match_operand:MODEF 1 "register_operand" "0,x"))
1655	    (match_operand:MODEF 2 "register_operand" "x,x")))]
1656  "SSE_FLOAT_MODE_P (<MODE>mode)"
1657{
1658  static char buf[32];
1659  const char *insn;
1660  const char *suffix
1661    = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1662
1663  switch (which_alternative)
1664    {
1665    case 0:
1666      insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1667      break;
1668    case 1:
1669      insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1670      break;
1671    default:
1672      gcc_unreachable ();
1673    }
1674
1675  snprintf (buf, sizeof (buf), insn, suffix);
1676  return buf;
1677}
1678  [(set_attr "isa" "noavx,avx")
1679   (set_attr "type" "sselog")
1680   (set_attr "prefix" "orig,vex")
1681   (set_attr "mode" "<ssevecmode>")])
1682
;; Scalar (MODEF) logical insn for the any_logic code iterator.
;; Both inputs must be registers (commutative via '%'); there is no
;; memory alternative.  The packed mnemonic is emitted with suffix "ps"
;; when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, else
;; <ssevecmodesuffix>, and the insn mode attribute is <ssevecmode>.
;; The template is formatted into a function-static buffer; "%%" yields a
;; literal '%' so operand references survive snprintf.
1683(define_insn "*<code><mode>3"
1684  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1685	(any_logic:MODEF
1686	  (match_operand:MODEF 1 "register_operand" "%0,x")
1687	  (match_operand:MODEF 2 "register_operand" "x,x")))]
1688  "SSE_FLOAT_MODE_P (<MODE>mode)"
1689{
1690  static char buf[32];
1691  const char *insn;
1692  const char *suffix
1693    = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1694
1695  switch (which_alternative)
1696    {
1697    case 0:
1698      insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1699      break;
1700    case 1:
1701      insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1702      break;
1703    default:
1704      gcc_unreachable ();
1705    }
1706
1707  snprintf (buf, sizeof (buf), insn, suffix);
1708  return buf;
1709}
1710  [(set_attr "isa" "noavx,avx")
1711   (set_attr "type" "sselog")
1712   (set_attr "prefix" "orig,vex")
1713   (set_attr "mode" "<ssevecmode>")])
1714
1715;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1716;;
1717;; FMA floating point multiply/accumulate instructions.  These include
1718;; scalar versions of the instructions as well as vector versions.
1719;;
1720;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1721
1722;; The standard names for scalar FMA are only available with SSE math enabled.
;; Mode iterator for the standard-name FMA expanders: the scalar modes SF
;; and DF are included only under TARGET_SSE_MATH, while the four vector
;; modes carry no extra per-mode condition.
1723(define_mode_iterator FMAMODEM [(SF "TARGET_SSE_MATH")
1724				(DF "TARGET_SSE_MATH")
1725				V4SF V2DF V8SF V4DF])
1726
;; Standard-name expander: operand 0 = fma (op1, op2, op3), i.e.
;; op1 * op2 + op3 as a fused operation.  No preparation code; the
;; template is emitted as-is when either FMA or FMA4 is available.
1727(define_expand "fma<mode>4"
1728  [(set (match_operand:FMAMODEM 0 "register_operand")
1729	(fma:FMAMODEM
1730	  (match_operand:FMAMODEM 1 "nonimmediate_operand")
1731	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
1732	  (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
1733  "TARGET_FMA || TARGET_FMA4")
1734
;; Standard-name expander: operand 0 = fma (op1, op2, -op3), i.e. the
;; addend (operand 3) is negated.  No preparation code.
1735(define_expand "fms<mode>4"
1736  [(set (match_operand:FMAMODEM 0 "register_operand")
1737	(fma:FMAMODEM
1738	  (match_operand:FMAMODEM 1 "nonimmediate_operand")
1739	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
1740	  (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
1741  "TARGET_FMA || TARGET_FMA4")
1742
;; Standard-name expander: operand 0 = fma (-op1, op2, op3), i.e. the
;; first multiplicand (operand 1) is negated.  No preparation code.
1743(define_expand "fnma<mode>4"
1744  [(set (match_operand:FMAMODEM 0 "register_operand")
1745	(fma:FMAMODEM
1746	  (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
1747	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
1748	  (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
1749  "TARGET_FMA || TARGET_FMA4")
1750
;; Standard-name expander: operand 0 = fma (-op1, op2, -op3), i.e. both
;; the first multiplicand and the addend are negated.  No preparation
;; code.
1751(define_expand "fnms<mode>4"
1752  [(set (match_operand:FMAMODEM 0 "register_operand")
1753	(fma:FMAMODEM
1754	  (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
1755	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
1756	  (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
1757  "TARGET_FMA || TARGET_FMA4")
1758
1759;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Same six modes as FMAMODEM, but SF and DF carry no TARGET_SSE_MATH
;; condition here.
1760(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
1761
;; Named expander over FMAMODE producing operand 0 = fma (op1, op2, op3)
;; with no preparation code.
;; NOTE(review): presumably this is the entry point used by the FMA
;; intrinsic builtins -- confirm against the builtin tables.
1762(define_expand "fma4i_fmadd_<mode>"
1763  [(set (match_operand:FMAMODE 0 "register_operand")
1764	(fma:FMAMODE
1765	  (match_operand:FMAMODE 1 "nonimmediate_operand")
1766	  (match_operand:FMAMODE 2 "nonimmediate_operand")
1767	  (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1768  "TARGET_FMA || TARGET_FMA4")
1769
;; Fused multiply-add insn: operand 0 = fma (op1, op2, op3).
;; Five alternatives, selected through the "isa" attribute:
;;   0-2 (isa fma):  destination tied to operand 1, 1, or 3 respectively,
;;                   emitted as the 132, 213 and 231 three-operand forms;
;;   3-4 (isa fma4): separate destination, four-operand form, with the
;;                   memory operand allowed in position 3 or position 2.
1770(define_insn "*fma_fmadd_<mode>"
1771  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1772	(fma:FMAMODE
1773	  (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
1774	  (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
1775	  (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
1776  "TARGET_FMA || TARGET_FMA4"
1777  "@
1778   vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1779   vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1780   vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1781   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1782   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1783  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1784   (set_attr "type" "ssemuladd")
1785   (set_attr "mode" "<MODE>")])
1786
;; Fused multiply-subtract insn: operand 0 = fma (op1, op2, -op3).
;; Five alternatives, selected through the "isa" attribute:
;;   0-2 (isa fma):  destination tied to operand 1, 1, or 3 respectively,
;;                   emitted as the 132, 213 and 231 three-operand forms;
;;   3-4 (isa fma4): separate destination, four-operand form, with the
;;                   memory operand allowed in position 3 or position 2.
1787(define_insn "*fma_fmsub_<mode>"
1788  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1789	(fma:FMAMODE
1790	  (match_operand:FMAMODE   1 "nonimmediate_operand" "%0, 0,x, x,x")
1791	  (match_operand:FMAMODE   2 "nonimmediate_operand" "xm, x,xm,x,m")
1792	  (neg:FMAMODE
1793	    (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
1794  "TARGET_FMA || TARGET_FMA4"
1795  "@
1796   vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1797   vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1798   vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1799   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1800   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1801  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1802   (set_attr "type" "ssemuladd")
1803   (set_attr "mode" "<MODE>")])
1804
;; Negated fused multiply-add insn: operand 0 = fma (-op1, op2, op3).
;; Five alternatives, selected through the "isa" attribute:
;;   0-2 (isa fma):  destination tied to operand 1, 1, or 3 respectively,
;;                   emitted as the 132, 213 and 231 three-operand forms;
;;   3-4 (isa fma4): separate destination, four-operand form, with the
;;                   memory operand allowed in position 3 or position 2.
1805(define_insn "*fma_fnmadd_<mode>"
1806  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1807	(fma:FMAMODE
1808	  (neg:FMAMODE
1809	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
1810	  (match_operand:FMAMODE   2 "nonimmediate_operand" "xm, x,xm,x,m")
1811	  (match_operand:FMAMODE   3 "nonimmediate_operand" " x,xm,0,xm,x")))]
1812  "TARGET_FMA || TARGET_FMA4"
1813  "@
1814   vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1815   vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1816   vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1817   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1818   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1819  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1820   (set_attr "type" "ssemuladd")
1821   (set_attr "mode" "<MODE>")])
1822
;; Negated fused multiply-subtract insn:
;; operand 0 = fma (-op1, op2, -op3).
;; Five alternatives, selected through the "isa" attribute:
;;   0-2 (isa fma):  destination tied to operand 1, 1, or 3 respectively,
;;                   emitted as the 132, 213 and 231 three-operand forms;
;;   3-4 (isa fma4): separate destination, four-operand form, with the
;;                   memory operand allowed in position 3 or position 2.
1823(define_insn "*fma_fnmsub_<mode>"
1824  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
1825	(fma:FMAMODE
1826	  (neg:FMAMODE
1827	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
1828	  (match_operand:FMAMODE   2 "nonimmediate_operand" "xm, x,xm,x,m")
1829	  (neg:FMAMODE
1830	    (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
1831  "TARGET_FMA || TARGET_FMA4"
1832  "@
1833   vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1834   vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1835   vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1836   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1837   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1838  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1839   (set_attr "type" "ssemuladd")
1840   (set_attr "mode" "<MODE>")])
1841
1842;; FMA parallel floating point multiply addsub and subadd operations.
1843
1844;; It would be possible to represent these without the UNSPEC as
1845;;
1846;; (vec_merge
1847;;   (fma op1 op2 op3)
1848;;   (fma op1 op2 (neg op3))
1849;;   (merge-const))
1850;;
1851;; But this doesn't seem useful in practice.
1852
;; Named expander for the multiply add/sub operation over VF modes,
;; represented as UNSPEC_FMADDSUB on operands 1-3.  There is no
;; preparation code; the template is emitted as written when either FMA
;; or FMA4 is available.
1853(define_expand "fmaddsub_<mode>"
1854  [(set (match_operand:VF 0 "register_operand")
1855	(unspec:VF
1856	  [(match_operand:VF 1 "nonimmediate_operand")
1857	   (match_operand:VF 2 "nonimmediate_operand")
1858	   (match_operand:VF 3 "nonimmediate_operand")]
1859	  UNSPEC_FMADDSUB))]
1860  "TARGET_FMA || TARGET_FMA4")
1861
;; Matcher for UNSPEC_FMADDSUB.  Alternatives 0-2 emit the three-operand
;; vfmaddsub132/213/231 forms (isa "fma"); alternatives 3-4 emit the
;; four-operand vfmaddsub form (isa "fma4") and differ only in whether
;; operand 3 or operand 2 may be in memory.
1862(define_insn "*fma_fmaddsub_<mode>"
1863  [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
1864	(unspec:VF
1865	  [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
1866	   (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
1867	   (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x")]
1868	  UNSPEC_FMADDSUB))]
1869  "TARGET_FMA || TARGET_FMA4"
1870  "@
1871   vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1872   vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1873   vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1874   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1875   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1876  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1877   (set_attr "type" "ssemuladd")
1878   (set_attr "mode" "<MODE>")])
1879
;; The subadd counterpart: the same UNSPEC_FMADDSUB, but with operand 3
;; wrapped in NEG, emitted as vfmsubadd.  The alternative layout matches
;; *fma_fmaddsub_<mode> (three "fma" alternatives, then two "fma4").
1880(define_insn "*fma_fmsubadd_<mode>"
1881  [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
1882	(unspec:VF
1883	  [(match_operand:VF   1 "nonimmediate_operand" "%0, 0,x, x,x")
1884	   (match_operand:VF   2 "nonimmediate_operand" "xm, x,xm,x,m")
1885	   (neg:VF
1886	     (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x"))]
1887	  UNSPEC_FMADDSUB))]
1888  "TARGET_FMA || TARGET_FMA4"
1889  "@
1890   vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1891   vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1892   vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
1893   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
1894   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1895  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
1896   (set_attr "type" "ssemuladd")
1897   (set_attr "mode" "<MODE>")])
1898
1899;; FMA3 floating point scalar intrinsics.  These merge the result with
1900;; the high-order elements from the destination register.
1901
;; Expander for the scalar FMA3 multiply-add.  The vec_merge mask is 1, so
;; only element 0 comes from the FMA; every other element is copied from
;; operand 1 (match_dup 1).  FMA3 only (TARGET_FMA).
1902(define_expand "fmai_vmfmadd_<mode>"
1903  [(set (match_operand:VF_128 0 "register_operand")
1904	(vec_merge:VF_128
1905	  (fma:VF_128
1906	    (match_operand:VF_128 1 "nonimmediate_operand")
1907	    (match_operand:VF_128 2 "nonimmediate_operand")
1908	    (match_operand:VF_128 3 "nonimmediate_operand"))
1909	  (match_dup 1)
1910	  (const_int 1)))]
1911  "TARGET_FMA")
1912
;; Scalar FMA3 multiply-add.  Operand 1 is tied to the destination ("0")
;; in both alternatives and also supplies the unmodified upper elements.
;; Alternative 0 (vfmadd132) lets operand 2 be memory; alternative 1
;; (vfmadd213) lets operand 3 be memory.
1913(define_insn "*fmai_fmadd_<mode>"
1914  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1915        (vec_merge:VF_128
1916	  (fma:VF_128
1917	    (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
1918	    (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
1919	    (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
1920	  (match_dup 1)
1921	  (const_int 1)))]
1922  "TARGET_FMA"
1923  "@
1924   vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1925   vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
1926  [(set_attr "type" "ssemuladd")
1927   (set_attr "mode" "<MODE>")])
1928
;; Scalar FMA3 multiply-subtract: like *fmai_fmadd_<mode> but with the
;; addend (operand 3) negated.  Operand 1 is tied to the destination and
;; provides the upper elements through (match_dup 1).
1929(define_insn "*fmai_fmsub_<mode>"
1930  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1931        (vec_merge:VF_128
1932	  (fma:VF_128
1933	    (match_operand:VF_128   1 "nonimmediate_operand" " 0, 0")
1934	    (match_operand:VF_128   2 "nonimmediate_operand" "xm, x")
1935	    (neg:VF_128
1936	      (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
1937	  (match_dup 1)
1938	  (const_int 1)))]
1939  "TARGET_FMA"
1940  "@
1941   vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1942   vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
1943  [(set_attr "type" "ssemuladd")
1944   (set_attr "mode" "<MODE>")])
1945
;; Scalar FMA3 negated multiply-add.  Note the operand order in the RTL:
;; the NEG wraps operand 2, which appears first, and operand 1 (tied to
;; the destination) is the second multiplicand.  Operand 1 still supplies
;; the upper elements through (match_dup 1).
1946(define_insn "*fmai_fnmadd_<mode>"
1947  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1948        (vec_merge:VF_128
1949	  (fma:VF_128
1950	    (neg:VF_128
1951	      (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
1952	    (match_operand:VF_128   1 "nonimmediate_operand" " 0, 0")
1953	    (match_operand:VF_128   3 "nonimmediate_operand" " x,xm"))
1954	  (match_dup 1)
1955	  (const_int 1)))]
1956  "TARGET_FMA"
1957  "@
1958   vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1959   vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
1960  [(set_attr "type" "ssemuladd")
1961   (set_attr "mode" "<MODE>")])
1962
;; Scalar FMA3 negated multiply-subtract: NEG wraps both operand 2 (the
;; first multiplicand in the RTL) and the addend, operand 3.  Operand 1 is
;; tied to the destination and supplies the upper elements.
1963(define_insn "*fmai_fnmsub_<mode>"
1964  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1965        (vec_merge:VF_128
1966	  (fma:VF_128
1967	    (neg:VF_128
1968	      (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
1969	    (match_operand:VF_128   1 "nonimmediate_operand" " 0, 0")
1970	    (neg:VF_128
1971	      (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
1972	  (match_dup 1)
1973	  (const_int 1)))]
1974  "TARGET_FMA"
1975  "@
1976   vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1977   vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
1978  [(set_attr "type" "ssemuladd")
1979   (set_attr "mode" "<MODE>")])
1980
1981;; FMA4 floating point scalar intrinsics.  These write the
1982;; entire destination register, with the high-order elements zeroed.
1983
;; Expander for the scalar FMA4 multiply-add.  The preparation code sets
;; operands[4] to the zero vector of the mode, so with merge mask 1 only
;; element 0 receives the FMA result and all other elements are zero.
1984(define_expand "fma4i_vmfmadd_<mode>"
1985  [(set (match_operand:VF_128 0 "register_operand")
1986	(vec_merge:VF_128
1987	  (fma:VF_128
1988	    (match_operand:VF_128 1 "nonimmediate_operand")
1989	    (match_operand:VF_128 2 "nonimmediate_operand")
1990	    (match_operand:VF_128 3 "nonimmediate_operand"))
1991	  (match_dup 4)
1992	  (const_int 1)))]
1993  "TARGET_FMA4"
1994{
1995  operands[4] = CONST0_RTX (<MODE>mode);
1996})
1997
;; Scalar FMA4 multiply-add with the upper elements taken from a constant
;; zero (operand 4, const0_operand).  One output template serves both
;; alternatives, which differ only in whether operand 3 or operand 2 may
;; be in memory.
1998(define_insn "*fma4i_vmfmadd_<mode>"
1999  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2000	(vec_merge:VF_128
2001	  (fma:VF_128
2002	    (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
2003	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2004	    (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
2005	  (match_operand:VF_128 4 "const0_operand" "")
2006	  (const_int 1)))]
2007  "TARGET_FMA4"
2008  "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2009  [(set_attr "type" "ssemuladd")
2010   (set_attr "mode" "<MODE>")])
2011
;; Scalar FMA4 multiply-subtract: the addend (operand 3) is negated, and
;; the upper elements come from the constant-zero operand 4.
2012(define_insn "*fma4i_vmfmsub_<mode>"
2013  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2014	(vec_merge:VF_128
2015	  (fma:VF_128
2016	    (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
2017	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
2018	    (neg:VF_128
2019	      (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
2020	  (match_operand:VF_128 4 "const0_operand" "")
2021	  (const_int 1)))]
2022  "TARGET_FMA4"
2023  "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2024  [(set_attr "type" "ssemuladd")
2025   (set_attr "mode" "<MODE>")])
2026
;; Scalar FMA4 negated multiply-add: operand 1 is negated inside the FMA,
;; and the upper elements come from the constant-zero operand 4.
2027(define_insn "*fma4i_vmfnmadd_<mode>"
2028  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2029	(vec_merge:VF_128
2030	  (fma:VF_128
2031	    (neg:VF_128
2032	      (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
2033	    (match_operand:VF_128   2 "nonimmediate_operand" " x,m")
2034	    (match_operand:VF_128   3 "nonimmediate_operand" "xm,x"))
2035	  (match_operand:VF_128 4 "const0_operand" "")
2036	  (const_int 1)))]
2037  "TARGET_FMA4"
2038  "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2039  [(set_attr "type" "ssemuladd")
2040   (set_attr "mode" "<MODE>")])
2041
;; Scalar FMA4 negated multiply-subtract: both operand 1 and the addend
;; (operand 3) are negated; the upper elements come from the constant-zero
;; operand 4.
2042(define_insn "*fma4i_vmfnmsub_<mode>"
2043  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2044	(vec_merge:VF_128
2045	  (fma:VF_128
2046	    (neg:VF_128
2047	      (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
2048	    (match_operand:VF_128   2 "nonimmediate_operand" " x,m")
2049	    (neg:VF_128
2050	      (match_operand:VF_128   3 "nonimmediate_operand" "xm,x")))
2051	  (match_operand:VF_128 4 "const0_operand" "")
2052	  (const_int 1)))]
2053  "TARGET_FMA4"
2054  "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2055  [(set_attr "type" "ssemuladd")
2056   (set_attr "mode" "<MODE>")])
2057
2058;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2059;;
2060;; Parallel single-precision floating point conversion operations
2061;;
2062;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2063
;; cvtpi2ps: converts the V2SI operand 2 ("ym") to float and merges it
;; into the low two elements (merge mask 3) of operand 1, which is tied
;; to the destination.  The remaining elements of operand 1 are kept.
2064(define_insn "sse_cvtpi2ps"
2065  [(set (match_operand:V4SF 0 "register_operand" "=x")
2066	(vec_merge:V4SF
2067	  (vec_duplicate:V4SF
2068	    (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2069	  (match_operand:V4SF 1 "register_operand" "0")
2070	  (const_int 3)))]
2071  "TARGET_SSE"
2072  "cvtpi2ps\t{%2, %0|%0, %2}"
2073  [(set_attr "type" "ssecvt")
2074   (set_attr "mode" "V4SF")])
2075
;; cvtps2pi: selects elements 0 and 1 of the UNSPEC_FIX_NOTRUNC
;; conversion of a V4SF operand and writes them to a V2SI destination
;; with constraint "y".  The unit attribute is "mmx".
2076(define_insn "sse_cvtps2pi"
2077  [(set (match_operand:V2SI 0 "register_operand" "=y")
2078	(vec_select:V2SI
2079	  (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2080		       UNSPEC_FIX_NOTRUNC)
2081	  (parallel [(const_int 0) (const_int 1)])))]
2082  "TARGET_SSE"
2083  "cvtps2pi\t{%1, %0|%0, %1}"
2084  [(set_attr "type" "ssecvt")
2085   (set_attr "unit" "mmx")
2086   (set_attr "mode" "DI")])
2087
;; cvttps2pi: the truncating counterpart of sse_cvtps2pi, expressed with
;; the FIX rtx code instead of UNSPEC_FIX_NOTRUNC.  prefix_rep is forced
;; to 0 here.
2088(define_insn "sse_cvttps2pi"
2089  [(set (match_operand:V2SI 0 "register_operand" "=y")
2090	(vec_select:V2SI
2091	  (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2092	  (parallel [(const_int 0) (const_int 1)])))]
2093  "TARGET_SSE"
2094  "cvttps2pi\t{%1, %0|%0, %1}"
2095  [(set_attr "type" "ssecvt")
2096   (set_attr "unit" "mmx")
2097   (set_attr "prefix_rep" "0")
2098   (set_attr "mode" "SF")])
2099
;; cvtsi2ss: converts the SI operand 2 to SF and places it in element 0
;; (merge mask 1); the other elements come from operand 1.  Alternatives
;; 0-1 are the two-operand non-AVX forms (operand 1 tied to the
;; destination, source in a GPR or in memory); alternative 2 is the
;; three-operand AVX form.
2100(define_insn "sse_cvtsi2ss"
2101  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2102	(vec_merge:V4SF
2103	  (vec_duplicate:V4SF
2104	    (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2105	  (match_operand:V4SF 1 "register_operand" "0,0,x")
2106	  (const_int 1)))]
2107  "TARGET_SSE"
2108  "@
2109   cvtsi2ss\t{%2, %0|%0, %2}
2110   cvtsi2ss\t{%2, %0|%0, %2}
2111   vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2112  [(set_attr "isa" "noavx,noavx,avx")
2113   (set_attr "type" "sseicvt")
2114   (set_attr "athlon_decode" "vector,double,*")
2115   (set_attr "amdfam10_decode" "vector,double,*")
2116   (set_attr "bdver1_decode" "double,direct,*")
2117   (set_attr "prefix" "orig,orig,vex")
2118   (set_attr "mode" "SF")])
2119
;; 64-bit-source variant of sse_cvtsi2ss: operand 2 is DImode and the
;; pattern additionally requires TARGET_64BIT.  The non-AVX alternatives
;; set prefix_rex to 1; the AVX alternative sets length_vex to 4.
2120(define_insn "sse_cvtsi2ssq"
2121  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2122	(vec_merge:V4SF
2123	  (vec_duplicate:V4SF
2124	    (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2125	  (match_operand:V4SF 1 "register_operand" "0,0,x")
2126	  (const_int 1)))]
2127  "TARGET_SSE && TARGET_64BIT"
2128  "@
2129   cvtsi2ssq\t{%2, %0|%0, %2}
2130   cvtsi2ssq\t{%2, %0|%0, %2}
2131   vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2132  [(set_attr "isa" "noavx,noavx,avx")
2133   (set_attr "type" "sseicvt")
2134   (set_attr "athlon_decode" "vector,double,*")
2135   (set_attr "amdfam10_decode" "vector,double,*")
2136   (set_attr "bdver1_decode" "double,direct,*")
2137   (set_attr "length_vex" "*,*,4")
2138   (set_attr "prefix_rex" "1,1,*")
2139   (set_attr "prefix" "orig,orig,vex")
2140   (set_attr "mode" "SF")])
2141
;; cvtss2si: converts element 0 of a V4SF operand to SImode through
;; UNSPEC_FIX_NOTRUNC (not the truncating FIX code).  Alternative 0 takes
;; the source in a register, alternative 1 in memory.
2142(define_insn "sse_cvtss2si"
2143  [(set (match_operand:SI 0 "register_operand" "=r,r")
2144	(unspec:SI
2145	  [(vec_select:SF
2146	     (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2147	     (parallel [(const_int 0)]))]
2148	  UNSPEC_FIX_NOTRUNC))]
2149  "TARGET_SSE"
2150  "%vcvtss2si\t{%1, %0|%0, %1}"
2151  [(set_attr "type" "sseicvt")
2152   (set_attr "athlon_decode" "double,vector")
2153   (set_attr "bdver1_decode" "double,double")
2154   (set_attr "prefix_rep" "1")
2155   (set_attr "prefix" "maybe_vex")
2156   (set_attr "mode" "SI")])
2157
;; Same instruction as sse_cvtss2si, but matching a scalar SF operand
;; directly rather than a vec_select of element 0 from a V4SF vector.
2158(define_insn "sse_cvtss2si_2"
2159  [(set (match_operand:SI 0 "register_operand" "=r,r")
2160	(unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2161		   UNSPEC_FIX_NOTRUNC))]
2162  "TARGET_SSE"
2163  "%vcvtss2si\t{%1, %0|%0, %1}"
2164  [(set_attr "type" "sseicvt")
2165   (set_attr "athlon_decode" "double,vector")
2166   (set_attr "amdfam10_decode" "double,double")
2167   (set_attr "bdver1_decode" "double,double")
2168   (set_attr "prefix_rep" "1")
2169   (set_attr "prefix" "maybe_vex")
2170   (set_attr "mode" "SI")])
2171
;; 64-bit-result variant of sse_cvtss2si: the destination is DImode and
;; TARGET_64BIT is required.  The mnemonic carries a {q} suffix in the
;; template.
2172(define_insn "sse_cvtss2siq"
2173  [(set (match_operand:DI 0 "register_operand" "=r,r")
2174	(unspec:DI
2175	  [(vec_select:SF
2176	     (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2177	     (parallel [(const_int 0)]))]
2178	  UNSPEC_FIX_NOTRUNC))]
2179  "TARGET_SSE && TARGET_64BIT"
2180  "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2181  [(set_attr "type" "sseicvt")
2182   (set_attr "athlon_decode" "double,vector")
2183   (set_attr "bdver1_decode" "double,double")
2184   (set_attr "prefix_rep" "1")
2185   (set_attr "prefix" "maybe_vex")
2186   (set_attr "mode" "DI")])
2187
;; Same instruction as sse_cvtss2siq, but matching a scalar SF operand
;; directly instead of element 0 selected from a V4SF vector.
2188(define_insn "sse_cvtss2siq_2"
2189  [(set (match_operand:DI 0 "register_operand" "=r,r")
2190	(unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2191		   UNSPEC_FIX_NOTRUNC))]
2192  "TARGET_SSE && TARGET_64BIT"
2193  "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2194  [(set_attr "type" "sseicvt")
2195   (set_attr "athlon_decode" "double,vector")
2196   (set_attr "amdfam10_decode" "double,double")
2197   (set_attr "bdver1_decode" "double,double")
2198   (set_attr "prefix_rep" "1")
2199   (set_attr "prefix" "maybe_vex")
2200   (set_attr "mode" "DI")])
2201
;; cvttss2si: truncating conversion (FIX rtx code) of element 0 of a V4SF
;; operand to SImode.  Alternative 0 is a register source, alternative 1
;; a memory source.
2202(define_insn "sse_cvttss2si"
2203  [(set (match_operand:SI 0 "register_operand" "=r,r")
2204	(fix:SI
2205	  (vec_select:SF
2206	    (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2207	    (parallel [(const_int 0)]))))]
2208  "TARGET_SSE"
2209  "%vcvttss2si\t{%1, %0|%0, %1}"
2210  [(set_attr "type" "sseicvt")
2211   (set_attr "athlon_decode" "double,vector")
2212   (set_attr "amdfam10_decode" "double,double")
2213   (set_attr "bdver1_decode" "double,double")
2214   (set_attr "prefix_rep" "1")
2215   (set_attr "prefix" "maybe_vex")
2216   (set_attr "mode" "SI")])
2217
;; 64-bit-result variant of sse_cvttss2si: DImode destination, requires
;; TARGET_64BIT, and the template adds the {q} mnemonic suffix.
2218(define_insn "sse_cvttss2siq"
2219  [(set (match_operand:DI 0 "register_operand" "=r,r")
2220	(fix:DI
2221	  (vec_select:SF
2222	    (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2223	    (parallel [(const_int 0)]))))]
2224  "TARGET_SSE && TARGET_64BIT"
2225  "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2226  [(set_attr "type" "sseicvt")
2227   (set_attr "athlon_decode" "double,vector")
2228   (set_attr "amdfam10_decode" "double,double")
2229   (set_attr "bdver1_decode" "double,double")
2230   (set_attr "prefix_rep" "1")
2231   (set_attr "prefix" "maybe_vex")
2232   (set_attr "mode" "DI")])
2233
;; Signed integer vector to single-precision float vector conversion
;; (cvtdq2ps), iterated over VF1 with the matching integer vector mode as
;; the source.  Requires SSE2.
2234(define_insn "float<sseintvecmodelower><mode>2"
2235  [(set (match_operand:VF1 0 "register_operand" "=x")
2236	(float:VF1
2237	  (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "xm")))]
2238  "TARGET_SSE2"
2239  "%vcvtdq2ps\t{%1, %0|%0, %1}"
2240  [(set_attr "type" "ssecvt")
2241   (set_attr "prefix" "maybe_vex")
2242   (set_attr "mode" "<sseinsnmode>")])
2243
;; Unsigned integer vector to float vector conversion.  All the work is
;; delegated to ix86_expand_vector_convert_uns_vsivsf.  The condition
;; allows V4SF with plain SSE2 and any other VF1 mode only with AVX2.
2244(define_expand "floatuns<sseintvecmodelower><mode>2"
2245  [(match_operand:VF1 0 "register_operand" "")
2246   (match_operand:<sseintvecmode> 1 "register_operand" "")]
2247  "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
2248{
2249  ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
2250  DONE;
2251})
2252
;; 256-bit vcvtps2dq: V8SF to V8SI through UNSPEC_FIX_NOTRUNC.  AVX only,
;; so the mnemonic is always the VEX form.
2253(define_insn "avx_cvtps2dq256"
2254  [(set (match_operand:V8SI 0 "register_operand" "=x")
2255	(unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2256		     UNSPEC_FIX_NOTRUNC))]
2257  "TARGET_AVX"
2258  "vcvtps2dq\t{%1, %0|%0, %1}"
2259  [(set_attr "type" "ssecvt")
2260   (set_attr "prefix" "vex")
2261   (set_attr "mode" "OI")])
2262
;; 128-bit cvtps2dq: V4SF to V4SI through UNSPEC_FIX_NOTRUNC.
;; prefix_data16 is conditional: "*" when TARGET_AVX, otherwise "1".
2263(define_insn "sse2_cvtps2dq"
2264  [(set (match_operand:V4SI 0 "register_operand" "=x")
2265	(unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2266		     UNSPEC_FIX_NOTRUNC))]
2267  "TARGET_SSE2"
2268  "%vcvtps2dq\t{%1, %0|%0, %1}"
2269  [(set_attr "type" "ssecvt")
2270   (set (attr "prefix_data16")
2271     (if_then_else
2272       (match_test "TARGET_AVX")
2273     (const_string "*")
2274     (const_string "1")))
2275   (set_attr "prefix" "maybe_vex")
2276   (set_attr "mode" "TI")])
2277
;; 256-bit truncating conversion V8SF to V8SI (vcvttps2dq), expressed
;; with the FIX rtx code.  AVX only.
2278(define_insn "fix_truncv8sfv8si2"
2279  [(set (match_operand:V8SI 0 "register_operand" "=x")
2280	(fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2281  "TARGET_AVX"
2282  "vcvttps2dq\t{%1, %0|%0, %1}"
2283  [(set_attr "type" "ssecvt")
2284   (set_attr "prefix" "vex")
2285   (set_attr "mode" "OI")])
2286
;; 128-bit truncating conversion V4SF to V4SI (cvttps2dq), expressed with
;; the FIX rtx code.  Both prefix attributes are conditional on
;; TARGET_AVX: when it is set they are "*"; otherwise prefix_rep is "1"
;; and prefix_data16 is "0".  prefix_data16 is specified exactly once, in
;; the same conditional form that sse2_cvtps2dq uses; a second,
;; unconditional set_attr for the same attribute would conflict with it.
2287(define_insn "fix_truncv4sfv4si2"
2288  [(set (match_operand:V4SI 0 "register_operand" "=x")
2289	(fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2290  "TARGET_SSE2"
2291  "%vcvttps2dq\t{%1, %0|%0, %1}"
2292  [(set_attr "type" "ssecvt")
2293   (set (attr "prefix_rep")
2294     (if_then_else
2295       (match_test "TARGET_AVX")
2296     (const_string "*")
2297     (const_string "1")))
2298   (set (attr "prefix_data16")
2299     (if_then_else
2300       (match_test "TARGET_AVX")
2301     (const_string "*")
2302     (const_string "0")))
2304   (set_attr "prefix" "maybe_vex")
2305   (set_attr "mode" "TI")])
2306
;; Float vector to unsigned integer vector conversion, built from the
;; signed one in three steps:
;;   1. ix86_expand_adjust_ufix_to_sfix_si returns an adjusted input in
;;      tmp[0] and stores a second value through &tmp[2];
;;   2. the signed fix_trunc pattern converts tmp[0] into tmp[1];
;;   3. tmp[1] is XORed with tmp[2] to produce operands[0].
2307(define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
2308  [(match_operand:<sseintvecmode> 0 "register_operand" "")
2309   (match_operand:VF1 1 "register_operand" "")]
2310  "TARGET_SSE2"
2311{
2312  rtx tmp[3];
2313  tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
2314  tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
2315  emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
2316  emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
2317  DONE;
2318})
2319
2320;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2321;;
2322;; Parallel double-precision floating point conversion operations
2323;;
2324;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2325
;; cvtpi2pd: V2SI to V2DF.  Alternative 0 takes the source with
;; constraint "y" (unit "mmx", prefix_data16 "1"); alternative 1 takes it
;; from memory and leaves both attributes at "*".
2326(define_insn "sse2_cvtpi2pd"
2327  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2328	(float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2329  "TARGET_SSE2"
2330  "cvtpi2pd\t{%1, %0|%0, %1}"
2331  [(set_attr "type" "ssecvt")
2332   (set_attr "unit" "mmx,*")
2333   (set_attr "prefix_data16" "1,*")
2334   (set_attr "mode" "V2DF")])
2335
;; cvtpd2pi: V2DF to V2SI through UNSPEC_FIX_NOTRUNC, writing a
;; destination with constraint "y".
2336(define_insn "sse2_cvtpd2pi"
2337  [(set (match_operand:V2SI 0 "register_operand" "=y")
2338	(unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2339		     UNSPEC_FIX_NOTRUNC))]
2340  "TARGET_SSE2"
2341  "cvtpd2pi\t{%1, %0|%0, %1}"
2342  [(set_attr "type" "ssecvt")
2343   (set_attr "unit" "mmx")
2344   (set_attr "bdver1_decode" "double")
2345   (set_attr "prefix_data16" "1")
2346   (set_attr "mode" "DI")])
2347
;; cvttpd2pi: the truncating counterpart of sse2_cvtpd2pi, expressed with
;; the FIX rtx code.
2348(define_insn "sse2_cvttpd2pi"
2349  [(set (match_operand:V2SI 0 "register_operand" "=y")
2350	(fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2351  "TARGET_SSE2"
2352  "cvttpd2pi\t{%1, %0|%0, %1}"
2353  [(set_attr "type" "ssecvt")
2354   (set_attr "unit" "mmx")
2355   (set_attr "bdver1_decode" "double")
2356   (set_attr "prefix_data16" "1")
2357   (set_attr "mode" "TI")])
2358
;; cvtsi2sd: converts the SI operand 2 to DF and places it in element 0
;; (merge mask 1); the other element comes from operand 1.  Alternatives
;; 0-1 are the two-operand non-AVX forms with operand 1 tied to the
;; destination; alternative 2 is the three-operand AVX form.
2359(define_insn "sse2_cvtsi2sd"
2360  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2361	(vec_merge:V2DF
2362	  (vec_duplicate:V2DF
2363	    (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2364	  (match_operand:V2DF 1 "register_operand" "0,0,x")
2365	  (const_int 1)))]
2366  "TARGET_SSE2"
2367  "@
2368   cvtsi2sd\t{%2, %0|%0, %2}
2369   cvtsi2sd\t{%2, %0|%0, %2}
2370   vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2371  [(set_attr "isa" "noavx,noavx,avx")
2372   (set_attr "type" "sseicvt")
2373   (set_attr "athlon_decode" "double,direct,*")
2374   (set_attr "amdfam10_decode" "vector,double,*")
2375   (set_attr "bdver1_decode" "double,direct,*")
2376   (set_attr "prefix" "orig,orig,vex")
2377   (set_attr "mode" "DF")])
2378
;; 64-bit-source variant of sse2_cvtsi2sd: operand 2 is DImode and
;; TARGET_64BIT is required.  The non-AVX alternatives set prefix_rex to
;; 1; the AVX alternative sets length_vex to 4.
2379(define_insn "sse2_cvtsi2sdq"
2380  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2381	(vec_merge:V2DF
2382	  (vec_duplicate:V2DF
2383	    (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2384	  (match_operand:V2DF 1 "register_operand" "0,0,x")
2385	  (const_int 1)))]
2386  "TARGET_SSE2 && TARGET_64BIT"
2387  "@
2388   cvtsi2sdq\t{%2, %0|%0, %2}
2389   cvtsi2sdq\t{%2, %0|%0, %2}
2390   vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2391  [(set_attr "isa" "noavx,noavx,avx")
2392   (set_attr "type" "sseicvt")
2393   (set_attr "athlon_decode" "double,direct,*")
2394   (set_attr "amdfam10_decode" "vector,double,*")
2395   (set_attr "bdver1_decode" "double,direct,*")
2396   (set_attr "length_vex" "*,*,4")
2397   (set_attr "prefix_rex" "1,1,*")
2398   (set_attr "prefix" "orig,orig,vex")
2399   (set_attr "mode" "DF")])
2400
;; cvtsd2si: converts element 0 of a V2DF operand to SImode through
;; UNSPEC_FIX_NOTRUNC.  Alternative 0 is a register source, alternative 1
;; a memory source.
2401(define_insn "sse2_cvtsd2si"
2402  [(set (match_operand:SI 0 "register_operand" "=r,r")
2403	(unspec:SI
2404	  [(vec_select:DF
2405	     (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2406	     (parallel [(const_int 0)]))]
2407	  UNSPEC_FIX_NOTRUNC))]
2408  "TARGET_SSE2"
2409  "%vcvtsd2si\t{%1, %0|%0, %1}"
2410  [(set_attr "type" "sseicvt")
2411   (set_attr "athlon_decode" "double,vector")
2412   (set_attr "bdver1_decode" "double,double")
2413   (set_attr "prefix_rep" "1")
2414   (set_attr "prefix" "maybe_vex")
2415   (set_attr "mode" "SI")])
2416
;; Same instruction as sse2_cvtsd2si, but matching a scalar DF operand
;; directly rather than element 0 selected from a V2DF vector.
2417(define_insn "sse2_cvtsd2si_2"
2418  [(set (match_operand:SI 0 "register_operand" "=r,r")
2419	(unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2420		   UNSPEC_FIX_NOTRUNC))]
2421  "TARGET_SSE2"
2422  "%vcvtsd2si\t{%1, %0|%0, %1}"
2423  [(set_attr "type" "sseicvt")
2424   (set_attr "athlon_decode" "double,vector")
2425   (set_attr "amdfam10_decode" "double,double")
2426   (set_attr "bdver1_decode" "double,double")
2427   (set_attr "prefix_rep" "1")
2428   (set_attr "prefix" "maybe_vex")
2429   (set_attr "mode" "SI")])
2430
;; 64-bit-result variant of sse2_cvtsd2si: DImode destination, requires
;; TARGET_64BIT, and the template adds the {q} mnemonic suffix.
2431(define_insn "sse2_cvtsd2siq"
2432  [(set (match_operand:DI 0 "register_operand" "=r,r")
2433	(unspec:DI
2434	  [(vec_select:DF
2435	     (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2436	     (parallel [(const_int 0)]))]
2437	  UNSPEC_FIX_NOTRUNC))]
2438  "TARGET_SSE2 && TARGET_64BIT"
2439  "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2440  [(set_attr "type" "sseicvt")
2441   (set_attr "athlon_decode" "double,vector")
2442   (set_attr "bdver1_decode" "double,double")
2443   (set_attr "prefix_rep" "1")
2444   (set_attr "prefix" "maybe_vex")
2445   (set_attr "mode" "DI")])
2446
;; Same instruction as sse2_cvtsd2siq, but matching a scalar DF operand
;; directly instead of element 0 selected from a V2DF vector.
2447(define_insn "sse2_cvtsd2siq_2"
2448  [(set (match_operand:DI 0 "register_operand" "=r,r")
2449	(unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2450		   UNSPEC_FIX_NOTRUNC))]
2451  "TARGET_SSE2 && TARGET_64BIT"
2452  "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2453  [(set_attr "type" "sseicvt")
2454   (set_attr "athlon_decode" "double,vector")
2455   (set_attr "amdfam10_decode" "double,double")
2456   (set_attr "bdver1_decode" "double,double")
2457   (set_attr "prefix_rep" "1")
2458   (set_attr "prefix" "maybe_vex")
2459   (set_attr "mode" "DI")])
2460
;; cvttsd2si: truncating conversion (FIX rtx code) of element 0 of a V2DF
;; operand to SImode.  Alternative 0 is a register source, alternative 1
;; a memory source.
2461(define_insn "sse2_cvttsd2si"
2462  [(set (match_operand:SI 0 "register_operand" "=r,r")
2463	(fix:SI
2464	  (vec_select:DF
2465	    (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2466	    (parallel [(const_int 0)]))))]
2467  "TARGET_SSE2"
2468  "%vcvttsd2si\t{%1, %0|%0, %1}"
2469  [(set_attr "type" "sseicvt")
2470   (set_attr "athlon_decode" "double,vector")
2471   (set_attr "amdfam10_decode" "double,double")
2472   (set_attr "bdver1_decode" "double,double")
2473   (set_attr "prefix_rep" "1")
2474   (set_attr "prefix" "maybe_vex")
2475   (set_attr "mode" "SI")])
2476
;; 64-bit-result variant of sse2_cvttsd2si: DImode destination, requires
;; TARGET_64BIT, and the template adds the {q} mnemonic suffix.
2477(define_insn "sse2_cvttsd2siq"
2478  [(set (match_operand:DI 0 "register_operand" "=r,r")
2479	(fix:DI
2480	  (vec_select:DF
2481	    (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2482	    (parallel [(const_int 0)]))))]
2483  "TARGET_SSE2 && TARGET_64BIT"
2484  "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2485  [(set_attr "type" "sseicvt")
2486   (set_attr "athlon_decode" "double,vector")
2487   (set_attr "amdfam10_decode" "double,double")
2488   (set_attr "bdver1_decode" "double,double")
2489   (set_attr "prefix_rep" "1")
2490   (set_attr "prefix" "maybe_vex")
2491   (set_attr "mode" "DI")])
2492
;; vcvtdq2pd, 256-bit result: converts a whole V4SI operand to V4DF.
;; AVX only.
2493(define_insn "floatv4siv4df2"
2494  [(set (match_operand:V4DF 0 "register_operand" "=x")
2495	(float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2496  "TARGET_AVX"
2497  "vcvtdq2pd\t{%1, %0|%0, %1}"
2498  [(set_attr "type" "ssecvt")
2499   (set_attr "prefix" "vex")
2500   (set_attr "mode" "V4DF")])
2501
;; vcvtdq2pd on the low half of a 256-bit integer vector: elements 0-3 of
;; the V8SI operand are selected and converted to V4DF.  The template
;; prints operand 1 with the %x modifier.
;; NOTE(review): %x presumably prints the 128-bit register name; confirm
;; against the operand printer in i386.c.
2502(define_insn "avx_cvtdq2pd256_2"
2503  [(set (match_operand:V4DF 0 "register_operand" "=x")
2504	(float:V4DF
2505	  (vec_select:V4SI
2506	    (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2507	    (parallel [(const_int 0) (const_int 1)
2508		       (const_int 2) (const_int 3)]))))]
2509  "TARGET_AVX"
2510  "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2511  [(set_attr "type" "ssecvt")
2512   (set_attr "prefix" "vex")
2513   (set_attr "mode" "V4DF")])
2514
;; cvtdq2pd, 128-bit result: elements 0 and 1 of the V4SI operand are
;; converted to V2DF.  Only the Intel-syntax half of the template applies
;; the %q modifier to operand 1.
2515(define_insn "sse2_cvtdq2pd"
2516  [(set (match_operand:V2DF 0 "register_operand" "=x")
2517	(float:V2DF
2518	  (vec_select:V2SI
2519	    (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2520	    (parallel [(const_int 0) (const_int 1)]))))]
2521  "TARGET_SSE2"
2522  "%vcvtdq2pd\t{%1, %0|%0, %q1}"
2523  [(set_attr "type" "ssecvt")
2524   (set_attr "prefix" "maybe_vex")
2525   (set_attr "mode" "V2DF")])
2526
;; vcvtpd2dq with a 256-bit source: V4DF to V4SI through
;; UNSPEC_FIX_NOTRUNC.  The template adds the {y} mnemonic suffix.
2527(define_insn "avx_cvtpd2dq256"
2528  [(set (match_operand:V4SI 0 "register_operand" "=x")
2529	(unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2530		     UNSPEC_FIX_NOTRUNC))]
2531  "TARGET_AVX"
2532  "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2533  [(set_attr "type" "ssecvt")
2534   (set_attr "prefix" "vex")
2535   (set_attr "mode" "OI")])
2536
;; Expander producing a V8SI result whose low half is the
;; UNSPEC_FIX_NOTRUNC conversion of a V4DF operand and whose high half is
;; operand 2, which the preparation code sets to the V4SI zero vector.
2537(define_expand "avx_cvtpd2dq256_2"
2538  [(set (match_operand:V8SI 0 "register_operand" "")
2539	(vec_concat:V8SI
2540	  (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "")]
2541		       UNSPEC_FIX_NOTRUNC)
2542	  (match_dup 2)))]
2543  "TARGET_AVX"
2544  "operands[2] = CONST0_RTX (V4SImode);")
2545
;; Matcher for the concat-with-zero form of vcvtpd2dq{y}: the high V4SI
;; half must be a const0_operand.  The destination is printed with the %x
;; modifier.
2546(define_insn "*avx_cvtpd2dq256_2"
2547  [(set (match_operand:V8SI 0 "register_operand" "=x")
2548	(vec_concat:V8SI
2549	  (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2550		       UNSPEC_FIX_NOTRUNC)
2551	  (match_operand:V4SI 2 "const0_operand" "")))]
2552  "TARGET_AVX"
2553  "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
2554  [(set_attr "type" "ssecvt")
2555   (set_attr "prefix" "vex")
2556   (set_attr "mode" "OI")])
2557
;; Expander producing a V4SI result whose low half is the
;; UNSPEC_FIX_NOTRUNC conversion of a V2DF operand and whose high half is
;; operand 2, which the preparation code sets to the V2SI zero vector.
2558(define_expand "sse2_cvtpd2dq"
2559  [(set (match_operand:V4SI 0 "register_operand" "")
2560	(vec_concat:V4SI
2561	  (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2562		       UNSPEC_FIX_NOTRUNC)
2563	  (match_dup 2)))]
2564  "TARGET_SSE2"
2565  "operands[2] = CONST0_RTX (V2SImode);")
2566
;; Matcher for cvtpd2dq with a zeroed high half (operand 2 must be a
;; const0_operand).  The C output block picks the mnemonic at output
;; time: vcvtpd2dq{x} under TARGET_AVX, plain cvtpd2dq otherwise.
2567(define_insn "*sse2_cvtpd2dq"
2568  [(set (match_operand:V4SI 0 "register_operand" "=x")
2569	(vec_concat:V4SI
2570	  (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2571		       UNSPEC_FIX_NOTRUNC)
2572	  (match_operand:V2SI 2 "const0_operand" "")))]
2573  "TARGET_SSE2"
2574{
2575  if (TARGET_AVX)
2576    return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2577  else
2578    return "cvtpd2dq\t{%1, %0|%0, %1}";
2579}
2580  [(set_attr "type" "ssecvt")
2581   (set_attr "prefix_rep" "1")
2582   (set_attr "prefix_data16" "0")
2583   (set_attr "prefix" "maybe_vex")
2584   (set_attr "mode" "TI")
2585   (set_attr "amdfam10_decode" "double")
2586   (set_attr "athlon_decode" "vector")
2587   (set_attr "bdver1_decode" "double")])
2588
;; Truncating conversion V4DF to V4SI (vcvttpd2dq with the {y} suffix),
;; expressed with the FIX rtx code.  AVX only.
2589(define_insn "fix_truncv4dfv4si2"
2590  [(set (match_operand:V4SI 0 "register_operand" "=x")
2591	(fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2592  "TARGET_AVX"
2593  "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2594  [(set_attr "type" "ssecvt")
2595   (set_attr "prefix" "vex")
2596   (set_attr "mode" "OI")])
2597
;; Expander producing a V8SI result whose low half is the truncating
;; (FIX) conversion of a V4DF operand and whose high half is operand 2,
;; which the preparation code sets to the V4SI zero vector.
2598(define_expand "avx_cvttpd2dq256_2"
2599  [(set (match_operand:V8SI 0 "register_operand" "")
2600	(vec_concat:V8SI
2601	  (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" ""))
2602	  (match_dup 2)))]
2603  "TARGET_AVX"
2604  "operands[2] = CONST0_RTX (V4SImode);")
2605
;; Matcher for the concat-with-zero form of vcvttpd2dq{y}: the high V4SI
;; half must be a const0_operand.  The destination is printed with the %x
;; modifier.
2606(define_insn "*avx_cvttpd2dq256_2"
2607  [(set (match_operand:V8SI 0 "register_operand" "=x")
2608	(vec_concat:V8SI
2609	  (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
2610	  (match_operand:V4SI 2 "const0_operand" "")))]
2611  "TARGET_AVX"
2612  "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
2613  [(set_attr "type" "ssecvt")
2614   (set_attr "prefix" "vex")
2615   (set_attr "mode" "OI")])
2616
;; Expander producing a V4SI result whose low half is the truncating
;; (FIX) conversion of a V2DF operand and whose high half is operand 2,
;; which the preparation code sets to the V2SI zero vector.
2617(define_expand "sse2_cvttpd2dq"
2618  [(set (match_operand:V4SI 0 "register_operand" "")
2619	(vec_concat:V4SI
2620	  (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2621	  (match_dup 2)))]
2622  "TARGET_SSE2"
2623  "operands[2] = CONST0_RTX (V2SImode);")
2624
;; Matcher for cvttpd2dq with a zeroed high half (operand 2 must be a
;; const0_operand).  The C output block picks the mnemonic at output
;; time: vcvttpd2dq{x} under TARGET_AVX, plain cvttpd2dq otherwise.
2625(define_insn "*sse2_cvttpd2dq"
2626  [(set (match_operand:V4SI 0 "register_operand" "=x")
2627	(vec_concat:V4SI
2628	  (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2629	  (match_operand:V2SI 2 "const0_operand" "")))]
2630  "TARGET_SSE2"
2631{
2632  if (TARGET_AVX)
2633    return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2634  else
2635    return "cvttpd2dq\t{%1, %0|%0, %1}";
2636}
2637  [(set_attr "type" "ssecvt")
2638   (set_attr "amdfam10_decode" "double")
2639   (set_attr "athlon_decode" "vector")
2640   (set_attr "bdver1_decode" "double")
2641   (set_attr "prefix" "maybe_vex")
2642   (set_attr "mode" "TI")])
2643
;; cvtsd2ss: the float_truncate of V2DF operand 2 is duplicated into a
;; V4SF and merged with mask 1, so only element 0 is replaced; the other
;; elements come from operand 1.  Alternatives 0-1 are the non-AVX forms
;; (operand 1 tied to the destination); alternative 2 is the AVX form.
2644(define_insn "sse2_cvtsd2ss"
2645  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2646	(vec_merge:V4SF
2647	  (vec_duplicate:V4SF
2648	    (float_truncate:V2SF
2649	      (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2650	  (match_operand:V4SF 1 "register_operand" "0,0,x")
2651	  (const_int 1)))]
2652  "TARGET_SSE2"
2653  "@
2654   cvtsd2ss\t{%2, %0|%0, %2}
2655   cvtsd2ss\t{%2, %0|%0, %2}
2656   vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2657  [(set_attr "isa" "noavx,noavx,avx")
2658   (set_attr "type" "ssecvt")
2659   (set_attr "athlon_decode" "vector,double,*")
2660   (set_attr "amdfam10_decode" "vector,double,*")
2661   (set_attr "bdver1_decode" "direct,direct,*")
2662   (set_attr "prefix" "orig,orig,vex")
2663   (set_attr "mode" "SF")])
2664
;; cvtss2sd: the float_extend of elements 0-1 of V4SF operand 2 is merged
;; with mask 1, so only element 0 of the result is replaced; the other
;; element comes from operand 1.  Alternatives 0-1 are the non-AVX forms
;; (operand 1 tied to the destination); alternative 2 is the AVX form.
2665(define_insn "sse2_cvtss2sd"
2666  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2667	(vec_merge:V2DF
2668	  (float_extend:V2DF
2669	    (vec_select:V2SF
2670	      (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2671	      (parallel [(const_int 0) (const_int 1)])))
2672	  (match_operand:V2DF 1 "register_operand" "0,0,x")
2673	  (const_int 1)))]
2674  "TARGET_SSE2"
2675  "@
2676   cvtss2sd\t{%2, %0|%0, %2}
2677   cvtss2sd\t{%2, %0|%0, %2}
2678   vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2679  [(set_attr "isa" "noavx,noavx,avx")
2680   (set_attr "type" "ssecvt")
2681   (set_attr "amdfam10_decode" "vector,double,*")
2682   (set_attr "athlon_decode" "direct,direct,*")
2683   (set_attr "bdver1_decode" "direct,direct,*")
2684   (set_attr "prefix" "orig,orig,vex")
2685   (set_attr "mode" "DF")])
2686
;; vcvtpd2ps with a 256-bit source: float_truncate of V4DF to V4SF.  The
;; template adds the {y} mnemonic suffix.  AVX only.
2687(define_insn "avx_cvtpd2ps256"
2688  [(set (match_operand:V4SF 0 "register_operand" "=x")
2689	(float_truncate:V4SF
2690	  (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2691  "TARGET_AVX"
2692  "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2693  [(set_attr "type" "ssecvt")
2694   (set_attr "prefix" "vex")
2695   (set_attr "mode" "V4SF")])
2696
;; Expander producing a V4SF result whose low half is the float_truncate
;; of a V2DF operand and whose high half is operand 2, which the
;; preparation code sets to the V2SF zero vector.
2697(define_expand "sse2_cvtpd2ps"
2698  [(set (match_operand:V4SF 0 "register_operand" "")
2699	(vec_concat:V4SF
2700	  (float_truncate:V2SF
2701	    (match_operand:V2DF 1 "nonimmediate_operand" ""))
2702	  (match_dup 2)))]
2703  "TARGET_SSE2"
2704  "operands[2] = CONST0_RTX (V2SFmode);")
2705
;; Matcher for cvtpd2ps with a zeroed high half (operand 2 must be a
;; const0_operand).  The C output block picks the mnemonic at output
;; time: vcvtpd2ps{x} under TARGET_AVX, plain cvtpd2ps otherwise.
2706(define_insn "*sse2_cvtpd2ps"
2707  [(set (match_operand:V4SF 0 "register_operand" "=x")
2708	(vec_concat:V4SF
2709	  (float_truncate:V2SF
2710	    (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2711	  (match_operand:V2SF 2 "const0_operand" "")))]
2712  "TARGET_SSE2"
2713{
2714  if (TARGET_AVX)
2715    return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2716  else
2717    return "cvtpd2ps\t{%1, %0|%0, %1}";
2718}
2719  [(set_attr "type" "ssecvt")
2720   (set_attr "amdfam10_decode" "double")
2721   (set_attr "athlon_decode" "vector")
2722   (set_attr "bdver1_decode" "double")
2723   (set_attr "prefix_data16" "1")
2724   (set_attr "prefix" "maybe_vex")
2725   (set_attr "mode" "V4SF")])
2726
;; 256-bit float -> double extension: the four SF elements of operand 1
;; (register or memory) are widened into the V4DF register operand 0.
2727(define_insn "avx_cvtps2pd256"
2728  [(set (match_operand:V4DF 0 "register_operand" "=x")
2729	(float_extend:V4DF
2730	  (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2731  "TARGET_AVX"
2732  "vcvtps2pd\t{%1, %0|%0, %1}"
2733  [(set_attr "type" "ssecvt")
2734   (set_attr "prefix" "vex")
2735   (set_attr "mode" "V4DF")])
2736
;; Variant of the float -> double extension whose source is elements 0-3 of
;; a V8SF operand.  The template prints operand 1 with the %x modifier
;; NOTE(review): %x presumably selects the 128-bit (xmm) spelling of the
;; operand -- confirm against the i386 operand printer.
2737(define_insn "*avx_cvtps2pd256_2"
2738  [(set (match_operand:V4DF 0 "register_operand" "=x")
2739	(float_extend:V4DF
2740	  (vec_select:V4SF
2741	    (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2742	    (parallel [(const_int 0) (const_int 1)
2743		       (const_int 2) (const_int 3)]))))]
2744  "TARGET_AVX"
2745  "vcvtps2pd\t{%x1, %0|%0, %x1}"
2746  [(set_attr "type" "ssecvt")
2747   (set_attr "prefix" "vex")
2748   (set_attr "mode" "V4DF")])
2749
;; 128-bit float -> double extension: elements 0 and 1 of the V4SF operand 1
;; are widened into the V2DF operand 0.  The prefix attribute is "maybe_vex",
;; matching the "%v" at the start of the template.
2750(define_insn "sse2_cvtps2pd"
2751  [(set (match_operand:V2DF 0 "register_operand" "=x")
2752	(float_extend:V2DF
2753	  (vec_select:V2SF
2754	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2755	    (parallel [(const_int 0) (const_int 1)]))))]
2756  "TARGET_SSE2"
2757  "%vcvtps2pd\t{%1, %0|%0, %q1}"
2758  [(set_attr "type" "ssecvt")
2759   (set_attr "amdfam10_decode" "direct")
2760   (set_attr "athlon_decode" "double")
2761   (set_attr "bdver1_decode" "double")
2762   (set_attr "prefix_data16" "0")
2763   (set_attr "prefix" "maybe_vex")
2764   (set_attr "mode" "V2DF")])
2765
;; Extend the high two floats of operand 1 to V2DF.
;; Step 1 writes a fresh V4SF scratch (operand 2) with elements {6,7,2,3} of
;; (scratch ++ operand 1), the same selector as the sse_movhlps pattern, so
;; the low half of the scratch receives elements 2 and 3 of operand 1.
;; Step 2 extends elements 0 and 1 of the scratch into operand 0.
2766(define_expand "vec_unpacks_hi_v4sf"
2767  [(set (match_dup 2)
2768   (vec_select:V4SF
2769     (vec_concat:V8SF
2770       (match_dup 2)
2771       (match_operand:V4SF 1 "nonimmediate_operand" ""))
2772     (parallel [(const_int 6) (const_int 7)
2773		(const_int 2) (const_int 3)])))
2774  (set (match_operand:V2DF 0 "register_operand" "")
2775   (float_extend:V2DF
2776     (vec_select:V2SF
2777       (match_dup 2)
2778       (parallel [(const_int 0) (const_int 1)]))))]
2779  "TARGET_SSE2"
2780  "operands[2] = gen_reg_rtx (V4SFmode);")
2781
;; Extend the high four floats of the V8SF operand 1 to V4DF: elements 4-7
;; are first selected into a fresh V4SF scratch (operand 2), which is then
;; float-extended into operand 0.
2782(define_expand "vec_unpacks_hi_v8sf"
2783  [(set (match_dup 2)
2784	(vec_select:V4SF
2785	  (match_operand:V8SF 1 "nonimmediate_operand" "")
2786	  (parallel [(const_int 4) (const_int 5)
2787		     (const_int 6) (const_int 7)])))
2788   (set (match_operand:V4DF 0 "register_operand" "")
2789	(float_extend:V4DF
2790	  (match_dup 2)))]
2791  "TARGET_AVX"
2792  "operands[2] = gen_reg_rtx (V4SFmode);")
2793
;; Extend the low two floats (elements 0 and 1) of the V4SF operand 1 to
;; V2DF.  The RTL has the same shape as the sse2_cvtps2pd insn pattern.
2794(define_expand "vec_unpacks_lo_v4sf"
2795  [(set (match_operand:V2DF 0 "register_operand" "")
2796	(float_extend:V2DF
2797	  (vec_select:V2SF
2798	    (match_operand:V4SF 1 "nonimmediate_operand" "")
2799	    (parallel [(const_int 0) (const_int 1)]))))]
2800  "TARGET_SSE2")
2801
;; Extend the low four floats (elements 0-3) of the V8SF operand 1 to V4DF.
;; The RTL has the same shape as the *avx_cvtps2pd256_2 insn pattern.
2802(define_expand "vec_unpacks_lo_v8sf"
2803  [(set (match_operand:V4DF 0 "register_operand" "")
2804	(float_extend:V4DF
2805	  (vec_select:V4SF
2806	    (match_operand:V8SF 1 "nonimmediate_operand" "")
2807	    (parallel [(const_int 0) (const_int 1)
2808		       (const_int 2) (const_int 3)]))))]
2809  "TARGET_AVX")
2810
;; Maps an integer vector mode to the floating-point vector mode with half as
;; many elements of twice the width.  It is used as the result mode of the
;; vec_unpack{s,u}_float_{hi,lo}_<mode> expanders that follow.
2811(define_mode_attr sseunpackfltmode
2812  [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])
2813
;; Convert the high half of the integer vector operand 1 to floating point.
;; The half is first widened through the vec_unpacks_hi_<mode> expander into
;; a <sseunpackmode> temporary; a FLOAT of that temporary is then stored in
;; operand 0, whose mode is <sseunpackfltmode>.
(define_expand "vec_unpacks_float_hi_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx as_float;

  /* Widen the selected half into the temporary.  */
  emit_insn (gen_vec_unpacks_hi_<mode> (widened, operands[1]));

  /* Integer -> floating-point conversion of the widened elements.  */
  as_float = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], as_float));
  DONE;
})
2826
;; Convert the low half of the integer vector operand 1 to floating point.
;; The half is first widened through the vec_unpacks_lo_<mode> expander into
;; a <sseunpackmode> temporary; a FLOAT of that temporary is then stored in
;; operand 0, whose mode is <sseunpackfltmode>.
(define_expand "vec_unpacks_float_lo_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx as_float;

  /* Widen the selected half into the temporary.  */
  emit_insn (gen_vec_unpacks_lo_<mode> (widened, operands[1]));

  /* Integer -> floating-point conversion of the widened elements.  */
  as_float = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], as_float));
  DONE;
})
2839
;; Convert the high half of the integer vector operand 1 to floating point,
;; widening through the vec_unpacku_hi_<mode> expander into a
;; <sseunpackmode> temporary and then storing a FLOAT of that temporary in
;; operand 0 (mode <sseunpackfltmode>).
;; NOTE(review): FLOAT is the signed conversion; this presumably relies on
;; the unsigned unpack producing non-negative widened values -- confirm.
(define_expand "vec_unpacku_float_hi_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx as_float;

  /* Widen the selected half into the temporary.  */
  emit_insn (gen_vec_unpacku_hi_<mode> (widened, operands[1]));

  /* Integer -> floating-point conversion of the widened elements.  */
  as_float = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], as_float));
  DONE;
})
2852
;; Convert the low half of the integer vector operand 1 to floating point,
;; widening through the vec_unpacku_lo_<mode> expander into a
;; <sseunpackmode> temporary and then storing a FLOAT of that temporary in
;; operand 0 (mode <sseunpackfltmode>).
;; NOTE(review): FLOAT is the signed conversion; this presumably relies on
;; the unsigned unpack producing non-negative widened values -- confirm.
(define_expand "vec_unpacku_float_lo_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx as_float;

  /* Widen the selected half into the temporary.  */
  emit_insn (gen_vec_unpacku_lo_<mode> (widened, operands[1]));

  /* Integer -> floating-point conversion of the widened elements.  */
  as_float = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], as_float));
  DONE;
})
2865
;; Signed int -> double conversion of the high two elements of the V4SI
;; operand 1.  A fresh V4SI scratch (operand 2) is loaded with elements
;; {2,3,2,3} of operand 1, putting the high pair in its low half; elements 0
;; and 1 of the scratch are then converted with `float' into the V2DF
;; operand 0.
2866(define_expand "vec_unpacks_float_hi_v4si"
2867  [(set (match_dup 2)
2868	(vec_select:V4SI
2869	  (match_operand:V4SI 1 "nonimmediate_operand" "")
2870	  (parallel [(const_int 2) (const_int 3)
2871		     (const_int 2) (const_int 3)])))
2872   (set (match_operand:V2DF 0 "register_operand" "")
2873	(float:V2DF
2874	  (vec_select:V2SI
2875	  (match_dup 2)
2876	    (parallel [(const_int 0) (const_int 1)]))))]
2877  "TARGET_SSE2"
2878  "operands[2] = gen_reg_rtx (V4SImode);")
2879
;; Signed int -> double conversion of the low two elements (0 and 1) of the
;; V4SI operand 1 into the V2DF operand 0.  No scratch is needed.
2880(define_expand "vec_unpacks_float_lo_v4si"
2881  [(set (match_operand:V2DF 0 "register_operand" "")
2882	(float:V2DF
2883	  (vec_select:V2SI
2884	    (match_operand:V4SI 1 "nonimmediate_operand" "")
2885	    (parallel [(const_int 0) (const_int 1)]))))]
2886  "TARGET_SSE2")
2887
;; Signed int -> double conversion of the high four elements of the V8SI
;; operand 1: elements 4-7 are selected into a fresh V4SI scratch
;; (operand 2), which is then converted with `float' into the V4DF operand 0.
2888(define_expand "vec_unpacks_float_hi_v8si"
2889  [(set (match_dup 2)
2890	(vec_select:V4SI
2891	  (match_operand:V8SI 1 "nonimmediate_operand" "")
2892	  (parallel [(const_int 4) (const_int 5)
2893		     (const_int 6) (const_int 7)])))
2894   (set (match_operand:V4DF 0 "register_operand" "")
2895	(float:V4DF
2896	  (match_dup 2)))]
2897  "TARGET_AVX"
2898  "operands[2] = gen_reg_rtx (V4SImode);")
2899
;; Signed int -> double conversion of the low four elements (0-3) of the
;; V8SI operand 1 into the V4DF operand 0.
2900(define_expand "vec_unpacks_float_lo_v8si"
2901  [(set (match_operand:V4DF 0 "register_operand" "")
2902	(float:V4DF
2903	  (vec_select:V4SI
2904	    (match_operand:V8SI 1 "nonimmediate_operand" "")
2905	    (parallel [(const_int 0) (const_int 1)
2906		       (const_int 2) (const_int 3)]))))]
2907  "TARGET_AVX")
2908
;; Unsigned int -> double conversion of the high two elements of the V4SI
;; operand 1, built from the signed conversion plus a correction:
;;   op5 = elements {2,3,2,3} of operand 1      (V4SI scratch)
;;   op6 = (double) low two elements of op5     (signed `float')
;;   op7 = op6 < op3                            (op3 = V2DF zero)
;;   op8 = op7 & op4                            (op4 built from 2**32)
;;   op0 = op6 + op8
;; so 2**32 is added back where the signed conversion came out negative.
;; The preparation code creates operands 3-8: 3 and 4 are constants forced
;; into registers, 5 is a V4SI pseudo and 6-8 are V2DF pseudos.
2909(define_expand "vec_unpacku_float_hi_v4si"
2910  [(set (match_dup 5)
2911	(vec_select:V4SI
2912	  (match_operand:V4SI 1 "nonimmediate_operand" "")
2913	  (parallel [(const_int 2) (const_int 3)
2914		     (const_int 2) (const_int 3)])))
2915   (set (match_dup 6)
2916	(float:V2DF
2917	  (vec_select:V2SI
2918	  (match_dup 5)
2919	    (parallel [(const_int 0) (const_int 1)]))))
2920   (set (match_dup 7)
2921	(lt:V2DF (match_dup 6) (match_dup 3)))
2922   (set (match_dup 8)
2923	(and:V2DF (match_dup 7) (match_dup 4)))
2924   (set (match_operand:V2DF 0 "register_operand" "")
2925	(plus:V2DF (match_dup 6) (match_dup 8)))]
2926  "TARGET_SSE2"
2927{
2928  REAL_VALUE_TYPE TWO32r;
2929  rtx x;
2930  int i;
2931
2932  real_ldexp (&TWO32r, &dconst1, 32);
2933  x = const_double_from_real_value (TWO32r, DFmode);
2934
2935  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2936  operands[4] = force_reg (V2DFmode,
2937			   ix86_build_const_vector (V2DFmode, 1, x));
2938
2939  operands[5] = gen_reg_rtx (V4SImode);
2940
2941  for (i = 6; i < 9; i++)
2942    operands[i] = gen_reg_rtx (V2DFmode);
2943})
2944
;; Unsigned int -> double conversion of the low two elements of the V4SI
;; operand 1, built from the signed conversion plus a correction:
;;   op5 = (double) elements 0,1 of operand 1   (signed `float')
;;   op6 = op5 < op3                            (op3 = V2DF zero)
;;   op7 = op6 & op4                            (op4 built from 2**32)
;;   op0 = op5 + op7
;; so 2**32 is added back where the signed conversion came out negative.
;; The preparation code creates operands 3-7: 3 and 4 are constants forced
;; into registers, 5-7 are V2DF pseudos.
2945(define_expand "vec_unpacku_float_lo_v4si"
2946  [(set (match_dup 5)
2947	(float:V2DF
2948	  (vec_select:V2SI
2949	    (match_operand:V4SI 1 "nonimmediate_operand" "")
2950	    (parallel [(const_int 0) (const_int 1)]))))
2951   (set (match_dup 6)
2952	(lt:V2DF (match_dup 5) (match_dup 3)))
2953   (set (match_dup 7)
2954	(and:V2DF (match_dup 6) (match_dup 4)))
2955   (set (match_operand:V2DF 0 "register_operand" "")
2956	(plus:V2DF (match_dup 5) (match_dup 7)))]
2957  "TARGET_SSE2"
2958{
2959  REAL_VALUE_TYPE TWO32r;
2960  rtx x;
2961  int i;
2962
2963  real_ldexp (&TWO32r, &dconst1, 32);
2964  x = const_double_from_real_value (TWO32r, DFmode);
2965
2966  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2967  operands[4] = force_reg (V2DFmode,
2968			   ix86_build_const_vector (V2DFmode, 1, x));
2969
2970  for (i = 5; i < 8; i++)
2971    operands[i] = gen_reg_rtx (V2DFmode);
2972})
2973
;; Unsigned int -> double conversion of the high half of the V8SI operand 1
;; into the V4DF operand 0.  The high four integers are extracted and
;; converted as signed values; 2**32 is then added to every element whose
;; signed conversion compared less than zero.
(define_expand "vec_unpacku_float_hi_v8si"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V8SI 1 "register_operand" "")]
  "TARGET_AVX"
{
  REAL_VALUE_TYPE two_pow_32;
  rtx two32_scalar;
  rtx zero, two32, hi_ints, as_double, neg_mask, fixup;

  /* Build the DFmode constant 2**32.  */
  real_ldexp (&two_pow_32, &dconst1, 32);
  two32_scalar = const_double_from_real_value (two_pow_32, DFmode);

  /* Constants first, then the pseudos, in the order they are used.  */
  zero = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
  two32 = force_reg (V4DFmode,
		     ix86_build_const_vector (V4DFmode, 1, two32_scalar));
  hi_ints = gen_reg_rtx (V4SImode);
  as_double = gen_reg_rtx (V4DFmode);
  neg_mask = gen_reg_rtx (V4DFmode);
  fixup = gen_reg_rtx (V4DFmode);

  /* Signed conversion of the high four integers.  */
  emit_insn (gen_vec_extract_hi_v8si (hi_ints, operands[1]));
  emit_insn (gen_floatv4siv4df2 (as_double, hi_ints));

  /* fixup = (as_double < 0) & 2**32; result = as_double + fixup.  */
  emit_insn (gen_rtx_SET (VOIDmode, neg_mask,
			  gen_rtx_LT (V4DFmode, as_double, zero)));
  emit_insn (gen_andv4df3 (fixup, neg_mask, two32));
  emit_insn (gen_addv4df3 (operands[0], as_double, fixup));
  DONE;
})
3000
;; Unsigned int -> double conversion of the low half of the V8SI operand 1
;; into the V4DF operand 0.  The conversion itself is emitted through
;; gen_avx_cvtdq2pd256_2 directly from operand 1; 2**32 is then added to
;; every element whose converted value compared less than zero.
(define_expand "vec_unpacku_float_lo_v8si"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V8SI 1 "nonimmediate_operand" "")]
  "TARGET_AVX"
{
  REAL_VALUE_TYPE two_pow_32;
  rtx two32_scalar;
  rtx zero, two32, as_double, neg_mask, fixup;

  /* Build the DFmode constant 2**32.  */
  real_ldexp (&two_pow_32, &dconst1, 32);
  two32_scalar = const_double_from_real_value (two_pow_32, DFmode);

  /* Constants first, then the pseudos, in the order they are used.  */
  zero = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
  two32 = force_reg (V4DFmode,
		     ix86_build_const_vector (V4DFmode, 1, two32_scalar));
  as_double = gen_reg_rtx (V4DFmode);
  neg_mask = gen_reg_rtx (V4DFmode);
  fixup = gen_reg_rtx (V4DFmode);

  emit_insn (gen_avx_cvtdq2pd256_2 (as_double, operands[1]));

  /* fixup = (as_double < 0) & 2**32; result = as_double + fixup.  */
  emit_insn (gen_rtx_SET (VOIDmode, neg_mask,
			  gen_rtx_LT (V4DFmode, as_double, zero)));
  emit_insn (gen_andv4df3 (fixup, neg_mask, two32));
  emit_insn (gen_addv4df3 (operands[0], as_double, fixup));
  DONE;
})
3025
;; Pack two V4DF vectors into one V8SF: operands 1 and 2 are each truncated
;; to V4SF into fresh scratch registers (operands 3 and 4), which are then
;; concatenated, operand 1's result forming the low half of operand 0.
3026(define_expand "vec_pack_trunc_v4df"
3027  [(set (match_dup 3)
3028	(float_truncate:V4SF
3029	  (match_operand:V4DF 1 "nonimmediate_operand" "")))
3030   (set (match_dup 4)
3031	(float_truncate:V4SF
3032	  (match_operand:V4DF 2 "nonimmediate_operand" "")))
3033   (set (match_operand:V8SF 0 "register_operand" "")
3034	(vec_concat:V8SF
3035	  (match_dup 3)
3036	  (match_dup 4)))]
3037  "TARGET_AVX"
3038{
3039  operands[3] = gen_reg_rtx (V4SFmode);
3040  operands[4] = gen_reg_rtx (V4SFmode);
3041})
3042
;; Pack two V2DF vectors (operands 1 and 2) into the V4SF operand 0.
;; Without AVX, or when 128-bit AVX is preferred, each input is converted
;; with cvtpd2ps and the two low halves are joined with movlhps.  Otherwise
;; the inputs are concatenated into one V4DF value and converted with a
;; single 256-bit cvtpd2ps.
(define_expand "vec_pack_trunc_v2df"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  if (!TARGET_AVX || TARGET_PREFER_AVX128)
    {
      /* Two 128-bit conversions, then merge their low halves.  */
      rtx lo = gen_reg_rtx (V4SFmode);
      rtx hi = gen_reg_rtx (V4SFmode);

      emit_insn (gen_sse2_cvtpd2ps (lo, operands[1]));
      emit_insn (gen_sse2_cvtpd2ps (hi, operands[2]));
      emit_insn (gen_sse_movlhps (operands[0], lo, hi));
    }
  else
    {
      /* One 256-bit conversion of the concatenated inputs.  */
      rtx wide = gen_reg_rtx (V4DFmode);
      rtx first = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (wide, first, operands[2]));
      emit_insn (gen_avx_cvtpd2ps256 (operands[0], wide));
    }
  DONE;
})
3070
;; Pack two V4DF vectors into one V8SI using the truncating double -> int
;; conversion (fix_truncv4dfv4si2) on each input, then concatenating the two
;; V4SI results with operand 1's result first.
(define_expand "vec_pack_sfix_trunc_v4df"
  [(match_operand:V8SI 0 "register_operand" "")
   (match_operand:V4DF 1 "nonimmediate_operand" "")
   (match_operand:V4DF 2 "nonimmediate_operand" "")]
  "TARGET_AVX"
{
  rtx lo = gen_reg_rtx (V4SImode);
  rtx hi = gen_reg_rtx (V4SImode);

  /* Convert each input on its own ...  */
  emit_insn (gen_fix_truncv4dfv4si2 (lo, operands[1]));
  emit_insn (gen_fix_truncv4dfv4si2 (hi, operands[2]));

  /* ... and glue the halves together.  */
  emit_insn (gen_avx_vec_concatv8si (operands[0], lo, hi));
  DONE;
})
3087
;; Pack two V2DF vectors (operands 1 and 2) into the V4SI operand 0 using
;; the truncating double -> int conversion.
;; Without AVX, or when 128-bit AVX is preferred, each input is converted
;; with cvttpd2dq and the low 64-bit halves of the two results are
;; interleaved (all three values viewed as V2DI).  Otherwise the inputs are
;; concatenated into one V4DF value and converted in a single step.
(define_expand "vec_pack_sfix_trunc_v2df"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  if (!TARGET_AVX || TARGET_PREFER_AVX128)
    {
      /* Two 128-bit conversions, then merge their low quadwords.  */
      rtx lo = gen_reg_rtx (V4SImode);
      rtx hi = gen_reg_rtx (V4SImode);

      emit_insn (gen_sse2_cvttpd2dq (lo, operands[1]));
      emit_insn (gen_sse2_cvttpd2dq (hi, operands[2]));
      emit_insn
       (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
				    gen_lowpart (V2DImode, lo),
				    gen_lowpart (V2DImode, hi)));
    }
  else
    {
      /* One 256-bit conversion of the concatenated inputs.  */
      rtx wide = gen_reg_rtx (V4DFmode);
      rtx first = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (wide, first, operands[2]));
      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], wide));
    }
  DONE;
})
3118
;; Maps a double-precision vector mode to the integer vector mode holding
;; twice as many SImode elements.  It is used as the result mode of
;; vec_pack_ufix_trunc_<mode> below.
3119(define_mode_attr ssepackfltmode
3120  [(V4DF "V8SI") (V2DF "V4SI")])
3121
;; Pack two double-precision vectors (operands 1 and 2) into one vector of
;; SImode elements using an unsigned truncating conversion, built on top of
;; the signed one:
;;   tmp[0], tmp[1]  values returned by ix86_expand_adjust_ufix_to_sfix_si
;;                   for operands 1 and 2;
;;   tmp[2], tmp[3]  second result of that helper, stored through its
;;                   pointer argument;
;;   tmp[4]          signed pack of tmp[0] and tmp[1];
;;   tmp[5]          ix86_expand_vec_extract_even_odd of tmp[2] and tmp[3]
;;                   with last argument 0;
;;   tmp[6]          tmp[4] XOR tmp[5], moved into operand 0 if the binop
;;                   did not already place it there.
;; When the result mode is V8SI and AVX2 is not available, the extraction is
;; done on V8SFmode lowparts and the result is viewed back as V8SI.
;; NOTE(review): the helper presumably returns a per-element XOR correction
;; and argument 0 presumably selects the even elements -- confirm against
;; the definitions in i386.c.
3122(define_expand "vec_pack_ufix_trunc_<mode>"
3123  [(match_operand:<ssepackfltmode> 0 "register_operand" "")
3124   (match_operand:VF2 1 "register_operand" "")
3125   (match_operand:VF2 2 "register_operand" "")]
3126  "TARGET_SSE2"
3127{
3128  rtx tmp[7];
3129  tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
3130  tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
3131  tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
3132  emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
3133  if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
3134    {
3135      tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
3136      ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
3137    }
3138  else
3139    {
3140      tmp[5] = gen_reg_rtx (V8SFmode);
3141      ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
3142					gen_lowpart (V8SFmode, tmp[3]), 0);
3143      tmp[5] = gen_lowpart (V8SImode, tmp[5]);
3144    }
3145  tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
3146				operands[0], 0, OPTAB_DIRECT);
3147  if (tmp[6] != operands[0])
3148    emit_move_insn (operands[0], tmp[6]);
3149  DONE;
3150})
3151
;; Pack two V4DF vectors into one V8SI using the cvtpd2dq conversion
;; (gen_avx_cvtpd2dq256) on each input, then concatenating the two V4SI
;; results with operand 1's result first.
(define_expand "vec_pack_sfix_v4df"
  [(match_operand:V8SI 0 "register_operand" "")
   (match_operand:V4DF 1 "nonimmediate_operand" "")
   (match_operand:V4DF 2 "nonimmediate_operand" "")]
  "TARGET_AVX"
{
  rtx lo = gen_reg_rtx (V4SImode);
  rtx hi = gen_reg_rtx (V4SImode);

  /* Convert each input on its own ...  */
  emit_insn (gen_avx_cvtpd2dq256 (lo, operands[1]));
  emit_insn (gen_avx_cvtpd2dq256 (hi, operands[2]));

  /* ... and glue the halves together.  */
  emit_insn (gen_avx_vec_concatv8si (operands[0], lo, hi));
  DONE;
})
3168
;; Pack two V2DF vectors (operands 1 and 2) into the V4SI operand 0 using
;; the cvtpd2dq conversion.
;; Without AVX, or when 128-bit AVX is preferred, each input is converted
;; separately and the low 64-bit halves of the two results are interleaved
;; (all three values viewed as V2DI).  Otherwise the inputs are concatenated
;; into one V4DF value and converted in a single 256-bit step.
(define_expand "vec_pack_sfix_v2df"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  if (!TARGET_AVX || TARGET_PREFER_AVX128)
    {
      /* Two 128-bit conversions, then merge their low quadwords.  */
      rtx lo = gen_reg_rtx (V4SImode);
      rtx hi = gen_reg_rtx (V4SImode);

      emit_insn (gen_sse2_cvtpd2dq (lo, operands[1]));
      emit_insn (gen_sse2_cvtpd2dq (hi, operands[2]));
      emit_insn
       (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
				    gen_lowpart (V2DImode, lo),
				    gen_lowpart (V2DImode, hi)));
    }
  else
    {
      /* One 256-bit conversion of the concatenated inputs.  */
      rtx wide = gen_reg_rtx (V4DFmode);
      rtx first = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (wide, first, operands[2]));
      emit_insn (gen_avx_cvtpd2dq256 (operands[0], wide));
    }
  DONE;
})
3199
3200;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3201;;
3202;; Parallel single-precision floating point element swizzling
3203;;
3204;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3205
;; Expander front end for sse_movhlps that accepts arbitrary
;; nonimmediate operands.  ix86_fixup_binary_operands adjusts the operands
;; and returns the destination to use; the real insn is emitted on that
;; destination, and a move into operand 0 follows when the two differ.
3206(define_expand "sse_movhlps_exp"
3207  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3208	(vec_select:V4SF
3209	  (vec_concat:V8SF
3210	    (match_operand:V4SF 1 "nonimmediate_operand" "")
3211	    (match_operand:V4SF 2 "nonimmediate_operand" ""))
3212	  (parallel [(const_int 6)
3213		     (const_int 7)
3214		     (const_int 2)
3215		     (const_int 3)])))]
3216  "TARGET_SSE"
3217{
3218  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3219
3220  emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3221
3222  /* Fix up the destination if needed.  */
3223  if (dst != operands[0])
3224    emit_move_insn (operands[0], dst);
3225
3226  DONE;
3227})
3228
;; movhlps and its memory variants.  Selecting {6,7,2,3} from
;; (operand 1 ++ operand 2) gives { op2[2], op2[3], op1[2], op1[3] }.
;; The insn condition rejects the case where operands 1 and 2 are both in
;; memory.  Alternatives:
;;   0/1  register source: movhlps / vmovhlps
;;   2/3  offsettable memory source: movlps / vmovlps from %H2
;;   4    memory destination: movhps store of operand 2
;; NOTE(review): %H presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the i386 operand printer.
3229(define_insn "sse_movhlps"
3230  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,m")
3231	(vec_select:V4SF
3232	  (vec_concat:V8SF
3233	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3234	    (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3235	  (parallel [(const_int 6)
3236		     (const_int 7)
3237		     (const_int 2)
3238		     (const_int 3)])))]
3239  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3240  "@
3241   movhlps\t{%2, %0|%0, %2}
3242   vmovhlps\t{%2, %1, %0|%0, %1, %2}
3243   movlps\t{%H2, %0|%0, %H2}
3244   vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3245   %vmovhps\t{%2, %0|%0, %2}"
3246  [(set_attr "isa" "noavx,avx,noavx,avx,*")
3247   (set_attr "type" "ssemov")
3248   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3249   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3250
;; Expander front end for sse_movlhps that accepts arbitrary
;; nonimmediate operands.  ix86_fixup_binary_operands adjusts the operands
;; and returns the destination to use; the real insn is emitted on that
;; destination, and a move into operand 0 follows when the two differ.
3251(define_expand "sse_movlhps_exp"
3252  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3253	(vec_select:V4SF
3254	  (vec_concat:V8SF
3255	    (match_operand:V4SF 1 "nonimmediate_operand" "")
3256	    (match_operand:V4SF 2 "nonimmediate_operand" ""))
3257	  (parallel [(const_int 0)
3258		     (const_int 1)
3259		     (const_int 4)
3260		     (const_int 5)])))]
3261  "TARGET_SSE"
3262{
3263  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3264
3265  emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3266
3267  /* Fix up the destination if needed.  */
3268  if (dst != operands[0])
3269    emit_move_insn (operands[0], dst);
3270
3271  DONE;
3272})
3273
;; movlhps and its memory variants.  Selecting {0,1,4,5} from
;; (operand 1 ++ operand 2) gives { op1[0], op1[1], op2[0], op2[1] }.
;; Operand validity is delegated to ix86_binary_operator_ok.  Alternatives:
;;   0/1  register source: movlhps / vmovlhps
;;   2/3  memory source: movhps / vmovhps load of operand 2
;;   4    offsettable memory destination: movlps store of operand 2 to %H0
;; NOTE(review): %H presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the i386 operand printer.
3274(define_insn "sse_movlhps"
3275  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,o")
3276	(vec_select:V4SF
3277	  (vec_concat:V8SF
3278	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3279	    (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
3280	  (parallel [(const_int 0)
3281		     (const_int 1)
3282		     (const_int 4)
3283		     (const_int 5)])))]
3284  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3285  "@
3286   movlhps\t{%2, %0|%0, %2}
3287   vmovlhps\t{%2, %1, %0|%0, %1, %2}
3288   movhps\t{%2, %0|%0, %2}
3289   vmovhps\t{%2, %1, %0|%0, %1, %2}
3290   %vmovlps\t{%2, %H0|%H0, %2}"
3291  [(set_attr "isa" "noavx,avx,noavx,avx,*")
3292   (set_attr "type" "ssemov")
3293   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3294   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3295
3296;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; With a = operand 1 and b = operand 2, the selector below yields
;; { a2, b2, a3, b3, a6, b6, a7, b7 }: the upper two elements of each
;; 128-bit lane of a and b, interleaved (indices 8-15 refer to b).
3297(define_insn "avx_unpckhps256"
3298  [(set (match_operand:V8SF 0 "register_operand" "=x")
3299	(vec_select:V8SF
3300	  (vec_concat:V16SF
3301	    (match_operand:V8SF 1 "register_operand" "x")
3302	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3303	  (parallel [(const_int 2) (const_int 10)
3304		     (const_int 3) (const_int 11)
3305		     (const_int 6) (const_int 14)
3306		     (const_int 7) (const_int 15)])))]
3307  "TARGET_AVX"
3308  "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3309  [(set_attr "type" "sselog")
3310   (set_attr "prefix" "vex")
3311   (set_attr "mode" "V8SF")])
3312
;; Full-width interleave of the high halves of two V8SF vectors.
;; With a = operand 1 and b = operand 2 the result is
;; { a4, b4, a5, b5, a6, b6, a7, b7 }.  Because the 256-bit unpck insns only
;; shuffle within their 128-bit lanes, it is built in three steps:
;;   op3 = { a0, b0, a1, b1, a4, b4, a5, b5 }   (avx_unpcklps256 selector)
;;   op4 = { a2, b2, a3, b3, a6, b6, a7, b7 }   (avx_unpckhps256 selector)
;;   op0 = high lane of op3 followed by high lane of op4
;; Operands 3 and 4 are fresh V8SF pseudos created by the preparation code.
;; The operand constraint strings are left empty, as in the other expanders
;; of this file: constraints are not used when expanding.
(define_expand "vec_interleave_highv8sf"
  [(set (match_dup 3)
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_operand:V8SF 1 "register_operand" "")
	    (match_operand:V8SF 2 "nonimmediate_operand" ""))
	  (parallel [(const_int 0) (const_int 8)
		     (const_int 1) (const_int 9)
		     (const_int 4) (const_int 12)
		     (const_int 5) (const_int 13)])))
   (set (match_dup 4)
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_dup 1)
	    (match_dup 2))
	  (parallel [(const_int 2) (const_int 10)
		     (const_int 3) (const_int 11)
		     (const_int 6) (const_int 14)
		     (const_int 7) (const_int 15)])))
   ;; Combine the high 128-bit lanes of the two partial results.
   (set (match_operand:V8SF 0 "register_operand" "")
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_dup 3)
	    (match_dup 4))
	  (parallel [(const_int 4) (const_int 5)
		     (const_int 6) (const_int 7)
		     (const_int 12) (const_int 13)
		     (const_int 14) (const_int 15)])))]
 "TARGET_AVX"
{
  operands[3] = gen_reg_rtx (V8SFmode);
  operands[4] = gen_reg_rtx (V8SFmode);
})
3346
;; unpckhps: with a = operand 1 and b = operand 2, selecting {2,6,3,7} from
;; (a ++ b) yields { a2, b2, a3, b3 }.  Alternative 0 is the two-operand
;; non-AVX form (operand 1 tied to the destination); alternative 1 is the
;; three-operand AVX form.
3347(define_insn "vec_interleave_highv4sf"
3348  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3349	(vec_select:V4SF
3350	  (vec_concat:V8SF
3351	    (match_operand:V4SF 1 "register_operand" "0,x")
3352	    (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3353	  (parallel [(const_int 2) (const_int 6)
3354		     (const_int 3) (const_int 7)])))]
3355  "TARGET_SSE"
3356  "@
3357   unpckhps\t{%2, %0|%0, %2}
3358   vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3359  [(set_attr "isa" "noavx,avx")
3360   (set_attr "type" "sselog")
3361   (set_attr "prefix" "orig,vex")
3362   (set_attr "mode" "V4SF")])
3363
3364;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; With a = operand 1 and b = operand 2, the selector below yields
;; { a0, b0, a1, b1, a4, b4, a5, b5 }: the lower two elements of each
;; 128-bit lane of a and b, interleaved (indices 8-15 refer to b).
3365(define_insn "avx_unpcklps256"
3366  [(set (match_operand:V8SF 0 "register_operand" "=x")
3367	(vec_select:V8SF
3368	  (vec_concat:V16SF
3369	    (match_operand:V8SF 1 "register_operand" "x")
3370	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3371	  (parallel [(const_int 0) (const_int 8)
3372		     (const_int 1) (const_int 9)
3373		     (const_int 4) (const_int 12)
3374		     (const_int 5) (const_int 13)])))]
3375  "TARGET_AVX"
3376  "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3377  [(set_attr "type" "sselog")
3378   (set_attr "prefix" "vex")
3379   (set_attr "mode" "V8SF")])
3380
;; Full-width interleave of the low halves of two V8SF vectors.
;; With a = operand 1 and b = operand 2 the result is
;; { a0, b0, a1, b1, a2, b2, a3, b3 }.  Because the 256-bit unpck insns only
;; shuffle within their 128-bit lanes, it is built in three steps:
;;   op3 = { a0, b0, a1, b1, a4, b4, a5, b5 }   (avx_unpcklps256 selector)
;;   op4 = { a2, b2, a3, b3, a6, b6, a7, b7 }   (avx_unpckhps256 selector)
;;   op0 = low lane of op3 followed by low lane of op4
;; Operands 3 and 4 are fresh V8SF pseudos created by the preparation code.
;; The operand constraint strings are left empty, as in the other expanders
;; of this file: constraints are not used when expanding.
(define_expand "vec_interleave_lowv8sf"
  [(set (match_dup 3)
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_operand:V8SF 1 "register_operand" "")
	    (match_operand:V8SF 2 "nonimmediate_operand" ""))
	  (parallel [(const_int 0) (const_int 8)
		     (const_int 1) (const_int 9)
		     (const_int 4) (const_int 12)
		     (const_int 5) (const_int 13)])))
   (set (match_dup 4)
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_dup 1)
	    (match_dup 2))
	  (parallel [(const_int 2) (const_int 10)
		     (const_int 3) (const_int 11)
		     (const_int 6) (const_int 14)
		     (const_int 7) (const_int 15)])))
   ;; Combine the low 128-bit lanes of the two partial results.
   (set (match_operand:V8SF 0 "register_operand" "")
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_dup 3)
	    (match_dup 4))
	  (parallel [(const_int 0) (const_int 1)
		     (const_int 2) (const_int 3)
		     (const_int 8) (const_int 9)
		     (const_int 10) (const_int 11)])))]
 "TARGET_AVX"
{
  operands[3] = gen_reg_rtx (V8SFmode);
  operands[4] = gen_reg_rtx (V8SFmode);
})
3414
;; unpcklps: with a = operand 1 and b = operand 2, selecting {0,4,1,5} from
;; (a ++ b) yields { a0, b0, a1, b1 }.  Alternative 0 is the two-operand
;; non-AVX form (operand 1 tied to the destination); alternative 1 is the
;; three-operand AVX form.
3415(define_insn "vec_interleave_lowv4sf"
3416  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3417	(vec_select:V4SF
3418	  (vec_concat:V8SF
3419	    (match_operand:V4SF 1 "register_operand" "0,x")
3420	    (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3421	  (parallel [(const_int 0) (const_int 4)
3422		     (const_int 1) (const_int 5)])))]
3423  "TARGET_SSE"
3424  "@
3425   unpcklps\t{%2, %0|%0, %2}
3426   vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3427  [(set_attr "isa" "noavx,avx")
3428   (set_attr "type" "sselog")
3429   (set_attr "prefix" "orig,vex")
3430   (set_attr "mode" "V4SF")])
3431
3432;; These are modeled with the same vec_concat as the others so that we
3433;; capture users of shufps that can use the new instructions
;; vmovshdup (256-bit): operand 1 is concatenated with itself and elements
;; {1,1,3,3,5,5,7,7} are selected, i.e. every odd-indexed element is
;; duplicated into the even position below it.
3434(define_insn "avx_movshdup256"
3435  [(set (match_operand:V8SF 0 "register_operand" "=x")
3436	(vec_select:V8SF
3437	  (vec_concat:V16SF
3438	    (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3439	    (match_dup 1))
3440	  (parallel [(const_int 1) (const_int 1)
3441		     (const_int 3) (const_int 3)
3442		     (const_int 5) (const_int 5)
3443		     (const_int 7) (const_int 7)])))]
3444  "TARGET_AVX"
3445  "vmovshdup\t{%1, %0|%0, %1}"
3446  [(set_attr "type" "sse")
3447   (set_attr "prefix" "vex")
3448   (set_attr "mode" "V8SF")])
3449
;; movshdup (128-bit): operand 1 is concatenated with itself and elements
;; {1,1,7,7} are selected.  Index 7 is element 3 of the second copy, so the
;; result is { a1, a1, a3, a3 }; the selector has the shufps shape (two
;; indices below 4, two at or above 4).
3450(define_insn "sse3_movshdup"
3451  [(set (match_operand:V4SF 0 "register_operand" "=x")
3452	(vec_select:V4SF
3453	  (vec_concat:V8SF
3454	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3455	    (match_dup 1))
3456	  (parallel [(const_int 1)
3457		     (const_int 1)
3458		     (const_int 7)
3459		     (const_int 7)])))]
3460  "TARGET_SSE3"
3461  "%vmovshdup\t{%1, %0|%0, %1}"
3462  [(set_attr "type" "sse")
3463   (set_attr "prefix_rep" "1")
3464   (set_attr "prefix" "maybe_vex")
3465   (set_attr "mode" "V4SF")])
3466
;; vmovsldup (256-bit): operand 1 is concatenated with itself and elements
;; {0,0,2,2,4,4,6,6} are selected, i.e. every even-indexed element is
;; duplicated into the odd position above it.
3467(define_insn "avx_movsldup256"
3468  [(set (match_operand:V8SF 0 "register_operand" "=x")
3469	(vec_select:V8SF
3470	  (vec_concat:V16SF
3471	    (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3472	    (match_dup 1))
3473	  (parallel [(const_int 0) (const_int 0)
3474		     (const_int 2) (const_int 2)
3475		     (const_int 4) (const_int 4)
3476		     (const_int 6) (const_int 6)])))]
3477  "TARGET_AVX"
3478  "vmovsldup\t{%1, %0|%0, %1}"
3479  [(set_attr "type" "sse")
3480   (set_attr "prefix" "vex")
3481   (set_attr "mode" "V8SF")])
3482
;; movsldup (128-bit): operand 1 is concatenated with itself and elements
;; {0,0,6,6} are selected.  Index 6 is element 2 of the second copy, so the
;; result is { a0, a0, a2, a2 }; the selector has the shufps shape (two
;; indices below 4, two at or above 4).
3483(define_insn "sse3_movsldup"
3484  [(set (match_operand:V4SF 0 "register_operand" "=x")
3485	(vec_select:V4SF
3486	  (vec_concat:V8SF
3487	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3488	    (match_dup 1))
3489	  (parallel [(const_int 0)
3490		     (const_int 0)
3491		     (const_int 6)
3492		     (const_int 6)])))]
3493  "TARGET_SSE3"
3494  "%vmovsldup\t{%1, %0|%0, %1}"
3495  [(set_attr "type" "sse")
3496   (set_attr "prefix_rep" "1")
3497   (set_attr "prefix" "maybe_vex")
3498   (set_attr "mode" "V4SF")])
3499
;; Expander taking the 8-bit vshufps immediate (operand 3) and turning it
;; into the eight explicit element indices expected by avx_shufps256_1.
;; Each 2-bit field of the immediate is used twice, once per 128-bit lane:
;; fields 0 and 1 pick from operand 1 (bases 0 and 4), fields 2 and 3 pick
;; from operand 2 (bases 8 and 12).
(define_expand "avx_shufps256"
  [(match_operand:V8SF 0 "register_operand" "")
   (match_operand:V8SF 1 "register_operand" "")
   (match_operand:V8SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_AVX"
{
  int mask = INTVAL (operands[3]);

  /* The four 2-bit selectors packed in the immediate.  */
  int sel0 = mask & 3;
  int sel1 = (mask >> 2) & 3;
  int sel2 = (mask >> 4) & 3;
  int sel3 = (mask >> 6) & 3;

  emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
				  /* Low lane.  */
				  GEN_INT (sel0),
				  GEN_INT (sel1),
				  GEN_INT (sel2 + 8),
				  GEN_INT (sel3 + 8),
				  /* High lane.  */
				  GEN_INT (sel0 + 4),
				  GEN_INT (sel1 + 4),
				  GEN_INT (sel2 + 12),
				  GEN_INT (sel3 + 12)));
  DONE;
})
3519
3520;; Each 2-bit field of the mask selects 2 elements, one per 128-bit lane.
;; Operands 3-6 are the low-lane indices and operands 7-10 the high-lane
;; indices; the insn condition requires each high-lane index to equal the
;; corresponding low-lane index plus 4, since a single immediate controls
;; both lanes.  The output code rebuilds that immediate from operands 3-6
;; (removing the base of 8 from operands 5 and 6) and stores it back into
;; operands[3] for printing.
3521(define_insn "avx_shufps256_1"
3522  [(set (match_operand:V8SF 0 "register_operand" "=x")
3523	(vec_select:V8SF
3524	  (vec_concat:V16SF
3525	    (match_operand:V8SF 1 "register_operand" "x")
3526	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3527	  (parallel [(match_operand 3  "const_0_to_3_operand"   "")
3528		     (match_operand 4  "const_0_to_3_operand"   "")
3529		     (match_operand 5  "const_8_to_11_operand"  "")
3530		     (match_operand 6  "const_8_to_11_operand"  "")
3531		     (match_operand 7  "const_4_to_7_operand"   "")
3532		     (match_operand 8  "const_4_to_7_operand"   "")
3533		     (match_operand 9  "const_12_to_15_operand" "")
3534		     (match_operand 10 "const_12_to_15_operand" "")])))]
3535  "TARGET_AVX
3536   && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3537       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3538       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3539       && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3540{
3541  int mask;
3542  mask = INTVAL (operands[3]);
3543  mask |= INTVAL (operands[4]) << 2;
3544  mask |= (INTVAL (operands[5]) - 8) << 4;
3545  mask |= (INTVAL (operands[6]) - 8) << 6;
3546  operands[3] = GEN_INT (mask);
3547
3548  return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3549}
3550  [(set_attr "type" "sselog")
3551   (set_attr "length_immediate" "1")
3552   (set_attr "prefix" "vex")
3553   (set_attr "mode" "V8SF")])
3554
;; Expander taking the 8-bit shufps immediate (operand 3) and turning it
;; into the four explicit element indices expected by sse_shufps_v4sf:
;; fields 0 and 1 pick from operand 1 (indices 0-3), fields 2 and 3 pick
;; from operand 2 (indices 4-7).
(define_expand "sse_shufps"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand:V4SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE"
{
  int mask = INTVAL (operands[3]);

  /* The four 2-bit selectors packed in the immediate.  */
  int sel0 = mask & 3;
  int sel1 = (mask >> 2) & 3;
  int sel2 = (mask >> 4) & 3;
  int sel3 = (mask >> 6) & 3;

  emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
				  GEN_INT (sel0),
				  GEN_INT (sel1),
				  GEN_INT (sel2 + 4),
				  GEN_INT (sel3 + 4)));
  DONE;
})
3570
;; shufps / vshufps for every mode in VI4F_128.  Operands 3 and 4 index
;; into operand 1 (0-3), operands 5 and 6 into operand 2 (4-7).  The output
;; code packs the four indices back into the 8-bit immediate (removing the
;; base of 4 from operands 5 and 6), stores it in operands[3] for printing,
;; and picks the two-operand or three-operand template according to the
;; alternative.
3571(define_insn "sse_shufps_<mode>"
3572  [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3573	(vec_select:VI4F_128
3574	  (vec_concat:<ssedoublevecmode>
3575	    (match_operand:VI4F_128 1 "register_operand" "0,x")
3576	    (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3577	  (parallel [(match_operand 3 "const_0_to_3_operand" "")
3578		     (match_operand 4 "const_0_to_3_operand" "")
3579		     (match_operand 5 "const_4_to_7_operand" "")
3580		     (match_operand 6 "const_4_to_7_operand" "")])))]
3581  "TARGET_SSE"
3582{
3583  int mask = 0;
3584  mask |= INTVAL (operands[3]) << 0;
3585  mask |= INTVAL (operands[4]) << 2;
3586  mask |= (INTVAL (operands[5]) - 4) << 4;
3587  mask |= (INTVAL (operands[6]) - 4) << 6;
3588  operands[3] = GEN_INT (mask);
3589
3590  switch (which_alternative)
3591    {
3592    case 0:
3593      return "shufps\t{%3, %2, %0|%0, %2, %3}";
3594    case 1:
3595      return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3596    default:
3597      gcc_unreachable ();
3598    }
3599}
3600  [(set_attr "isa" "noavx,avx")
3601   (set_attr "type" "sselog")
3602   (set_attr "length_immediate" "1")
3603   (set_attr "prefix" "orig,vex")
3604   (set_attr "mode" "V4SF")])
3605
;; Extract the high two floats (elements 2 and 3) of the V4SF operand 1
;; into the V2SF operand 0.  Alternatives:
;;   0  register -> memory: movhps store
;;   1  register -> register: movhlps
;;   2  offsettable memory -> register: movlps load from %H1
;; NOTE(review): %H presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the i386 operand printer.
3606(define_insn "sse_storehps"
3607  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3608	(vec_select:V2SF
3609	  (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3610	  (parallel [(const_int 2) (const_int 3)])))]
3611  "TARGET_SSE"
3612  "@
3613   %vmovhps\t{%1, %0|%0, %1}
3614   %vmovhlps\t{%1, %d0|%d0, %1}
3615   %vmovlps\t{%H1, %d0|%d0, %H1}"
3616  [(set_attr "type" "ssemov")
3617   (set_attr "prefix" "maybe_vex")
3618   (set_attr "mode" "V2SF,V4SF,V2SF")])
3619
;; Expander front end for sse_loadhps that accepts arbitrary
;; nonimmediate operands.  ix86_fixup_binary_operands adjusts the operands
;; and returns the destination to use; the real insn is emitted on that
;; destination, and a move into operand 0 follows when the two differ.
3620(define_expand "sse_loadhps_exp"
3621  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3622	(vec_concat:V4SF
3623	  (vec_select:V2SF
3624	    (match_operand:V4SF 1 "nonimmediate_operand" "")
3625	    (parallel [(const_int 0) (const_int 1)]))
3626	  (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3627  "TARGET_SSE"
3628{
3629  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3630
3631  emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3632
3633  /* Fix up the destination if needed.  */
3634  if (dst != operands[0])
3635    emit_move_insn (operands[0], dst);
3636
3637  DONE;
3638})
3639
;; Replace the high half of a V4SF: the result is the low half (elements
;; 0 and 1) of operand 1 concatenated with V2SF operand 2.
;; Alternatives 0/1 take operand 2 from memory (movhps / vmovhps),
;; alternatives 2/3 take it from a register (movlhps / vmovlhps), and
;; alternative 4 handles a memory destination by storing operand 2 at
;; %H0 with movlps (operand 1 is tied to operand 0 there).
3640	(define_insn "sse_loadhps"
3641	  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,o")
3642		(vec_concat:V4SF
3643		  (vec_select:V2SF
3644		    (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3645		    (parallel [(const_int 0) (const_int 1)]))
3646		  (match_operand:V2SF 2 "nonimmediate_operand"   " m,m,x,x,x")))]
3647	  "TARGET_SSE"
3648	  "@
3649	   movhps\t{%2, %0|%0, %2}
3650	   vmovhps\t{%2, %1, %0|%0, %1, %2}
3651	   movlhps\t{%2, %0|%0, %2}
3652	   vmovlhps\t{%2, %1, %0|%0, %1, %2}
3653	   %vmovlps\t{%2, %H0|%H0, %2}"
3654	  [(set_attr "isa" "noavx,avx,noavx,avx,*")
3655	   (set_attr "type" "ssemov")
3656	   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3657	   (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
3658
;; Extract the low half (elements 0 and 1) of V4SF operand 1 into V2SF
;; operand 0.  Alternative 0 stores to memory with movlps, alternative 1
;; copies register to register with movaps, and alternative 2 loads
;; from memory with movlps.
3659	(define_insn "sse_storelps"
3660	  [(set (match_operand:V2SF 0 "nonimmediate_operand"   "=m,x,x")
3661		(vec_select:V2SF
3662		  (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3663		  (parallel [(const_int 0) (const_int 1)])))]
3664	  "TARGET_SSE"
3665	  "@
3666	   %vmovlps\t{%1, %0|%0, %1}
3667	   %vmovaps\t{%1, %0|%0, %1}
3668	   %vmovlps\t{%1, %d0|%d0, %1}"
3669	  [(set_attr "type" "ssemov")
3670	   (set_attr "prefix" "maybe_vex")
3671	   (set_attr "mode" "V2SF,V4SF,V2SF")])
3672
;; Expander wrapping sse_loadlps.  The operands are first passed through
;; ix86_fixup_binary_operands, which returns the destination to use; the
;; sse_loadlps insn is emitted into that destination and the result is
;; copied to operand 0 if the two differ.
3673	(define_expand "sse_loadlps_exp"
3674	  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3675		(vec_concat:V4SF
3676		  (match_operand:V2SF 2 "nonimmediate_operand" "")
3677		  (vec_select:V2SF
3678		    (match_operand:V4SF 1 "nonimmediate_operand" "")
3679		    (parallel [(const_int 2) (const_int 3)]))))]
3680	  "TARGET_SSE"
3681	{
3682	  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3683
3684	  emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3685
3686	  /* Fix up the destination if needed.  */
3687	  if (dst != operands[0])
3688	    emit_move_insn (operands[0], dst);
3689
3690	  DONE;
3691	})
3692
;; Replace the low half of a V4SF: the result is V2SF operand 2
;; concatenated with the high half (elements 2 and 3) of operand 1.
;; Alternatives 0/1 have operand 2 in a register and use shufps with the
;; immediate 0xe4; alternatives 2/3 load operand 2 from memory with
;; movlps / vmovlps; alternative 4 handles a memory destination (operand
;; 1 tied to operand 0) by storing operand 2 with movlps.
3693	(define_insn "sse_loadlps"
3694	  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,m")
3695		(vec_concat:V4SF
3696		  (match_operand:V2SF 2 "nonimmediate_operand"   " 0,x,m,m,x")
3697		  (vec_select:V2SF
3698		    (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3699		    (parallel [(const_int 2) (const_int 3)]))))]
3700	  "TARGET_SSE"
3701	  "@
3702	   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3703	   vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3704	   movlps\t{%2, %0|%0, %2}
3705	   vmovlps\t{%2, %1, %0|%0, %1, %2}
3706	   %vmovlps\t{%2, %0|%0, %2}"
3707	  [(set_attr "isa" "noavx,avx,noavx,avx,*")
3708	   (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3709	   (set_attr "length_immediate" "1,1,*,*,*")
3710	   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3711	   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3712
;; Merge element 0 of operand 2 into operand 1: the vec_merge mask of 1
;; takes element 0 from operand 2 and the remaining elements from
;; operand 1.  Alternative 0 is the two-operand movss (operand 1 tied to
;; operand 0); alternative 1 is the three-operand vmovss.
3713	(define_insn "sse_movss"
3714	  [(set (match_operand:V4SF 0 "register_operand"   "=x,x")
3715		(vec_merge:V4SF
3716		  (match_operand:V4SF 2 "register_operand" " x,x")
3717		  (match_operand:V4SF 1 "register_operand" " 0,x")
3718		  (const_int 1)))]
3719	  "TARGET_SSE"
3720	  "@
3721	   movss\t{%2, %0|%0, %2}
3722	   vmovss\t{%2, %1, %0|%0, %1, %2}"
3723	  [(set_attr "isa" "noavx,avx")
3724	   (set_attr "type" "ssemov")
3725	   (set_attr "prefix" "orig,vex")
3726	   (set_attr "mode" "SF")])
3727
;; Broadcast element 0 of V4SF register operand 1 to every element of
;; the VF1-mode destination using vbroadcastss.  Requires TARGET_AVX2.
3728	(define_insn "avx2_vec_dup<mode>"
3729	  [(set (match_operand:VF1 0 "register_operand" "=x")
3730		(vec_duplicate:VF1
3731		  (vec_select:SF
3732		    (match_operand:V4SF 1 "register_operand" "x")
3733		    (parallel [(const_int 0)]))))]
3734	  "TARGET_AVX2"
3735	  "vbroadcastss\t{%1, %0|%0, %1}"
3736	  [(set_attr "type" "sselog1")
3737	    (set_attr "prefix" "vex")
3738	    (set_attr "mode" "<MODE>")])
3739
;; Duplicate SF operand 1 into all four elements of V4SF operand 0.
;; Alternative 0 (AVX, register source) uses vshufps with immediate 0,
;; alternative 1 (AVX, memory source) uses vbroadcastss, and alternative
;; 2 (non-AVX, source tied to the destination) uses shufps with
;; immediate 0 on the destination itself.
3740	(define_insn "vec_dupv4sf"
3741	  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
3742		(vec_duplicate:V4SF
3743		  (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
3744	  "TARGET_SSE"
3745	  "@
3746	   vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3747	   vbroadcastss\t{%1, %0|%0, %1}
3748	   shufps\t{$0, %0, %0|%0, %0, 0}"
3749	  [(set_attr "isa" "avx,avx,noavx")
3750	   (set_attr "type" "sselog1,ssemov,sselog1")
3751	   (set_attr "length_immediate" "1,0,1")
3752	   (set_attr "prefix_extra" "0,1,*")
3753	   (set_attr "prefix" "vex,vex,orig")
3754	   (set_attr "mode" "V4SF")])
3755
3756;; Although insertps takes register source, we prefer
3757;; unpcklps with register source since it is shorter.
;; Build a V2SF from two SF values (operand 1 low, operand 2 high) when
;; SSE4.1 is available.  Alternatives 0/1 interleave two registers with
;; (v)unpcklps, alternatives 2/3 insert a memory operand 2 with
;; (v)insertps and immediate 0x10, alternative 4 loads operand 1 with
;; movss when operand 2 satisfies constraint "C", and alternatives 5/6
;; are the MMX-register forms (punpckldq / movd).
;; NOTE(review): "C" is presumably the zero constant here -- confirm
;; against constraints.md and the vector_move_operand predicate.
3758	(define_insn "*vec_concatv2sf_sse4_1"
3759	  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,x,x,x,*y ,*y")
3760		(vec_concat:V2SF
3761		  (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3762		  (match_operand:SF 2 "vector_move_operand"  " x,x,m,m,C,*ym, C")))]
3763	  "TARGET_SSE4_1"
3764	  "@
3765	   unpcklps\t{%2, %0|%0, %2}
3766	   vunpcklps\t{%2, %1, %0|%0, %1, %2}
3767	   insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3768	   vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3769	   %vmovss\t{%1, %0|%0, %1}
3770	   punpckldq\t{%2, %0|%0, %2}
3771	   movd\t{%1, %0|%0, %1}"
3772	  [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3773	   (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3774	   (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3775	   (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3776	   (set_attr "length_immediate" "*,*,1,1,*,*,*")
3777	   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3778	   (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3779
3780;; ??? In theory we can match memory for the MMX alternative, but allowing
3781;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3782;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF values (operand 1 low, operand 2 high) with
;; plain SSE.  Alternative 0 interleaves two SSE registers with
;; unpcklps, alternative 1 loads operand 1 from memory with movss when
;; operand 2 satisfies constraint "C", and alternatives 2/3 are the
;; corresponding MMX-register forms (punpckldq / movd).
3783	(define_insn "*vec_concatv2sf_sse"
3784	  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
3785		(vec_concat:V2SF
3786		  (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3787		  (match_operand:SF 2 "reg_or_0_operand"     " x,C,*y, C")))]
3788	  "TARGET_SSE"
3789	  "@
3790	   unpcklps\t{%2, %0|%0, %2}
3791	   movss\t{%1, %0|%0, %1}
3792	   punpckldq\t{%2, %0|%0, %2}
3793	   movd\t{%1, %0|%0, %1}"
3794	  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3795	   (set_attr "mode" "V4SF,SF,DI,DI")])
3796
;; Build a V4SF from two V2SF halves (operand 1 low, operand 2 high).
;; A register operand 2 is merged with (v)movlhps; a memory operand 2 is
;; loaded into the high half with (v)movhps.
3797	(define_insn "*vec_concatv4sf"
3798	  [(set (match_operand:V4SF 0 "register_operand"       "=x,x,x,x")
3799		(vec_concat:V4SF
3800		  (match_operand:V2SF 1 "register_operand"     " 0,x,0,x")
3801		  (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3802	  "TARGET_SSE"
3803	  "@
3804	   movlhps\t{%2, %0|%0, %2}
3805	   vmovlhps\t{%2, %1, %0|%0, %1, %2}
3806	   movhps\t{%2, %0|%0, %2}
3807	   vmovhps\t{%2, %1, %0|%0, %1, %2}"
3808	  [(set_attr "isa" "noavx,avx,noavx,avx")
3809	   (set_attr "type" "ssemov")
3810	   (set_attr "prefix" "orig,vex,orig,vex")
3811	   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
3812
;; Expander for vec_init on the V_128 modes.  All of the work is
;; delegated to ix86_expand_vector_init, called with false as its first
;; argument, operand 0 as the target and operand 1 as the initializer.
3813	(define_expand "vec_init<mode>"
3814	  [(match_operand:V_128 0 "register_operand" "")
3815	   (match_operand 1 "" "")]
3816	  "TARGET_SSE"
3817	{
3818	  ix86_expand_vector_init (false, operands[0], operands[1]);
3819	  DONE;
3820	})
3821
3822;; Avoid combining registers from different units in a single alternative,
3823;; see comment above inline_secondary_memory_needed function in i386.c
;; Set element 0 of a VI4F_128 vector: the vec_merge mask of 1 takes
;; element 0 from the duplicated scalar operand 2 and the remaining
;; elements from operand 1.
;;   alternatives 0-3: operand 1 satisfies "C"; the scalar is brought in
;;     with insertps, a scalar move, movd or movss.
;;   alternatives 4-5: operand 1 is a register; (v)movss merges the
;;     scalar into it.
;;   alternatives 6-7: the scalar comes from a general register or
;;     memory and is inserted with (v)pinsrd.
;;   alternatives 8-10: memory destination tied to operand 1; these emit
;;     "#" and must be split (see the define_split on a memory_operand
;;     destination with the same vec_merge shape later in this file).
3824	(define_insn "vec_set<mode>_0"
3825	  [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3826		  "=x,x,x ,x,x,x,x  ,x  ,m ,m   ,m")
3827		(vec_merge:VI4F_128
3828		  (vec_duplicate:VI4F_128
3829		    (match_operand:<ssescalarmode> 2 "general_operand"
3830		  " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
3831		  (match_operand:VI4F_128 1 "vector_move_operand"
3832		  " C,C,C ,C,0,x,0  ,x  ,0 ,0   ,0")
3833		  (const_int 1)))]
3834	  "TARGET_SSE"
3835	  "@
3836	   %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3837	   %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3838	   %vmovd\t{%2, %0|%0, %2}
3839	   movss\t{%2, %0|%0, %2}
3840	   movss\t{%2, %0|%0, %2}
3841	   vmovss\t{%2, %1, %0|%0, %1, %2}
3842	   pinsrd\t{$0, %2, %0|%0, %2, 0}
3843	   vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3844	   #
3845	   #
3846	   #"
3847	  [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
3848	   (set (attr "type")
3849	     (cond [(eq_attr "alternative" "0,6,7")
3850		      (const_string "sselog")
3851		    (eq_attr "alternative" "9")
3852		      (const_string "imov")
3853		    (eq_attr "alternative" "10")
3854		      (const_string "fmov")
3855		   ]
3856		   (const_string "ssemov")))
3857	   (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3858	   (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3859	   (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3860	   (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3861
3862;; A subset is vec_setv4sf.
;; Insert SF operand 2 into one element of V4SF operand 1 using
;; (v)insertps.  Operand 3 is the vec_merge mask; the insn condition
;; only accepts masks that are a single set bit whose index is below the
;; number of V4SF elements.  When the insn is output, that bit index is
;; shifted left by 4 to form the insertps immediate.
3863	(define_insn "*vec_setv4sf_sse4_1"
3864	  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3865		(vec_merge:V4SF
3866		  (vec_duplicate:V4SF
3867		    (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3868		  (match_operand:V4SF 1 "register_operand" "0,x")
3869		  (match_operand:SI 3 "const_int_operand" "")))]
3870	  "TARGET_SSE4_1
3871	   && ((unsigned) exact_log2 (INTVAL (operands[3]))
3872	       < GET_MODE_NUNITS (V4SFmode))"
3873	{
3874	  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3875	  switch (which_alternative)
3876	    {
3877	    case 0:
3878	      return "insertps\t{%3, %2, %0|%0, %2, %3}";
3879	    case 1:
3880	      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3881	    default:
3882	      gcc_unreachable ();
3883	    }
3884	}
3885	  [(set_attr "isa" "noavx,avx")
3886	   (set_attr "type" "sselog")
3887	   (set_attr "prefix_data16" "1,*")
3888	   (set_attr "prefix_extra" "1")
3889	   (set_attr "length_immediate" "1")
3890	   (set_attr "prefix" "orig,vex")
3891	   (set_attr "mode" "V4SF")])
3892
;; The insertps builtin, modelled as an unspec on (operand 2, operand 1,
;; immediate operand 3).  When operand 2 is in memory, the top two bits
;; of the immediate (operands[3] >> 6) are folded into the address
;; instead: the memory reference is narrowed to SFmode at byte offset
;; count_s * 4 and, if count_s was nonzero, those bits are cleared from
;; the immediate (masked with 0x3f).
3893	(define_insn "sse4_1_insertps"
3894	  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3895		(unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3896			      (match_operand:V4SF 1 "register_operand" "0,x")
3897			      (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3898			     UNSPEC_INSERTPS))]
3899	  "TARGET_SSE4_1"
3900	{
3901	  if (MEM_P (operands[2]))
3902	    {
3903	      unsigned count_s = INTVAL (operands[3]) >> 6;
3904	      if (count_s)
3905		operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3906	      operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3907	    }
3908	  switch (which_alternative)
3909	    {
3910	    case 0:
3911	      return "insertps\t{%3, %2, %0|%0, %2, %3}";
3912	    case 1:
3913	      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3914	    default:
3915	      gcc_unreachable ();
3916	    }
3917	}
3918	  [(set_attr "isa" "noavx,avx")
3919	   (set_attr "type" "sselog")
3920	   (set_attr "prefix_data16" "1,*")
3921	   (set_attr "prefix_extra" "1")
3922	   (set_attr "length_immediate" "1")
3923	   (set_attr "prefix" "orig,vex")
3924	   (set_attr "mode" "V4SF")])
3925
;; After reload, setting element 0 of a vector that lives in memory
;; (destination and merge source are the same memory operand) becomes a
;; plain scalar store of operand 1 at byte offset 0 of that memory.
3926	(define_split
3927	  [(set (match_operand:VI4F_128 0 "memory_operand" "")
3928		(vec_merge:VI4F_128
3929		  (vec_duplicate:VI4F_128
3930		    (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3931		  (match_dup 0)
3932		  (const_int 1)))]
3933	  "TARGET_SSE && reload_completed"
3934	  [(const_int 0)]
3935	{
3936	  emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
3937			  operands[1]);
3938	  DONE;
3939	})
3940
;; Expander for vec_set on the V modes: operand 0 is the vector, operand
;; 1 the scalar value and operand 2 the constant element index.  All of
;; the work is delegated to ix86_expand_vector_set, called with false as
;; its first argument.
3941	(define_expand "vec_set<mode>"
3942	  [(match_operand:V 0 "register_operand" "")
3943	   (match_operand:<ssescalarmode> 1 "register_operand" "")
3944	   (match_operand 2 "const_int_operand" "")]
3945	  "TARGET_SSE"
3946	{
3947	  ix86_expand_vector_set (false, operands[0], operands[1],
3948				  INTVAL (operands[2]));
3949	  DONE;
3950	})
3951
;; Extract element 0 of a V4SF.  The insn always emits "#" and is split
;; after reload into an SFmode move: a register source is re-expressed
;; as the same hard register in SFmode, any other source goes through
;; gen_lowpart.  Memory-to-memory is rejected by the insn condition.
3952	(define_insn_and_split "*vec_extractv4sf_0"
3953	  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3954		(vec_select:SF
3955		  (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3956		  (parallel [(const_int 0)])))]
3957	  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3958	  "#"
3959	  "&& reload_completed"
3960	  [(const_int 0)]
3961	{
3962	  rtx op1 = operands[1];
3963	  if (REG_P (op1))
3964	    op1 = gen_rtx_REG (SFmode, REGNO (op1));
3965	  else
3966	    op1 = gen_lowpart (SFmode, op1);
3967	  emit_move_insn (operands[0], op1);
3968	  DONE;
3969	})
3970
;; Extract element operand 2 (0..3) of V4SF register operand 1.
;; Alternative 0 (general register or memory destination) emits
;; extractps directly.  Alternatives 1/2 (SSE register destination) emit
;; "#" and are split after reload, writing the destination register
;; viewed as V4SF:
;;   index 1 or 3: shufps of operand 1 with itself, every selector
;;                 naming the requested element;
;;   index 2:      vec_interleave_highv4sf of operand 1 with itself;
;;   index 0:      not expected here (see the in-code comment).
3971	(define_insn_and_split "*sse4_1_extractps"
3972	  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
3973		(vec_select:SF
3974		  (match_operand:V4SF 1 "register_operand" "x,0,x")
3975		  (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
3976	  "TARGET_SSE4_1"
3977	  "@
3978	   %vextractps\t{%2, %1, %0|%0, %1, %2}
3979	   #
3980	   #"
3981	  "&& reload_completed && SSE_REG_P (operands[0])"
3982	  [(const_int 0)]
3983	{
3984	  rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
3985	  switch (INTVAL (operands[2]))
3986	    {
3987	    case 1:
3988	    case 3:
3989	      emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
3990					      operands[2], operands[2],
3991					      GEN_INT (INTVAL (operands[2]) + 4),
3992					      GEN_INT (INTVAL (operands[2]) + 4)));
3993	      break;
3994	    case 2:
3995	      emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
3996	      break;
3997	    default:
3998	      /* 0 should be handled by the *vec_extractv4sf_0 pattern above.  */
3999	      gcc_unreachable ();
4000	    }
4001	  DONE;
4002	}
4003	  [(set_attr "isa" "*,noavx,avx")
4004	   (set_attr "type" "sselog,*,*")
4005	   (set_attr "prefix_data16" "1,*,*")
4006	   (set_attr "prefix_extra" "1,*,*")
4007	   (set_attr "length_immediate" "1,*,*")
4008	   (set_attr "prefix" "maybe_vex,*,*")
4009	   (set_attr "mode" "V4SF,*,*")])
4010
;; Extract element operand 2 (0..3) of a V4SF held in offsettable
;; memory.  The insn emits "#" and is split after reload into a plain
;; SFmode load from byte offset 4 * index within the vector.
4011	(define_insn_and_split "*vec_extract_v4sf_mem"
4012	  [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
4013	       (vec_select:SF
4014		 (match_operand:V4SF 1 "memory_operand" "o,o,o")
4015		 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
4016	  "TARGET_SSE"
4017	  "#"
4018	  "&& reload_completed"
4019	  [(const_int 0)]
4020	{
4021	  int i = INTVAL (operands[2]);
4022
4023	  emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
4024	  DONE;
4025	})
4026
;; Expander for extracting one 128-bit half of a 256-bit vector.
;; Operand 2 (0 or 1) picks the generator: 0 dispatches to
;; vec_extract_lo_<mode>, 1 to vec_extract_hi_<mode>.
4027	(define_expand "avx_vextractf128<mode>"
4028	  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
4029	   (match_operand:V_256 1 "register_operand" "")
4030	   (match_operand:SI 2 "const_0_to_1_operand" "")]
4031	  "TARGET_AVX"
4032	{
4033	  rtx (*insn)(rtx, rtx);
4034
4035	  switch (INTVAL (operands[2]))
4036	    {
4037	    case 0:
4038	      insn = gen_vec_extract_lo_<mode>;
4039	      break;
4040	    case 1:
4041	      insn = gen_vec_extract_hi_<mode>;
4042	      break;
4043	    default:
4044	      gcc_unreachable ();
4045	    }
4046
4047	  emit_insn (insn (operands[0], operands[1]));
4048	  DONE;
4049	})
4050
;; Extract the low half (elements 0 and 1) of a VI8F_256 vector.  The
;; insn emits "#" and is split after reload into a half-width move: a
;; register source is re-expressed as the same hard register in the
;; half-vector mode, any other source goes through gen_lowpart.
;; Memory-to-memory is rejected by the insn condition.
4051	(define_insn_and_split "vec_extract_lo_<mode>"
4052	  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4053		(vec_select:<ssehalfvecmode>
4054		  (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
4055		  (parallel [(const_int 0) (const_int 1)])))]
4056	  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4057	  "#"
4058	  "&& reload_completed"
4059	  [(const_int 0)]
4060	{
4061	  rtx op1 = operands[1];
4062	  if (REG_P (op1))
4063	    op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4064	  else
4065	    op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4066	  emit_move_insn (operands[0], op1);
4067	  DONE;
4068	})
4069
;; Extract the high half (elements 2 and 3) of a VI8F_256 register into
;; a register or memory using vextract<i128> with immediate 1.
4070	(define_insn "vec_extract_hi_<mode>"
4071	  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4072		(vec_select:<ssehalfvecmode>
4073		  (match_operand:VI8F_256 1 "register_operand" "x,x")
4074		  (parallel [(const_int 2) (const_int 3)])))]
4075	  "TARGET_AVX"
4076	  "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4077	  [(set_attr "type" "sselog")
4078	   (set_attr "prefix_extra" "1")
4079	   (set_attr "length_immediate" "1")
4080	   (set_attr "memory" "none,store")
4081	   (set_attr "prefix" "vex")
4082	   (set_attr "mode" "<sseinsnmode>")])
4083
;; Extract the low half (elements 0..3) of a VI4F_256 vector.  The insn
;; emits "#" and is split after reload into a half-width move: a
;; register source is re-expressed as the same hard register in the
;; half-vector mode, any other source goes through gen_lowpart.
;; Memory-to-memory is rejected by the insn condition.
4084	(define_insn_and_split "vec_extract_lo_<mode>"
4085	  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4086		(vec_select:<ssehalfvecmode>
4087		  (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
4088		  (parallel [(const_int 0) (const_int 1)
4089			     (const_int 2) (const_int 3)])))]
4090	  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4091	  "#"
4092	  "&& reload_completed"
4093	  [(const_int 0)]
4094	{
4095	  rtx op1 = operands[1];
4096	  if (REG_P (op1))
4097	    op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4098	  else
4099	    op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4100	  emit_move_insn (operands[0], op1);
4101	  DONE;
4102	})
4103
;; Extract the high half (elements 4..7) of a VI4F_256 register into a
;; register or memory using vextract<i128> with immediate 1.
4104	(define_insn "vec_extract_hi_<mode>"
4105	  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4106		(vec_select:<ssehalfvecmode>
4107		  (match_operand:VI4F_256 1 "register_operand" "x,x")
4108		  (parallel [(const_int 4) (const_int 5)
4109			     (const_int 6) (const_int 7)])))]
4110	  "TARGET_AVX"
4111	  "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4112	  [(set_attr "type" "sselog")
4113	   (set_attr "prefix_extra" "1")
4114	   (set_attr "length_immediate" "1")
4115	   (set_attr "memory" "none,store")
4116	   (set_attr "prefix" "vex")
4117	   (set_attr "mode" "<sseinsnmode>")])
4118
;; Extract the low half (elements 0..7) of a V16HI vector as V8HI.  The
;; insn emits "#" and is split after reload into a V8HImode move: a
;; register source is re-expressed as the same hard register in
;; V8HImode, any other source goes through gen_lowpart.
;; Memory-to-memory is rejected by the insn condition.
4119	(define_insn_and_split "vec_extract_lo_v16hi"
4120	  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4121		(vec_select:V8HI
4122		  (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4123		  (parallel [(const_int 0) (const_int 1)
4124			     (const_int 2) (const_int 3)
4125			     (const_int 4) (const_int 5)
4126			     (const_int 6) (const_int 7)])))]
4127	  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4128	  "#"
4129	  "&& reload_completed"
4130	  [(const_int 0)]
4131	{
4132	  rtx op1 = operands[1];
4133	  if (REG_P (op1))
4134	    op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4135	  else
4136	    op1 = gen_lowpart (V8HImode, op1);
4137	  emit_move_insn (operands[0], op1);
4138	  DONE;
4139	})
4140
;; Extract the high half (elements 8..15) of a V16HI register as V8HI,
;; into a register or memory, using vextract%~128 with immediate 1.
4141	(define_insn "vec_extract_hi_v16hi"
4142	  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4143		(vec_select:V8HI
4144		  (match_operand:V16HI 1 "register_operand" "x,x")
4145		  (parallel [(const_int 8) (const_int 9)
4146			     (const_int 10) (const_int 11)
4147			     (const_int 12) (const_int 13)
4148			     (const_int 14) (const_int 15)])))]
4149	  "TARGET_AVX"
4150	  "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4151	  [(set_attr "type" "sselog")
4152	   (set_attr "prefix_extra" "1")
4153	   (set_attr "length_immediate" "1")
4154	   (set_attr "memory" "none,store")
4155	   (set_attr "prefix" "vex")
4156	   (set_attr "mode" "OI")])
4157
;; Extract the low half (elements 0..15) of a V32QI vector as V16QI.
;; The insn emits "#" and is split after reload into a V16QImode move:
;; a register source is re-expressed as the same hard register in
;; V16QImode, any other source goes through gen_lowpart.
;; Memory-to-memory is rejected by the insn condition.
4158	(define_insn_and_split "vec_extract_lo_v32qi"
4159	  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4160		(vec_select:V16QI
4161		  (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4162		  (parallel [(const_int 0) (const_int 1)
4163			     (const_int 2) (const_int 3)
4164			     (const_int 4) (const_int 5)
4165			     (const_int 6) (const_int 7)
4166			     (const_int 8) (const_int 9)
4167			     (const_int 10) (const_int 11)
4168			     (const_int 12) (const_int 13)
4169			     (const_int 14) (const_int 15)])))]
4170	  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4171	  "#"
4172	  "&& reload_completed"
4173	  [(const_int 0)]
4174	{
4175	  rtx op1 = operands[1];
4176	  if (REG_P (op1))
4177	    op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4178	  else
4179	    op1 = gen_lowpart (V16QImode, op1);
4180	  emit_move_insn (operands[0], op1);
4181	  DONE;
4182	})
4183
;; Extract the high half (elements 16..31) of a V32QI register as V16QI,
;; into a register or memory, using vextract%~128 with immediate 1.
4184	(define_insn "vec_extract_hi_v32qi"
4185	  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4186		(vec_select:V16QI
4187		  (match_operand:V32QI 1 "register_operand" "x,x")
4188		  (parallel [(const_int 16) (const_int 17)
4189			     (const_int 18) (const_int 19)
4190			     (const_int 20) (const_int 21)
4191			     (const_int 22) (const_int 23)
4192			     (const_int 24) (const_int 25)
4193			     (const_int 26) (const_int 27)
4194			     (const_int 28) (const_int 29)
4195			     (const_int 30) (const_int 31)])))]
4196	  "TARGET_AVX"
4197	  "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4198	  [(set_attr "type" "sselog")
4199	   (set_attr "prefix_extra" "1")
4200	   (set_attr "length_immediate" "1")
4201	   (set_attr "memory" "none,store")
4202	   (set_attr "prefix" "vex")
4203	   (set_attr "mode" "OI")])
4204
4205	;; Modes handled by vec_extract patterns.  The 256-bit modes are
;; only included when TARGET_AVX holds; the 128-bit modes carry no
;; per-mode condition.
4206	(define_mode_iterator VEC_EXTRACT_MODE
4207	  [(V32QI "TARGET_AVX") V16QI
4208	   (V16HI "TARGET_AVX") V8HI
4209	   (V8SI "TARGET_AVX") V4SI
4210	   (V4DI "TARGET_AVX") V2DI
4211	   (V8SF "TARGET_AVX") V4SF
4212	   (V4DF "TARGET_AVX") V2DF])
4213
;; Expander for vec_extract on every VEC_EXTRACT_MODE: operand 0 is the
;; scalar result, operand 1 the vector and operand 2 the constant
;; element index.  All of the work is delegated to
;; ix86_expand_vector_extract, called with false as its first argument.
4214	(define_expand "vec_extract<mode>"
4215	  [(match_operand:<ssescalarmode> 0 "register_operand" "")
4216	   (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
4217	   (match_operand 2 "const_int_operand" "")]
4218	  "TARGET_SSE"
4219	{
4220	  ix86_expand_vector_extract (false, operands[0], operands[1],
4221				      INTVAL (operands[2]));
4222	  DONE;
4223	})
4224
4225;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4226;;
4227;; Parallel double-precision floating point element swizzling
4228;;
4229;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4230
4231;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd: from the concatenation of operands 1 and 2, select
;; elements (1, 5, 3, 7), i.e. the odd element of each operand,
;; interleaved, for each pair of elements.
4232	(define_insn "avx_unpckhpd256"
4233	  [(set (match_operand:V4DF 0 "register_operand" "=x")
4234		(vec_select:V4DF
4235		  (vec_concat:V8DF
4236		    (match_operand:V4DF 1 "register_operand" "x")
4237		    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4238		  (parallel [(const_int 1) (const_int 5)
4239			     (const_int 3) (const_int 7)])))]
4240	  "TARGET_AVX"
4241	  "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4242	  [(set_attr "type" "sselog")
4243	   (set_attr "prefix" "vex")
4244	   (set_attr "mode" "V4DF")])
4245
;; Interleave the high halves of two V4DF vectors in three steps using
;; two fresh temporaries (operands 3 and 4):
;;   tmp3 = select (0, 4, 2, 6) of {op1, op2}
;;   tmp4 = select (1, 5, 3, 7) of {op1, op2}
;;   op0  = select (2, 3, 6, 7) of {tmp3, tmp4}
;; so op0 ends up as (op1[2], op2[2], op1[3], op2[3]).
4246	(define_expand "vec_interleave_highv4df"
4247	  [(set (match_dup 3)
4248		(vec_select:V4DF
4249		  (vec_concat:V8DF
4250		    (match_operand:V4DF 1 "register_operand" "x")
4251		    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4252		  (parallel [(const_int 0) (const_int 4)
4253			     (const_int 2) (const_int 6)])))
4254	   (set (match_dup 4)
4255		(vec_select:V4DF
4256		  (vec_concat:V8DF
4257		    (match_dup 1)
4258		    (match_dup 2))
4259		  (parallel [(const_int 1) (const_int 5)
4260			     (const_int 3) (const_int 7)])))
4261	   (set (match_operand:V4DF 0 "register_operand" "")
4262		(vec_select:V4DF
4263		  (vec_concat:V8DF
4264		    (match_dup 3)
4265		    (match_dup 4))
4266		  (parallel [(const_int 2) (const_int 3)
4267			     (const_int 6) (const_int 7)])))]
4268	 "TARGET_AVX"
4269	{
4270	  operands[3] = gen_reg_rtx (V4DFmode);
4271	  operands[4] = gen_reg_rtx (V4DFmode);
4272	})
4273
4274
;; Expander producing (op1[1], op2[1]) from two V2DF operands.  If the
;; operand combination is rejected by
;; ix86_vec_interleave_v2df_operator_ok (operands, 1), operand 2 is
;; forced into a register before the pattern is emitted.
4275	(define_expand "vec_interleave_highv2df"
4276	  [(set (match_operand:V2DF 0 "register_operand" "")
4277		(vec_select:V2DF
4278		  (vec_concat:V4DF
4279		    (match_operand:V2DF 1 "nonimmediate_operand" "")
4280		    (match_operand:V2DF 2 "nonimmediate_operand" ""))
4281		  (parallel [(const_int 1)
4282			     (const_int 3)])))]
4283	  "TARGET_SSE2"
4284	{
4285	  if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4286	    operands[2] = force_reg (V2DFmode, operands[2]);
4287	})
4288
;; Insn producing (op1[1], op2[1]) from two V2DF operands.
;;   alternatives 0/1: both in registers -> (v)unpckhpd.
;;   alternative 2:    operand 2 is the same memory as operand 1 ->
;;                     movddup from the high half (%H1); needs SSE3.
;;   alternatives 3/4: operand 1 in memory -> (v)movlpd loads its high
;;                     half (%H1) into the low element.
;;   alternative 5:    memory destination tied to operand 2 -> movhpd
;;                     stores from register operand 1.
4289	(define_insn "*vec_interleave_highv2df"
4290	  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,x,x,m")
4291		(vec_select:V2DF
4292		  (vec_concat:V4DF
4293		    (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
4294		    (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
4295		  (parallel [(const_int 1)
4296			     (const_int 3)])))]
4297	  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4298	  "@
4299	   unpckhpd\t{%2, %0|%0, %2}
4300	   vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4301	   %vmovddup\t{%H1, %0|%0, %H1}
4302	   movlpd\t{%H1, %0|%0, %H1}
4303	   vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4304	   %vmovhpd\t{%1, %0|%0, %1}"
4305	  [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4306	  (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4307	   (set_attr "prefix_data16" "*,*,*,1,*,1")
4308	   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4309	   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
4310
4311;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander with no preparation code: selects elements (0, 4, 2, 6) from
;; operand 1 concatenated with itself, i.e. each even element of operand
;; 1 is duplicated into the following odd position.
4312	(define_expand "avx_movddup256"
4313	  [(set (match_operand:V4DF 0 "register_operand" "")
4314		(vec_select:V4DF
4315		  (vec_concat:V8DF
4316		    (match_operand:V4DF 1 "nonimmediate_operand" "")
4317		    (match_dup 1))
4318		  (parallel [(const_int 0) (const_int 4)
4319			     (const_int 2) (const_int 6)])))]
4320	  "TARGET_AVX")
4321
;; Expander with no preparation code: selects elements (0, 4, 2, 6) from
;; the concatenation of operands 1 and 2, i.e. the even elements of both
;; operands, interleaved.
4322	(define_expand "avx_unpcklpd256"
4323	  [(set (match_operand:V4DF 0 "register_operand" "")
4324		(vec_select:V4DF
4325		  (vec_concat:V8DF
4326		    (match_operand:V4DF 1 "register_operand" "")
4327		    (match_operand:V4DF 2 "nonimmediate_operand" ""))
4328		  (parallel [(const_int 0) (const_int 4)
4329			     (const_int 2) (const_int 6)])))]
4330	  "TARGET_AVX")
4331
;; Insn for the (0, 4, 2, 6) selection.  Alternative 0 is the general
;; case and emits vunpcklpd.  Alternative 1 applies when operand 1 is in
;; memory and operand 2 is the same operand (constraint "1"); it emits
;; vmovddup from that memory.
4332	(define_insn "*avx_unpcklpd256"
4333	  [(set (match_operand:V4DF 0 "register_operand"         "=x,x")
4334		(vec_select:V4DF
4335		  (vec_concat:V8DF
4336		    (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
4337		    (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
4338		  (parallel [(const_int 0) (const_int 4)
4339			     (const_int 2) (const_int 6)])))]
4340	  "TARGET_AVX"
4341	  "@
4342	   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4343	   vmovddup\t{%1, %0|%0, %1}"
4344	  [(set_attr "type" "sselog")
4345	   (set_attr "prefix" "vex")
4346	   (set_attr "mode" "V4DF")])
4347
;; Interleave the low halves of two V4DF vectors in three steps using
;; two fresh temporaries (operands 3 and 4):
;;   tmp3 = select (0, 4, 2, 6) of {op1, op2}
;;   tmp4 = select (1, 5, 3, 7) of {op1, op2}
;;   op0  = select (0, 1, 4, 5) of {tmp3, tmp4}
;; so op0 ends up as (op1[0], op2[0], op1[1], op2[1]).
4348	(define_expand "vec_interleave_lowv4df"
4349	  [(set (match_dup 3)
4350		(vec_select:V4DF
4351		  (vec_concat:V8DF
4352		    (match_operand:V4DF 1 "register_operand" "x")
4353		    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4354		  (parallel [(const_int 0) (const_int 4)
4355			     (const_int 2) (const_int 6)])))
4356	   (set (match_dup 4)
4357		(vec_select:V4DF
4358		  (vec_concat:V8DF
4359		    (match_dup 1)
4360		    (match_dup 2))
4361		  (parallel [(const_int 1) (const_int 5)
4362			     (const_int 3) (const_int 7)])))
4363	   (set (match_operand:V4DF 0 "register_operand" "")
4364		(vec_select:V4DF
4365		  (vec_concat:V8DF
4366		    (match_dup 3)
4367		    (match_dup 4))
4368		  (parallel [(const_int 0) (const_int 1)
4369			     (const_int 4) (const_int 5)])))]
4370	 "TARGET_AVX"
4371	{
4372	  operands[3] = gen_reg_rtx (V4DFmode);
4373	  operands[4] = gen_reg_rtx (V4DFmode);
4374	})
4375
;; Expander producing (op1[0], op2[0]) from two V2DF operands.  If the
;; operand combination is rejected by
;; ix86_vec_interleave_v2df_operator_ok (operands, 0), operand 1 is
;; forced into a register before the pattern is emitted.
4376	(define_expand "vec_interleave_lowv2df"
4377	  [(set (match_operand:V2DF 0 "register_operand" "")
4378		(vec_select:V2DF
4379		  (vec_concat:V4DF
4380		    (match_operand:V2DF 1 "nonimmediate_operand" "")
4381		    (match_operand:V2DF 2 "nonimmediate_operand" ""))
4382		  (parallel [(const_int 0)
4383			     (const_int 2)])))]
4384	  "TARGET_SSE2"
4385	{
4386	  if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4387	    operands[1] = force_reg (V2DFmode, operands[1]);
4388	})
4389
;; Insn producing (op1[0], op2[0]) from two V2DF operands.
;;   alternatives 0/1: both in registers -> (v)unpcklpd.
;;   alternative 2:    operand 2 is the same memory as operand 1 ->
;;                     movddup from that memory; needs SSE3.
;;   alternatives 3/4: operand 2 in memory -> (v)movhpd loads it into
;;                     the high element.
;;   alternative 5:    memory destination tied to operand 1 -> movlpd
;;                     stores register operand 2 at %H0.
4390	(define_insn "*vec_interleave_lowv2df"
4391	  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,x,x,o")
4392		(vec_select:V2DF
4393		  (vec_concat:V4DF
4394		    (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4395		    (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4396		  (parallel [(const_int 0)
4397			     (const_int 2)])))]
4398	  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4399	  "@
4400	   unpcklpd\t{%2, %0|%0, %2}
4401	   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4402	   %vmovddup\t{%1, %0|%0, %1}
4403	   movhpd\t{%2, %0|%0, %2}
4404	   vmovhpd\t{%2, %1, %0|%0, %1, %2}
4405	   %vmovlpd\t{%2, %H0|%H0, %2}"
4406	  [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4407	   (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4408	   (set_attr "prefix_data16" "*,*,*,1,*,1")
4409	   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4410	   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
4411
;; After reload, duplicating the low element of a V2DF register into a
;; V2DF memory destination becomes two DFmode stores of that register's
;; low element, at byte offsets 0 and 8.
4412	(define_split
4413	  [(set (match_operand:V2DF 0 "memory_operand" "")
4414		(vec_select:V2DF
4415		  (vec_concat:V4DF
4416		    (match_operand:V2DF 1 "register_operand" "")
4417		    (match_dup 1))
4418		  (parallel [(const_int 0)
4419			     (const_int 2)])))]
4420	  "TARGET_SSE3 && reload_completed"
4421	  [(const_int 0)]
4422	{
4423	  rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4424	  emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4425	  emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
4426	  DONE;
4427	})
4428
;; Selecting element i (0 or 1) twice from a V2DF in memory, written as
;; selectors (i, i + 2) over the operand concatenated with itself, is
;; rewritten as a vec_duplicate of the DFmode value at byte offset
;; i * 8 of that memory.
4429	(define_split
4430	  [(set (match_operand:V2DF 0 "register_operand" "")
4431		(vec_select:V2DF
4432		  (vec_concat:V4DF
4433		    (match_operand:V2DF 1 "memory_operand" "")
4434		    (match_dup 1))
4435		  (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4436			     (match_operand:SI 3 "const_int_operand" "")])))]
4437	  "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4438	  [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4439	{
4440	  operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
4441	})
4442
;; Expander taking the shuffle as a 4-bit immediate (operand 3) and
;; decoding it into the four explicit element selectors expected by
;; avx_shufpd256_1:
;;   bit 0 -> 0 or 1,  bit 1 -> 4 or 5,  bit 2 -> 2 or 3,  bit 3 -> 6 or 7
;; (a set bit picks the larger index of each pair).
4443	(define_expand "avx_shufpd256"
4444	  [(match_operand:V4DF 0 "register_operand" "")
4445	   (match_operand:V4DF 1 "register_operand" "")
4446	   (match_operand:V4DF 2 "nonimmediate_operand" "")
4447	   (match_operand:SI 3 "const_int_operand" "")]
4448	  "TARGET_AVX"
4449	{
4450	  int mask = INTVAL (operands[3]);
4451	  emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4452					   GEN_INT (mask & 1),
4453					   GEN_INT (mask & 2 ? 5 : 4),
4454					   GEN_INT (mask & 4 ? 3 : 2),
4455					   GEN_INT (mask & 8 ? 7 : 6)));
4456	  DONE;
4457	})
4458
;; 256-bit vshufpd with the shuffle written as explicit selectors into
;; the concatenation of operands 1 and 2.  Operands 3..6 are restricted
;; to the ranges 0-1, 4-5, 2-3 and 6-7 respectively.  At output time the
;; selectors are packed back into the 4-bit immediate (one bit per
;; selector, bit 0 first) and stored in operands[3].
4459	(define_insn "avx_shufpd256_1"
4460	  [(set (match_operand:V4DF 0 "register_operand" "=x")
4461		(vec_select:V4DF
4462		  (vec_concat:V8DF
4463		    (match_operand:V4DF 1 "register_operand" "x")
4464		    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4465		  (parallel [(match_operand 3 "const_0_to_1_operand" "")
4466			     (match_operand 4 "const_4_to_5_operand" "")
4467			     (match_operand 5 "const_2_to_3_operand" "")
4468			     (match_operand 6 "const_6_to_7_operand" "")])))]
4469	  "TARGET_AVX"
4470	{
4471	  int mask;
4472	  mask = INTVAL (operands[3]);
4473	  mask |= (INTVAL (operands[4]) - 4) << 1;
4474	  mask |= (INTVAL (operands[5]) - 2) << 2;
4475	  mask |= (INTVAL (operands[6]) - 6) << 3;
4476	  operands[3] = GEN_INT (mask);
4477
4478	  return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4479	}
4480	  [(set_attr "type" "sselog")
4481	   (set_attr "length_immediate" "1")
4482	   (set_attr "prefix" "vex")
4483	   (set_attr "mode" "V4DF")])
4484
;; Expander taking the shuffle as a 2-bit immediate (operand 3) and
;; decoding it into the two explicit element selectors passed to
;; sse2_shufpd_v2df: bit 0 -> 0 or 1, bit 1 -> 2 or 3 (a set bit picks
;; the larger index of each pair).
4485	(define_expand "sse2_shufpd"
4486	  [(match_operand:V2DF 0 "register_operand" "")
4487	   (match_operand:V2DF 1 "register_operand" "")
4488	   (match_operand:V2DF 2 "nonimmediate_operand" "")
4489	   (match_operand:SI 3 "const_int_operand" "")]
4490	  "TARGET_SSE2"
4491	{
4492	  int mask = INTVAL (operands[3]);
4493	  emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4494					GEN_INT (mask & 1),
4495					GEN_INT (mask & 2 ? 3 : 2)));
4496	  DONE;
4497	})
4498
4499;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit vpunpckhqdq: from the concatenation of V4DI operands 1 and 2,
;; select elements (1, 5, 3, 7).  Requires TARGET_AVX2.
4500	(define_insn "avx2_interleave_highv4di"
4501	  [(set (match_operand:V4DI 0 "register_operand" "=x")
4502		(vec_select:V4DI
4503		  (vec_concat:V8DI
4504		    (match_operand:V4DI 1 "register_operand" "x")
4505		    (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4506		  (parallel [(const_int 1)
4507			     (const_int 5)
4508			     (const_int 3)
4509			     (const_int 7)])))]
4510	  "TARGET_AVX2"
4511	  "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4512	  [(set_attr "type" "sselog")
4513	   (set_attr "prefix" "vex")
4514	   (set_attr "mode" "OI")])
4515
;; Produce (op1[1], op2[1]) from two V2DI operands with punpckhqdq
;; (two-operand form, operand 1 tied to operand 0) or the three-operand
;; vpunpckhqdq.
4516	(define_insn "vec_interleave_highv2di"
4517	  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4518		(vec_select:V2DI
4519		  (vec_concat:V4DI
4520		    (match_operand:V2DI 1 "register_operand" "0,x")
4521		    (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4522		  (parallel [(const_int 1)
4523			     (const_int 3)])))]
4524	  "TARGET_SSE2"
4525	  "@
4526	   punpckhqdq\t{%2, %0|%0, %2}
4527	   vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4528	  [(set_attr "isa" "noavx,avx")
4529	   (set_attr "type" "sselog")
4530	   (set_attr "prefix_data16" "1,*")
4531	   (set_attr "prefix" "orig,vex")
4532	   (set_attr "mode" "TI")])
4533
;; Select elements 0, 4, 2 and 6 of the V8DI concatenation of operands 1
;; and 2.  Enabled for TARGET_AVX2 and output as vpunpcklqdq.
4534(define_insn "avx2_interleave_lowv4di"
4535  [(set (match_operand:V4DI 0 "register_operand" "=x")
4536	(vec_select:V4DI
4537	  (vec_concat:V8DI
4538	    (match_operand:V4DI 1 "register_operand" "x")
4539	    (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4540	  (parallel [(const_int 0)
4541		     (const_int 4)
4542		     (const_int 2)
4543		     (const_int 6)])))]
4544  "TARGET_AVX2"
4545  "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4546  [(set_attr "type" "sselog")
4547   (set_attr "prefix" "vex")
4548   (set_attr "mode" "OI")])
4549
;; Select elements 0 and 2 of the V4DI concatenation of operands 1 and 2.
;; Alternative 0 (isa noavx) ties operand 1 to the destination and uses the
;; two-operand punpcklqdq; alternative 1 (isa avx) uses the three-operand
;; vpunpcklqdq.
4550(define_insn "vec_interleave_lowv2di"
4551  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4552	(vec_select:V2DI
4553	  (vec_concat:V4DI
4554	    (match_operand:V2DI 1 "register_operand" "0,x")
4555	    (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4556	  (parallel [(const_int 0)
4557		     (const_int 2)])))]
4558  "TARGET_SSE2"
4559  "@
4560   punpcklqdq\t{%2, %0|%0, %2}
4561   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4562  [(set_attr "isa" "noavx,avx")
4563   (set_attr "type" "sselog")
4564   (set_attr "prefix_data16" "1,*")
4565   (set_attr "prefix" "orig,vex")
4566   (set_attr "mode" "TI")])
4567
;; Two-element shuffle for the VI8F_128 modes: result element 0 is chosen by
;; operand 3 (predicate allows 0..1) and result element 1 by operand 4
;; (predicate allows 2..3), both indexing the concatenation of operands 1
;; and 2.  The output code folds the two selectors into a single immediate.
4568(define_insn "sse2_shufpd_<mode>"
4569  [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4570	(vec_select:VI8F_128
4571	  (vec_concat:<ssedoublevecmode>
4572	    (match_operand:VI8F_128 1 "register_operand" "0,x")
4573	    (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4574	  (parallel [(match_operand 3 "const_0_to_1_operand" "")
4575		     (match_operand 4 "const_2_to_3_operand" "")])))]
4576  "TARGET_SSE2"
4577{
4578  int mask;
  /* Bit 0 comes from operand 3; bit 1 is operand 4 rebased from 2..3 to
     0..1.  operands[3] is overwritten so that %3 prints the immediate.  */
4579  mask = INTVAL (operands[3]);
4580  mask |= (INTVAL (operands[4]) - 2) << 1;
4581  operands[3] = GEN_INT (mask);
4582
  /* Alternative 0 is the two-operand form, alternative 1 the VEX form.  */
4583  switch (which_alternative)
4584    {
4585    case 0:
4586      return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4587    case 1:
4588      return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4589    default:
4590      gcc_unreachable ();
4591    }
4592}
4593  [(set_attr "isa" "noavx,avx")
4594   (set_attr "type" "sselog")
4595   (set_attr "length_immediate" "1")
4596   (set_attr "prefix" "orig,vex")
4597   (set_attr "mode" "V2DF")])
4598
4599;; Avoid combining registers from different units in a single alternative,
4600;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 of the V2DF operand 1 into the DF operand 0.
;; The insn condition rejects the case where both operands are memory.
;; Alternatives 0-2 have real templates; alternatives 3-5 (source in
;; offsettable memory) output "#" and so have to be split.
4601(define_insn "sse2_storehpd"
4602  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x,x,*f,r")
4603	(vec_select:DF
4604	  (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4605	  (parallel [(const_int 1)])))]
4606  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4607  "@
4608   %vmovhpd\t{%1, %0|%0, %1}
4609   unpckhpd\t%0, %0
4610   vunpckhpd\t{%d1, %0|%0, %d1}
4611   #
4612   #
4613   #"
4614  [(set_attr "isa" "*,noavx,avx,*,*,*")
4615   (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
   ;; prefix_data16 is 1 only for alternative 0 when not compiling for AVX.
4616   (set (attr "prefix_data16")
4617     (if_then_else
4618       (and (eq_attr "alternative" "0")
4619	    (not (match_test "TARGET_AVX")))
4620       (const_string "1")
4621       (const_string "*")))
4622   (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4623   (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
4624
;; After reload, extracting element 1 of a V2DF memory operand into a
;; register becomes a plain DF move from the same address plus 8 bytes.
4625(define_split
4626  [(set (match_operand:DF 0 "register_operand" "")
4627	(vec_select:DF
4628	  (match_operand:V2DF 1 "memory_operand" "")
4629	  (parallel [(const_int 1)])))]
4630  "TARGET_SSE2 && reload_completed"
4631  [(set (match_dup 0) (match_dup 1))]
4632  "operands[1] = adjust_address (operands[1], DFmode, 8);")
4633
;; Element-1 extraction from V2DF for targets with SSE but without SSE2
;; (see the insn condition).  The memory-source alternative reads the
;; operand through %H1.
4634(define_insn "*vec_extractv2df_1_sse"
4635  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4636	(vec_select:DF
4637	  (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4638	  (parallel [(const_int 1)])))]
4639  "!TARGET_SSE2 && TARGET_SSE
4640   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4641  "@
4642   movhps\t{%1, %0|%0, %1}
4643   movhlps\t{%1, %0|%0, %1}
4644   movlps\t{%H1, %0|%0, %H1}"
4645  [(set_attr "type" "ssemov")
4646   (set_attr "mode" "V2SF,V4SF,V2SF")])
4647
4648;; Avoid combining registers from different units in a single alternative,
4649;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 of the V2DF operand 1 into the DF operand 0.
;; Only alternative 0 (register source, memory destination) has a real
;; template; the remaining alternatives output "#" and have to be split.
4650(define_insn "sse2_storelpd"
4651  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x,*f,r")
4652	(vec_select:DF
4653	  (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4654	  (parallel [(const_int 0)])))]
4655  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4656  "@
4657   %vmovlpd\t{%1, %0|%0, %1}
4658   #
4659   #
4660   #
4661   #"
4662  [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4663   (set_attr "prefix_data16" "1,*,*,*,*")
4664   (set_attr "prefix" "maybe_vex")
4665   (set_attr "mode" "V1DF,DF,DF,DF,DF")])
4666
;; After reload, extracting element 0 of a V2DF operand into a register
;; becomes a plain DF move from the low part of that operand.
4667(define_split
4668  [(set (match_operand:DF 0 "register_operand" "")
4669	(vec_select:DF
4670	  (match_operand:V2DF 1 "nonimmediate_operand" "")
4671	  (parallel [(const_int 0)])))]
4672  "TARGET_SSE2 && reload_completed"
4673  [(const_int 0)]
4674{
4675  rtx op1 = operands[1];
  /* A register source is re-expressed as a DFmode register with the
     same register number; any other source goes through gen_lowpart.  */
4676  if (REG_P (op1))
4677    op1 = gen_rtx_REG (DFmode, REGNO (op1));
4678  else
4679    op1 = gen_lowpart (DFmode, op1);
4680  emit_move_insn (operands[0], op1);
4681  DONE;
4682})
4683
;; Element-0 extraction from V2DF for targets with SSE but without SSE2
;; (see the insn condition).
4684(define_insn "*vec_extractv2df_0_sse"
4685  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4686	(vec_select:DF
4687	  (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4688	  (parallel [(const_int 0)])))]
4689  "!TARGET_SSE2 && TARGET_SSE
4690   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4691  "@
4692   movlps\t{%1, %0|%0, %1}
4693   movaps\t{%1, %0|%0, %1}
4694   movlps\t{%1, %0|%0, %1}"
4695  [(set_attr "type" "ssemov")
4696   (set_attr "mode" "V2SF,V4SF,V2SF")])
4697
;; Expander wrapper around sse2_loadhpd: the result keeps element 0 of
;; operand 1 and takes the DF operand 2 as its second element.
4698(define_expand "sse2_loadhpd_exp"
4699  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4700	(vec_concat:V2DF
4701	  (vec_select:DF
4702	    (match_operand:V2DF 1 "nonimmediate_operand" "")
4703	    (parallel [(const_int 0)]))
4704	  (match_operand:DF 2 "nonimmediate_operand" "")))]
4705  "TARGET_SSE2"
4706{
  /* ix86_fixup_binary_operands returns the destination to use, which may
     differ from operands[0].  */
4707  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4708
4709  emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4710
4711  /* Fix up the destination if needed.  */
4712  if (dst != operands[0])
4713    emit_move_insn (operands[0], dst);
4714
4715  DONE;
4716})
4717
4718;; Avoid combining registers from different units in a single alternative,
4719;; see comment above inline_secondary_memory_needed function in i386.c
;; Build a V2DF from element 0 of operand 1 followed by the DF operand 2.
;; The insn condition rejects operands 1 and 2 both being memory.
;; Alternatives 0-3 load into an SSE register; alternatives 4-6 have a
;; memory destination tied to operand 1, output "#" and have to be split.
4720(define_insn "sse2_loadhpd"
4721  [(set (match_operand:V2DF 0 "nonimmediate_operand"
4722	  "=x,x,x,x,o,o ,o")
4723	(vec_concat:V2DF
4724	  (vec_select:DF
4725	    (match_operand:V2DF 1 "nonimmediate_operand"
4726	  " 0,x,0,x,0,0 ,0")
4727	    (parallel [(const_int 0)]))
4728	  (match_operand:DF 2 "nonimmediate_operand"
4729	  " m,m,x,x,x,*f,r")))]
4730  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4731  "@
4732   movhpd\t{%2, %0|%0, %2}
4733   vmovhpd\t{%2, %1, %0|%0, %1, %2}
4734   unpcklpd\t{%2, %0|%0, %2}
4735   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4736   #
4737   #
4738   #"
4739  [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4740   (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
4741   (set_attr "prefix_data16" "1,*,*,*,*,*,*")
4742   (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
4743   (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
4744
;; After reload, replacing the second element of a V2DF in memory with a
;; register value (first element unchanged) becomes a plain DF store to
;; the same address plus 8 bytes.
4745(define_split
4746  [(set (match_operand:V2DF 0 "memory_operand" "")
4747	(vec_concat:V2DF
4748	  (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4749	  (match_operand:DF 1 "register_operand" "")))]
4750  "TARGET_SSE2 && reload_completed"
4751  [(set (match_dup 0) (match_dup 1))]
4752  "operands[0] = adjust_address (operands[0], DFmode, 8);")
4753
;; Expander wrapper around sse2_loadlpd: the result takes the DF operand 2
;; as its first element and keeps element 1 of operand 1.
4754(define_expand "sse2_loadlpd_exp"
4755  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4756	(vec_concat:V2DF
4757	  (match_operand:DF 2 "nonimmediate_operand" "")
4758	  (vec_select:DF
4759	    (match_operand:V2DF 1 "nonimmediate_operand" "")
4760	    (parallel [(const_int 1)]))))]
4761  "TARGET_SSE2"
4762{
  /* ix86_fixup_binary_operands returns the destination to use, which may
     differ from operands[0].  */
4763  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4764
4765  emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4766
4767  /* Fix up the destination if needed.  */
4768  if (dst != operands[0])
4769    emit_move_insn (operands[0], dst);
4770
4771  DONE;
4772})
4773
4774;; Avoid combining registers from different units in a single alternative,
4775;; see comment above inline_secondary_memory_needed function in i386.c
;; Build a V2DF from the DF operand 2 followed by element 1 of operand 1.
;; The insn condition rejects operands 1 and 2 both being memory.
;; Alternative 0 accepts the "C" constraint for operand 1 (presumably the
;; zero vector; confirm in constraints.md).  Alternatives 8-10 have a
;; memory destination tied to operand 1, output "#" and have to be split.
4776(define_insn "sse2_loadlpd"
4777  [(set (match_operand:V2DF 0 "nonimmediate_operand"
4778	  "=x,x,x,x,x,x,x,x,m,m ,m")
4779	(vec_concat:V2DF
4780	  (match_operand:DF 2 "nonimmediate_operand"
4781	  " m,m,m,x,x,0,0,x,x,*f,r")
4782	  (vec_select:DF
4783	    (match_operand:V2DF 1 "vector_move_operand"
4784	  " C,0,x,0,x,x,o,o,0,0 ,0")
4785	    (parallel [(const_int 1)]))))]
4786  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4787  "@
4788   %vmovsd\t{%2, %0|%0, %2}
4789   movlpd\t{%2, %0|%0, %2}
4790   vmovlpd\t{%2, %1, %0|%0, %1, %2}
4791   movsd\t{%2, %0|%0, %2}
4792   vmovsd\t{%2, %1, %0|%0, %1, %2}
4793   shufpd\t{$2, %1, %0|%0, %1, 2}
4794   movhpd\t{%H1, %0|%0, %H1}
4795   vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4796   #
4797   #
4798   #"
4799  [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
   ;; Type is sselog for the shufpd alternative (5), fmov/imov for the
   ;; x87 and integer register alternatives (9, 10), ssemov otherwise.
4800   (set (attr "type")
4801     (cond [(eq_attr "alternative" "5")
4802	      (const_string "sselog")
4803	    (eq_attr "alternative" "9")
4804	      (const_string "fmov")
4805	    (eq_attr "alternative" "10")
4806	      (const_string "imov")
4807	   ]
4808	   (const_string "ssemov")))
4809   (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4810   (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4811   (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4812   (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
4813
;; After reload, replacing the first element of a V2DF in memory with a
;; register value (second element unchanged) becomes a plain DF store to
;; the same address at offset 0.
4814(define_split
4815  [(set (match_operand:V2DF 0 "memory_operand" "")
4816	(vec_concat:V2DF
4817	  (match_operand:DF 1 "register_operand" "")
4818	  (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4819  "TARGET_SSE2 && reload_completed"
4820  [(set (match_dup 0) (match_dup 1))]
4821  "operands[0] = adjust_address (operands[0], DFmode, 0);")
4822
;; vec_merge of operand 2 and operand 1 under the constant mask 1, in
;; V2DF mode.  Nine alternatives cover register and memory forms; only
;; alternative 5 (shufpd) carries an immediate and is typed sselog.
4823(define_insn "sse2_movsd"
4824  [(set (match_operand:V2DF 0 "nonimmediate_operand"   "=x,x,x,x,m,x,x,x,o")
4825	(vec_merge:V2DF
4826	  (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4827	  (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4828	  (const_int 1)))]
4829  "TARGET_SSE2"
4830  "@
4831   movsd\t{%2, %0|%0, %2}
4832   vmovsd\t{%2, %1, %0|%0, %1, %2}
4833   movlpd\t{%2, %0|%0, %2}
4834   vmovlpd\t{%2, %1, %0|%0, %1, %2}
4835   %vmovlpd\t{%2, %0|%0, %2}
4836   shufpd\t{$2, %1, %0|%0, %1, 2}
4837   movhps\t{%H1, %0|%0, %H1}
4838   vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4839   %vmovhps\t{%1, %H0|%H0, %1}"
4840  [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
4841   (set (attr "type")
4842     (if_then_else
4843       (eq_attr "alternative" "5")
4844       (const_string "sselog")
4845       (const_string "ssemov")))
   ;; prefix_data16 is 1 only for alternatives 2 and 4 when not compiling
   ;; for AVX.
4846   (set (attr "prefix_data16")
4847     (if_then_else
4848       (and (eq_attr "alternative" "2,4")
4849	    (not (match_test "TARGET_AVX")))
4850       (const_string "1")
4851       (const_string "*")))
4852   (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4853   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4854   (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
4855
;; Duplicate the DF operand 1 into both elements of a V2DF register.
;; Alternative 0 (isa noavx) needs operand 1 tied to the destination and
;; uses unpcklpd; alternative 1 (isa sse3) uses movddup from a register
;; or memory.
4856(define_insn "vec_dupv2df"
4857  [(set (match_operand:V2DF 0 "register_operand"     "=x,x")
4858	(vec_duplicate:V2DF
4859	  (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
4860  "TARGET_SSE2"
4861  "@
4862   unpcklpd\t%0, %0
4863   %vmovddup\t{%1, %0|%0, %1}"
4864  [(set_attr "isa" "noavx,sse3")
4865   (set_attr "type" "sselog1")
4866   (set_attr "prefix" "orig,maybe_vex")
4867   (set_attr "mode" "V2DF,DF")])
4868
;; Concatenate the DF operands 1 and 2 into a V2DF register.  The insn
;; itself only requires TARGET_SSE; the per-alternative "isa" attribute
;; restricts each template further.  Alternative 2 has operand 2 matching
;; operand 1 (constraint "1") and is output as movddup.
4869(define_insn "*vec_concatv2df"
4870  [(set (match_operand:V2DF 0 "register_operand"     "=x,x,x,x,x,x,x,x")
4871	(vec_concat:V2DF
4872	  (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
4873	  (match_operand:DF 2 "vector_move_operand"  " x,x,1,m,m,C,x,m")))]
4874  "TARGET_SSE"
4875  "@
4876   unpcklpd\t{%2, %0|%0, %2}
4877   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4878   %vmovddup\t{%1, %0|%0, %1}
4879   movhpd\t{%2, %0|%0, %2}
4880   vmovhpd\t{%2, %1, %0|%0, %1, %2}
4881   %vmovsd\t{%1, %0|%0, %1}
4882   movlhps\t{%2, %0|%0, %2}
4883   movhps\t{%2, %0|%0, %2}"
4884  [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
4885   (set (attr "type")
4886     (if_then_else
4887       (eq_attr "alternative" "0,1,2")
4888       (const_string "sselog")
4889       (const_string "ssemov")))
4890   (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
4891   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
4892   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
4893
4894;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4895;;
4896;; Parallel integral arithmetic
4897;;
4898;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4899
;; Vector integer negation, expressed as (0 - operand 1).  The preparation
;; statement sets operand 2 to the mode's zero constant forced into a
;; register.
4900(define_expand "neg<mode>2"
4901  [(set (match_operand:VI_AVX2 0 "register_operand" "")
4902	(minus:VI_AVX2
4903	  (match_dup 2)
4904	  (match_operand:VI_AVX2 1 "nonimmediate_operand" "")))]
4905  "TARGET_SSE2"
4906  "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
4907
;; Vector integer add/subtract expander for the VI_AVX2 modes; operands
;; are first fixed up by ix86_fixup_binary_operands_no_copy.
4908(define_expand "<plusminus_insn><mode>3"
4909  [(set (match_operand:VI_AVX2 0 "register_operand" "")
4910	(plusminus:VI_AVX2
4911	  (match_operand:VI_AVX2 1 "nonimmediate_operand" "")
4912	  (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
4913  "TARGET_SSE2"
4914  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
4915
;; Vector integer add/subtract for the VI_AVX2 modes.  Alternative 0
;; (isa noavx) ties operand 1 to the destination; alternative 1 (isa avx)
;; is the three-operand VEX form.  <comm> supplies the commutativity
;; marker per operation.
4916(define_insn "*<plusminus_insn><mode>3"
4917  [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
4918	(plusminus:VI_AVX2
4919	  (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4920	  (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4921  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4922  "@
4923   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4924   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4925  [(set_attr "isa" "noavx,avx")
4926   (set_attr "type" "sseiadd")
4927   (set_attr "prefix_data16" "1,*")
4928   (set_attr "prefix" "orig,vex")
4929   (set_attr "mode" "<sseinsnmode>")])
4930
;; Saturating add/subtract expander (sat_plusminus) for the VI12_AVX2
;; modes; operands are first fixed up by
;; ix86_fixup_binary_operands_no_copy.
4931(define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
4932  [(set (match_operand:VI12_AVX2 0 "register_operand" "")
4933	(sat_plusminus:VI12_AVX2
4934	  (match_operand:VI12_AVX2 1 "nonimmediate_operand" "")
4935	  (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))]
4936  "TARGET_SSE2"
4937  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
4938
;; Saturating add/subtract (sat_plusminus) for the VI12_AVX2 modes.
;; Alternative 0 (isa noavx) ties operand 1 to the destination;
;; alternative 1 (isa avx) is the three-operand VEX form.
;; The "mode" attribute is <sseinsnmode> rather than a fixed "TI" so that
;; it tracks the vector width of the iterator mode, matching the
;; non-saturating *<plusminus_insn><mode>3 pattern.
4939(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
4940  [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
4941	(sat_plusminus:VI12_AVX2
4942	  (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4943	  (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4944  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4945  "@
4946   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4947   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4948  [(set_attr "isa" "noavx,avx")
4949   (set_attr "type" "sseiadd")
4950   (set_attr "prefix_data16" "1,*")
4951   (set_attr "prefix" "orig,vex")
4952   (set_attr "mode" "<sseinsnmode>")])
4953
;; Byte-vector multiply for the VI1_AVX2 modes.  The insn outputs "#" and
;; is only valid while pseudos can be created; the split (condition
;; "&& 1") widens the bytes to <sseunpackmode> elements, multiplies there
;; and gathers the even bytes of the two products back into operand 0.
4954(define_insn_and_split "mul<mode>3"
4955  [(set (match_operand:VI1_AVX2 0 "register_operand" "")
4956	(mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "")
4957		       (match_operand:VI1_AVX2 2 "register_operand" "")))]
4958  "TARGET_SSE2
4959   && can_create_pseudo_p ()"
4960  "#"
4961  "&& 1"
4962  [(const_int 0)]
4963{
4964  rtx t[6];
4965  int i;
4966  enum machine_mode mulmode = <sseunpackmode>mode;
4967
  /* t[0..3] hold the interleaved inputs, t[4] and t[5] the products.  */
4968  for (i = 0; i < 6; ++i)
4969    t[i] = gen_reg_rtx (<MODE>mode);
4970
4971  /* Unpack data such that we've got a source byte in each low byte of
4972     each word.  We don't care what goes into the high byte of each word.
4973     Rather than trying to get zero in there, most convenient is to let
4974     it be a copy of the low byte.  */
4975  emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[0], operands[1],
4976						   operands[1]));
4977  emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[1], operands[2],
4978						   operands[2]));
4979  emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[2], operands[1],
4980						  operands[1]));
4981  emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[3], operands[2],
4982						  operands[2]));
4983
4984  /* Multiply words.  The end-of-line annotations here give a picture of what
4985     the output of that instruction looks like.  Dot means don't care; the
4986     letters are the bytes of the result with A being the most significant.  */
4987  emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[4]),
4988			  gen_rtx_MULT (mulmode,	/* .A.B.C.D.E.F.G.H */
4989					gen_lowpart (mulmode, t[0]),
4990					gen_lowpart (mulmode, t[1]))));
4991  emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[5]),
4992			  gen_rtx_MULT (mulmode,	/* .I.J.K.L.M.N.O.P */
4993					gen_lowpart (mulmode, t[2]),
4994					gen_lowpart (mulmode, t[3]))));
4995
4996  /* Extract the even bytes and merge them back together.  */
4997  if (<MODE>mode == V16QImode)
4998    ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
4999  else
5000    {
5001      /* Since avx2_interleave_{low,high}v32qi used above aren't cross-lane,
5002	 this can't be normal even extraction, but one where additionally
5003	 the second and third quarter are swapped.  That is even one insn
5004	 shorter than even extraction.  */
5005      rtvec v = rtvec_alloc (32);
5006      for (i = 0; i < 32; ++i)
5007	RTVEC_ELT (v, i)
5008	  = GEN_INT (i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0));
      /* The t[] array is reused as the argument block for
	 ix86_expand_vec_perm_const: presumably target, first input,
	 second input, selector -- confirm against its definition.  */
5009      t[0] = operands[0];
5010      t[1] = t[5];
5011      t[2] = t[4];
5012      t[3] = gen_rtx_CONST_VECTOR (<MODE>mode, v);
5013      ix86_expand_vec_perm_const (t);
5014    }
5015
  /* Record on the last emitted insn that the result equals the plain
     vector multiply of the two inputs.  */
5016  set_unique_reg_note (get_last_insn (), REG_EQUAL,
5017		       gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
5018  DONE;
5019})
5020
;; Word-vector multiply expander for the VI2_AVX2 modes; operands are
;; first fixed up by ix86_fixup_binary_operands_no_copy.
5021(define_expand "mul<mode>3"
5022  [(set (match_operand:VI2_AVX2 0 "register_operand" "")
5023	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")
5024		       (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))]
5025  "TARGET_SSE2"
5026  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
5027
;; Word-vector multiply for the VI2_AVX2 modes, output as pmullw (isa
;; noavx, operand 1 tied to the destination) or vpmullw (isa avx).
;; Operand 1 is marked commutative with "%".
5028(define_insn "*mul<mode>3"
5029  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5030	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
5031		       (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5032  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5033  "@
5034   pmullw\t{%2, %0|%0, %2}
5035   vpmullw\t{%2, %1, %0|%0, %1, %2}"
5036  [(set_attr "isa" "noavx,avx")
5037   (set_attr "type" "sseimul")
5038   (set_attr "prefix_data16" "1,*")
5039   (set_attr "prefix" "orig,vex")
5040   (set_attr "mode" "<sseinsnmode>")])
5041
;; High-part word multiply expander for the VI2_AVX2 modes: both inputs
;; are extended (any_extend) to <ssedoublemode>, multiplied, shifted right
;; logically by 16 and truncated back.
5042(define_expand "<s>mul<mode>3_highpart"
5043  [(set (match_operand:VI2_AVX2 0 "register_operand" "")
5044	(truncate:VI2_AVX2
5045	  (lshiftrt:<ssedoublemode>
5046	    (mult:<ssedoublemode>
5047	      (any_extend:<ssedoublemode>
5048		(match_operand:VI2_AVX2 1 "nonimmediate_operand" ""))
5049	      (any_extend:<ssedoublemode>
5050		(match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))
5051	    (const_int 16))))]
5052  "TARGET_SSE2"
5053  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
5054
;; High-part word multiply for the VI2_AVX2 modes, output as pmulh<u>w
;; (isa noavx, operand 1 tied to the destination) or vpmulh<u>w (isa avx).
;; Operand 1 is marked commutative with "%".
5055(define_insn "*<s>mul<mode>3_highpart"
5056  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5057	(truncate:VI2_AVX2
5058	  (lshiftrt:<ssedoublemode>
5059	    (mult:<ssedoublemode>
5060	      (any_extend:<ssedoublemode>
5061		(match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
5062	      (any_extend:<ssedoublemode>
5063		(match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
5064	    (const_int 16))))]
5065  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5066  "@
5067   pmulh<u>w\t{%2, %0|%0, %2}
5068   vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5069  [(set_attr "isa" "noavx,avx")
5070   (set_attr "type" "sseimul")
5071   (set_attr "prefix_data16" "1,*")
5072   (set_attr "prefix" "orig,vex")
5073   (set_attr "mode" "<sseinsnmode>")])
5074
;; Unsigned widening multiply expander: elements 0, 2, 4 and 6 of each
;; V8SI input are zero-extended to DI and multiplied, giving a V4DI.
5075(define_expand "avx2_umulv4siv4di3"
5076  [(set (match_operand:V4DI 0 "register_operand" "")
5077	(mult:V4DI
5078	  (zero_extend:V4DI
5079	    (vec_select:V4SI
5080	      (match_operand:V8SI 1 "nonimmediate_operand" "")
5081	      (parallel [(const_int 0) (const_int 2)
5082			 (const_int 4) (const_int 6)])))
5083	  (zero_extend:V4DI
5084	    (vec_select:V4SI
5085	      (match_operand:V8SI 2 "nonimmediate_operand" "")
5086	      (parallel [(const_int 0) (const_int 2)
5087			 (const_int 4) (const_int 6)])))))]
5088  "TARGET_AVX2"
5089  "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
5090
;; Unsigned widening multiply of elements 0, 2, 4 and 6 of two V8SI
;; inputs into a V4DI, output as vpmuludq.  Operand 1 is marked
;; commutative with "%".
;; The pattern is gated on TARGET_AVX2 and pairs with the
;; avx2_umulv4siv4di3 expander, so it is named with the avx2_ prefix.
;; Names starting with "*" are not used to generate gen_* functions.
5091(define_insn "*avx2_umulv4siv4di3"
5092  [(set (match_operand:V4DI 0 "register_operand" "=x")
5093	(mult:V4DI
5094	  (zero_extend:V4DI
5095	    (vec_select:V4SI
5096	      (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5097	      (parallel [(const_int 0) (const_int 2)
5098			 (const_int 4) (const_int 6)])))
5099	  (zero_extend:V4DI
5100	    (vec_select:V4SI
5101	      (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5102	      (parallel [(const_int 0) (const_int 2)
5103			 (const_int 4) (const_int 6)])))))]
5104  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5105  "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5106  [(set_attr "type" "sseimul")
5107   (set_attr "prefix" "vex")
5108   (set_attr "mode" "OI")])
5109
;; Unsigned widening multiply expander: elements 0 and 2 of each V4SI
;; input are zero-extended to DI and multiplied, giving a V2DI.
5110(define_expand "sse2_umulv2siv2di3"
5111  [(set (match_operand:V2DI 0 "register_operand" "")
5112	(mult:V2DI
5113	  (zero_extend:V2DI
5114	    (vec_select:V2SI
5115	      (match_operand:V4SI 1 "nonimmediate_operand" "")
5116	      (parallel [(const_int 0) (const_int 2)])))
5117	  (zero_extend:V2DI
5118	    (vec_select:V2SI
5119	      (match_operand:V4SI 2 "nonimmediate_operand" "")
5120	      (parallel [(const_int 0) (const_int 2)])))))]
5121  "TARGET_SSE2"
5122  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
5123
;; Unsigned widening multiply of elements 0 and 2 of two V4SI inputs into
;; a V2DI, output as pmuludq (isa noavx, operand 1 tied to the
;; destination) or vpmuludq (isa avx).  Operand 1 is marked commutative.
5124(define_insn "*sse2_umulv2siv2di3"
5125  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5126	(mult:V2DI
5127	  (zero_extend:V2DI
5128	    (vec_select:V2SI
5129	      (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5130	      (parallel [(const_int 0) (const_int 2)])))
5131	  (zero_extend:V2DI
5132	    (vec_select:V2SI
5133	      (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5134	      (parallel [(const_int 0) (const_int 2)])))))]
5135  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5136  "@
5137   pmuludq\t{%2, %0|%0, %2}
5138   vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5139  [(set_attr "isa" "noavx,avx")
5140   (set_attr "type" "sseimul")
5141   (set_attr "prefix_data16" "1,*")
5142   (set_attr "prefix" "orig,vex")
5143   (set_attr "mode" "TI")])
5144
;; Signed widening multiply expander: elements 0, 2, 4 and 6 of each V8SI
;; input are sign-extended to DI and multiplied, giving a V4DI.
5145(define_expand "avx2_mulv4siv4di3"
5146  [(set (match_operand:V4DI 0 "register_operand" "")
5147	(mult:V4DI
5148	  (sign_extend:V4DI
5149	    (vec_select:V4SI
5150	      (match_operand:V8SI 1 "nonimmediate_operand" "")
5151	      (parallel [(const_int 0) (const_int 2)
5152			 (const_int 4) (const_int 6)])))
5153	  (sign_extend:V4DI
5154	    (vec_select:V4SI
5155	      (match_operand:V8SI 2 "nonimmediate_operand" "")
5156	      (parallel [(const_int 0) (const_int 2)
5157			 (const_int 4) (const_int 6)])))))]
5158  "TARGET_AVX2"
5159  "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
5160
;; Signed widening multiply of elements 0, 2, 4 and 6 of two V8SI inputs
;; into a V4DI, output as vpmuldq.
;; Operand 1 carries the "%" commutative modifier: the insn condition
;; checks the operands with ix86_binary_operator_ok (MULT, ...), and the
;; other widening-multiply insns in this file mark operand 1 the same
;; way.  With "%", register allocation may swap operands 1 and 2 instead
;; of reloading a memory operand 1 into a register.
5161(define_insn "*avx2_mulv4siv4di3"
5162  [(set (match_operand:V4DI 0 "register_operand" "=x")
5163	(mult:V4DI
5164	  (sign_extend:V4DI
5165	    (vec_select:V4SI
5166	      (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5167	      (parallel [(const_int 0) (const_int 2)
5168			 (const_int 4) (const_int 6)])))
5169	  (sign_extend:V4DI
5170	    (vec_select:V4SI
5171	      (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5172	      (parallel [(const_int 0) (const_int 2)
5173			 (const_int 4) (const_int 6)])))))]
5174  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5175  "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5176  [(set_attr "isa" "avx")
5177   (set_attr "type" "sseimul")
5178   (set_attr "prefix_extra" "1")
5179   (set_attr "prefix" "vex")
5180   (set_attr "mode" "OI")])
5181
;; Signed widening multiply expander: elements 0 and 2 of each V4SI input
;; are sign-extended to DI and multiplied, giving a V2DI.  Requires
;; TARGET_SSE4_1.
5182(define_expand "sse4_1_mulv2siv2di3"
5183  [(set (match_operand:V2DI 0 "register_operand" "")
5184	(mult:V2DI
5185	  (sign_extend:V2DI
5186	    (vec_select:V2SI
5187	      (match_operand:V4SI 1 "nonimmediate_operand" "")
5188	      (parallel [(const_int 0) (const_int 2)])))
5189	  (sign_extend:V2DI
5190	    (vec_select:V2SI
5191	      (match_operand:V4SI 2 "nonimmediate_operand" "")
5192	      (parallel [(const_int 0) (const_int 2)])))))]
5193  "TARGET_SSE4_1"
5194  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
5195
;; Signed widening multiply of elements 0 and 2 of two V4SI inputs into a
;; V2DI, output as pmuldq (isa noavx, operand 1 tied to the destination)
;; or vpmuldq (isa avx).  Operand 1 is marked commutative with "%".
5196(define_insn "*sse4_1_mulv2siv2di3"
5197  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5198	(mult:V2DI
5199	  (sign_extend:V2DI
5200	    (vec_select:V2SI
5201	      (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5202	      (parallel [(const_int 0) (const_int 2)])))
5203	  (sign_extend:V2DI
5204	    (vec_select:V2SI
5205	      (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5206	      (parallel [(const_int 0) (const_int 2)])))))]
5207  "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5208  "@
5209   pmuldq\t{%2, %0|%0, %2}
5210   vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5211  [(set_attr "isa" "noavx,avx")
5212   (set_attr "type" "sseimul")
5213   (set_attr "prefix_data16" "1,*")
5214   (set_attr "prefix_extra" "1")
5215   (set_attr "prefix" "orig,vex")
5216   (set_attr "mode" "TI")])
5217
;; Multiply-add expander on V16HI inputs: the sign-extended even-indexed
;; elements of operands 1 and 2 are multiplied, likewise the odd-indexed
;; elements, and the two V8SI products are added.
5218(define_expand "avx2_pmaddwd"
5219  [(set (match_operand:V8SI 0 "register_operand" "")
5220	(plus:V8SI
5221	  (mult:V8SI
5222	    (sign_extend:V8SI
5223	      (vec_select:V8HI
5224		(match_operand:V16HI 1 "nonimmediate_operand" "")
5225		(parallel [(const_int 0)
5226			   (const_int 2)
5227			   (const_int 4)
5228			   (const_int 6)
5229			   (const_int 8)
5230			   (const_int 10)
5231			   (const_int 12)
5232			   (const_int 14)])))
5233	    (sign_extend:V8SI
5234	      (vec_select:V8HI
5235		(match_operand:V16HI 2 "nonimmediate_operand" "")
5236		(parallel [(const_int 0)
5237			   (const_int 2)
5238			   (const_int 4)
5239			   (const_int 6)
5240			   (const_int 8)
5241			   (const_int 10)
5242			   (const_int 12)
5243			   (const_int 14)]))))
5244	  (mult:V8SI
5245	    (sign_extend:V8SI
5246	      (vec_select:V8HI (match_dup 1)
5247		(parallel [(const_int 1)
5248			   (const_int 3)
5249			   (const_int 5)
5250			   (const_int 7)
5251			   (const_int 9)
5252			   (const_int 11)
5253			   (const_int 13)
5254			   (const_int 15)])))
5255	    (sign_extend:V8SI
5256	      (vec_select:V8HI (match_dup 2)
5257		(parallel [(const_int 1)
5258			   (const_int 3)
5259			   (const_int 5)
5260			   (const_int 7)
5261			   (const_int 9)
5262			   (const_int 11)
5263			   (const_int 13)
5264			   (const_int 15)]))))))]
5265  "TARGET_AVX2"
5266  "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
5267
;; Multiply-add expander on V8HI inputs: the sign-extended even-indexed
;; elements of operands 1 and 2 are multiplied, likewise the odd-indexed
;; elements, and the two V4SI products are added.
5268(define_expand "sse2_pmaddwd"
5269  [(set (match_operand:V4SI 0 "register_operand" "")
5270	(plus:V4SI
5271	  (mult:V4SI
5272	    (sign_extend:V4SI
5273	      (vec_select:V4HI
5274		(match_operand:V8HI 1 "nonimmediate_operand" "")
5275		(parallel [(const_int 0)
5276			   (const_int 2)
5277			   (const_int 4)
5278			   (const_int 6)])))
5279	    (sign_extend:V4SI
5280	      (vec_select:V4HI
5281		(match_operand:V8HI 2 "nonimmediate_operand" "")
5282		(parallel [(const_int 0)
5283			   (const_int 2)
5284			   (const_int 4)
5285			   (const_int 6)]))))
5286	  (mult:V4SI
5287	    (sign_extend:V4SI
5288	      (vec_select:V4HI (match_dup 1)
5289		(parallel [(const_int 1)
5290			   (const_int 3)
5291			   (const_int 5)
5292			   (const_int 7)])))
5293	    (sign_extend:V4SI
5294	      (vec_select:V4HI (match_dup 2)
5295		(parallel [(const_int 1)
5296			   (const_int 3)
5297			   (const_int 5)
5298			   (const_int 7)]))))))]
5299  "TARGET_SSE2"
5300  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
5301
;; Matching insn for the V16HI multiply-add: products of the even-indexed
;; and of the odd-indexed sign-extended elements are added, output as
;; vpmaddwd.  Operand 1 is marked commutative with "%".
5302(define_insn "*avx2_pmaddwd"
5303  [(set (match_operand:V8SI 0 "register_operand" "=x")
5304	(plus:V8SI
5305	  (mult:V8SI
5306	    (sign_extend:V8SI
5307	      (vec_select:V8HI
5308		(match_operand:V16HI 1 "nonimmediate_operand" "%x")
5309		(parallel [(const_int 0)
5310			   (const_int 2)
5311			   (const_int 4)
5312			   (const_int 6)
5313			   (const_int 8)
5314			   (const_int 10)
5315			   (const_int 12)
5316			   (const_int 14)])))
5317	    (sign_extend:V8SI
5318	      (vec_select:V8HI
5319		(match_operand:V16HI 2 "nonimmediate_operand" "xm")
5320		(parallel [(const_int 0)
5321			   (const_int 2)
5322			   (const_int 4)
5323			   (const_int 6)
5324			   (const_int 8)
5325			   (const_int 10)
5326			   (const_int 12)
5327			   (const_int 14)]))))
5328	  (mult:V8SI
5329	    (sign_extend:V8SI
5330	      (vec_select:V8HI (match_dup 1)
5331		(parallel [(const_int 1)
5332			   (const_int 3)
5333			   (const_int 5)
5334			   (const_int 7)
5335			   (const_int 9)
5336			   (const_int 11)
5337			   (const_int 13)
5338			   (const_int 15)])))
5339	    (sign_extend:V8SI
5340	      (vec_select:V8HI (match_dup 2)
5341		(parallel [(const_int 1)
5342			   (const_int 3)
5343			   (const_int 5)
5344			   (const_int 7)
5345			   (const_int 9)
5346			   (const_int 11)
5347			   (const_int 13)
5348			   (const_int 15)]))))))]
5349  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5350  "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5351  [(set_attr "type" "sseiadd")
5352   (set_attr "prefix" "vex")
5353   (set_attr "mode" "OI")])
5354
;; Matching insn for the V8HI multiply-add: products of the even-indexed
;; and of the odd-indexed sign-extended elements are added, output as
;; pmaddwd (isa noavx, operand 1 tied to the destination) or vpmaddwd
;; (isa avx).  Operand 1 is marked commutative with "%".
5355(define_insn "*sse2_pmaddwd"
5356  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5357	(plus:V4SI
5358	  (mult:V4SI
5359	    (sign_extend:V4SI
5360	      (vec_select:V4HI
5361		(match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5362		(parallel [(const_int 0)
5363			   (const_int 2)
5364			   (const_int 4)
5365			   (const_int 6)])))
5366	    (sign_extend:V4SI
5367	      (vec_select:V4HI
5368		(match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5369		(parallel [(const_int 0)
5370			   (const_int 2)
5371			   (const_int 4)
5372			   (const_int 6)]))))
5373	  (mult:V4SI
5374	    (sign_extend:V4SI
5375	      (vec_select:V4HI (match_dup 1)
5376		(parallel [(const_int 1)
5377			   (const_int 3)
5378			   (const_int 5)
5379			   (const_int 7)])))
5380	    (sign_extend:V4SI
5381	      (vec_select:V4HI (match_dup 2)
5382		(parallel [(const_int 1)
5383			   (const_int 3)
5384			   (const_int 5)
5385			   (const_int 7)]))))))]
5386  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5387  "@
5388   pmaddwd\t{%2, %0|%0, %2}
5389   vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5390  [(set_attr "isa" "noavx,avx")
5391   (set_attr "type" "sseiadd")
5392   (set_attr "atom_unit" "simul")
5393   (set_attr "prefix_data16" "1,*")
5394   (set_attr "prefix" "orig,vex")
5395   (set_attr "mode" "TI")])
5396
;; Doubleword-vector multiply expander for the VI4_AVX2 modes.  Operands
;; are fixed up only when TARGET_SSE4_1 or TARGET_AVX is set; otherwise
;; the pattern is emitted unchanged.
5397(define_expand "mul<mode>3"
5398  [(set (match_operand:VI4_AVX2 0 "register_operand" "")
5399	(mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "")
5400		       (match_operand:VI4_AVX2 2 "register_operand" "")))]
5401  "TARGET_SSE2"
5402{
5403  if (TARGET_SSE4_1 || TARGET_AVX)
5404    ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
5405})
5406
;; Element-wise multiply in the VI4_AVX2 modes, output as pmulld
;; (alternative 0, operand 1 tied to the destination) or vpmulld
;; (alternative 1, three-operand VEX form).  Requires TARGET_SSE4_1 and
;; operands accepted by ix86_binary_operator_ok.
5407(define_insn "*<sse4_1_avx2>_mul<mode>3"
5408  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
5409	(mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
5410		       (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5411  "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5412  "@
5413   pmulld\t{%2, %0|%0, %2}
5414   vpmulld\t{%2, %1, %0|%0, %1, %2}"
5415  [(set_attr "isa" "noavx,avx")
5416   (set_attr "type" "sseimul")
5417   (set_attr "prefix_extra" "1")
5418   (set_attr "prefix" "orig,vex")
5419   (set_attr "mode" "<sseinsnmode>")])
5420
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor AVX.
;; The insn only matches while pseudos can still be created and is always
;; split ("&& 1") into: sse2_umulv2siv2di3 on the inputs, the same multiply
;; on both inputs shifted right by 32 bits as V1TI, two pshufd shuffles
;; with selectors 0,2,0,0, and a final vec_interleave_lowv4si into
;; operand 0.  A REG_EQUAL note holding the original MULT is attached to
;; the last emitted insn.
5421(define_insn_and_split "*sse2_mulv4si3"
5422  [(set (match_operand:V4SI 0 "register_operand" "")
5423	(mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5424		   (match_operand:V4SI 2 "register_operand" "")))]
5425  "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5426   && can_create_pseudo_p ()"
5427  "#"
5428  "&& 1"
5429  [(const_int 0)]
5430{
5431  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5432  rtx op0, op1, op2;
5433
5434  op0 = operands[0];
5435  op1 = operands[1];
5436  op2 = operands[2];
5437  t1 = gen_reg_rtx (V4SImode);
5438  t2 = gen_reg_rtx (V4SImode);
5439  t3 = gen_reg_rtx (V4SImode);
5440  t4 = gen_reg_rtx (V4SImode);
5441  t5 = gen_reg_rtx (V4SImode);
5442  t6 = gen_reg_rtx (V4SImode);
5443  thirtytwo = GEN_INT (32);
5444
5445  /* Multiply elements 2 and 0.  */
5446  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5447				     op1, op2));
5448
5449  /* Shift both input vectors down one element, so that elements 3
5450     and 1 are now in the slots for elements 2 and 0.  For K8, at
5451     least, this is faster than using a shuffle.  */
5452  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5453				 gen_lowpart (V1TImode, op1),
5454				 thirtytwo));
5455  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5456				 gen_lowpart (V1TImode, op2),
5457				 thirtytwo));
5458  /* Multiply elements 3 and 1.  */
5459  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5460				     t2, t3));
5461
5462  /* Move the results in element 2 down to element 1; we don't care
5463     what goes in elements 2 and 3.  */
5464  emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5465				const0_rtx, const0_rtx));
5466  emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5467				const0_rtx, const0_rtx));
5468
5469  /* Merge the parts back together.  */
5470  emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
5471
5472  set_unique_reg_note (get_last_insn (), REG_EQUAL,
5473		       gen_rtx_MULT (V4SImode, operands[1], operands[2]));
5474  DONE;
5475})
5476
;; Multiplication in the VI8_AVX2 modes.  The insn only matches while
;; pseudos can still be created and is always split ("&& 1").
;; With XOP and V2DImode the sequence is: pshufd (selectors 1,0,3,2) on
;; operand 1 viewed as V4SI, mulv4si3, xop_phadddq, a V2DI left shift by
;; 32 and xop_pmacsdql accumulating into operand 0.
;; Otherwise the product is built from three
;; <sse2_avx2>_umulv<ssescalarnum>si<mode>3 multiplies: one on the
;; unshifted inputs and two cross products against the inputs shifted right
;; by 32; the cross products are shifted left by 32 and all three are added.
;; A REG_EQUAL note holding the original MULT is attached to the last insn.
5477(define_insn_and_split "mul<mode>3"
5478  [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5479	(mult:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
5480		       (match_operand:VI8_AVX2 2 "register_operand" "")))]
5481  "TARGET_SSE2
5482   && can_create_pseudo_p ()"
5483  "#"
5484  "&& 1"
5485  [(const_int 0)]
5486{
5487  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5488  rtx op0, op1, op2;
5489
5490  op0 = operands[0];
5491  op1 = operands[1];
5492  op2 = operands[2];
5493
5494  if (TARGET_XOP && <MODE>mode == V2DImode)
5495    {
5496      /* op1: A,B,C,D, op2: E,F,G,H */
5497      op1 = gen_lowpart (V4SImode, op1);
5498      op2 = gen_lowpart (V4SImode, op2);
5499
5500      t1 = gen_reg_rtx (V4SImode);
5501      t2 = gen_reg_rtx (V4SImode);
5502      t3 = gen_reg_rtx (V2DImode);
5503      t4 = gen_reg_rtx (V2DImode);
5504
5505      /* t1: B,A,D,C */
5506      emit_insn (gen_sse2_pshufd_1 (t1, op1,
5507				    GEN_INT (1),
5508				    GEN_INT (0),
5509				    GEN_INT (3),
5510				    GEN_INT (2)));
5511
5512      /* t2: (B*E),(A*F),(D*G),(C*H) */
5513      emit_insn (gen_mulv4si3 (t2, t1, op2));
5514
5515      /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5516      emit_insn (gen_xop_phadddq (t3, t2));
5517
5518      /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5519      emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5520
5521      /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5522      emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5523    }
5524  else
5525    {
5526      t1 = gen_reg_rtx (<MODE>mode);
5527      t2 = gen_reg_rtx (<MODE>mode);
5528      t3 = gen_reg_rtx (<MODE>mode);
5529      t4 = gen_reg_rtx (<MODE>mode);
5530      t5 = gen_reg_rtx (<MODE>mode);
5531      t6 = gen_reg_rtx (<MODE>mode);
5532      thirtytwo = GEN_INT (32);
5533
5534      /* Multiply low parts.  */
5535      emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5536		  (t1, gen_lowpart (<ssepackmode>mode, op1),
5537		   gen_lowpart (<ssepackmode>mode, op2)));
5538
5539      /* Shift input vectors right 32 bits so we can multiply high parts.  */
5540      emit_insn (gen_lshr<mode>3 (t2, op1, thirtytwo));
5541      emit_insn (gen_lshr<mode>3 (t3, op2, thirtytwo));
5542
5543      /* Multiply high parts by low parts.  */
5544      emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5545		  (t4, gen_lowpart (<ssepackmode>mode, op1),
5546		   gen_lowpart (<ssepackmode>mode, t3)));
5547      emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5548		  (t5, gen_lowpart (<ssepackmode>mode, op2),
5549		   gen_lowpart (<ssepackmode>mode, t2)));
5550
5551      /* Shift them back.  */
5552      emit_insn (gen_ashl<mode>3 (t4, t4, thirtytwo));
5553      emit_insn (gen_ashl<mode>3 (t5, t5, thirtytwo));
5554
5555      /* Add the three parts together.  */
5556      emit_insn (gen_add<mode>3 (t6, t1, t4));
5557      emit_insn (gen_add<mode>3 (op0, t6, t5));
5558    }
5559
5560  set_unique_reg_note (get_last_insn (), REG_EQUAL,
5561		       gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
5562  DONE;
5563})
5564
;; Widening multiply ("hi" variant) for the VI2_AVX2 modes, signed or
;; unsigned per the any_extend iterator.  Emits mul<mode>3 and
;; <s>mul<mode>3_highpart on the same two inputs and combines the results
;; with vec_interleave_high<mode>, writing operand 0 through a <MODE>mode
;; lowpart view.
5565(define_expand "vec_widen_<s>mult_hi_<mode>"
5566  [(match_operand:<sseunpackmode> 0 "register_operand" "")
5567   (any_extend:<sseunpackmode>
5568     (match_operand:VI2_AVX2 1 "register_operand" ""))
5569   (match_operand:VI2_AVX2 2 "register_operand" "")]
5570  "TARGET_SSE2"
5571{
5572  rtx op1, op2, t1, t2, dest;
5573
5574  op1 = operands[1];
5575  op2 = operands[2];
5576  t1 = gen_reg_rtx (<MODE>mode);
5577  t2 = gen_reg_rtx (<MODE>mode);
5578  dest = gen_lowpart (<MODE>mode, operands[0]);
5579
5580  emit_insn (gen_mul<mode>3 (t1, op1, op2));
5581  emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5582  emit_insn (gen_vec_interleave_high<mode> (dest, t1, t2));
5583  DONE;
5584})
5585
;; Widening multiply ("lo" variant) for the VI2_AVX2 modes, signed or
;; unsigned per the any_extend iterator.  Emits mul<mode>3 and
;; <s>mul<mode>3_highpart on the same two inputs and combines the results
;; with vec_interleave_low<mode>, writing operand 0 through a <MODE>mode
;; lowpart view.
5586(define_expand "vec_widen_<s>mult_lo_<mode>"
5587  [(match_operand:<sseunpackmode> 0 "register_operand" "")
5588   (any_extend:<sseunpackmode>
5589     (match_operand:VI2_AVX2 1 "register_operand" ""))
5590   (match_operand:VI2_AVX2 2 "register_operand" "")]
5591  "TARGET_SSE2"
5592{
5593  rtx op1, op2, t1, t2, dest;
5594
5595  op1 = operands[1];
5596  op2 = operands[2];
5597  t1 = gen_reg_rtx (<MODE>mode);
5598  t2 = gen_reg_rtx (<MODE>mode);
5599  dest = gen_lowpart (<MODE>mode, operands[0]);
5600
5601  emit_insn (gen_mul<mode>3 (t1, op1, op2));
5602  emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5603  emit_insn (gen_vec_interleave_low<mode> (dest, t1, t2));
5604  DONE;
5605})
5606
;; Widening multiply ("hi" variant) from V8SI to V4DI for AVX2, signed or
;; unsigned per the any_extend iterator.  Each input is permuted as V4DI
;; with selectors 0,2,1,3 (avx2_permv4di_1), then shuffled as V8SI with the
;; pshufd immediate built from selectors 2,2,3,3, and the two results are
;; multiplied with avx2_<u>mulv4siv4di3 into operand 0.
5607(define_expand "vec_widen_<s>mult_hi_v8si"
5608  [(match_operand:V4DI 0 "register_operand" "")
5609   (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5610   (match_operand:V8SI 2 "nonimmediate_operand" "")]
5611  "TARGET_AVX2"
5612{
5613  rtx t1, t2, t3, t4;
5614
5615  t1 = gen_reg_rtx (V4DImode);
5616  t2 = gen_reg_rtx (V4DImode);
5617  t3 = gen_reg_rtx (V8SImode);
5618  t4 = gen_reg_rtx (V8SImode);
5619  emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5620				  const0_rtx, const2_rtx,
5621				  const1_rtx, GEN_INT (3)));
5622  emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
5623				  const0_rtx, const2_rtx,
5624				  const1_rtx, GEN_INT (3)));
5625  emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5626				GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5627  emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5628				GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5629  emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
5630  DONE;
5631})
5632
;; Widening multiply ("lo" variant) from V8SI to V4DI for AVX2, signed or
;; unsigned per the any_extend iterator.  Each input is permuted as V4DI
;; with selectors 0,2,1,3 (avx2_permv4di_1), then shuffled as V8SI with the
;; pshufd immediate built from selectors 0,0,1,1, and the two results are
;; multiplied with avx2_<u>mulv4siv4di3 into operand 0.
5633(define_expand "vec_widen_<s>mult_lo_v8si"
5634  [(match_operand:V4DI 0 "register_operand" "")
5635   (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5636   (match_operand:V8SI 2 "nonimmediate_operand" "")]
5637  "TARGET_AVX2"
5638{
5639  rtx t1, t2, t3, t4;
5640
5641  t1 = gen_reg_rtx (V4DImode);
5642  t2 = gen_reg_rtx (V4DImode);
5643  t3 = gen_reg_rtx (V8SImode);
5644  t4 = gen_reg_rtx (V8SImode);
5645  emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5646				  const0_rtx, const2_rtx,
5647				  const1_rtx, GEN_INT (3)));
5648  emit_insn (gen_avx2_permv4di_1 (t2,  gen_lowpart (V4DImode, operands[2]),
5649				  const0_rtx, const2_rtx,
5650				  const1_rtx, GEN_INT (3)));
5651  emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5652				GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5653  emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5654				GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5655  emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
5656  DONE;
5657})
5658
;; Signed widening multiply ("hi" variant) from V4SI to V2DI; requires
;; SSE4.1.  With XOP both inputs are shuffled by pshufd with selectors
;; 0,2,1,3 and passed to xop_pmacsdqh together with a zeroed V2DI
;; accumulator.  Without XOP each input is interleaved with itself by
;; vec_interleave_highv4si and the results go to sse4_1_mulv2siv2di3.
5659(define_expand "vec_widen_smult_hi_v4si"
5660  [(match_operand:V2DI 0 "register_operand" "")
5661   (match_operand:V4SI 1 "register_operand" "")
5662   (match_operand:V4SI 2 "register_operand" "")]
5663  "TARGET_SSE4_1"
5664{
5665  rtx op1, op2, t1, t2;
5666
5667  op1 = operands[1];
5668  op2 = operands[2];
5669  t1 = gen_reg_rtx (V4SImode);
5670  t2 = gen_reg_rtx (V4SImode);
5671
5672  if (TARGET_XOP)
5673    {
5674      rtx t3 = gen_reg_rtx (V2DImode);
5675
5676      emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5677				    GEN_INT (1), GEN_INT (3)));
5678      emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5679				    GEN_INT (1), GEN_INT (3)));
5680      emit_move_insn (t3, CONST0_RTX (V2DImode));
5681
5682      emit_insn (gen_xop_pmacsdqh (operands[0], t1, t2, t3));
5683      DONE;
5684    }
5685
5686  emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5687  emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5688  emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
5689  DONE;
5690})
5691
;; Signed widening multiply ("lo" variant) from V4SI to V2DI; requires
;; SSE4.1.  With XOP both inputs are shuffled by pshufd with selectors
;; 0,2,1,3 and passed to xop_pmacsdql together with a zeroed V2DI
;; accumulator.  Without XOP each input is interleaved with itself by
;; vec_interleave_lowv4si and the results go to sse4_1_mulv2siv2di3.
5692(define_expand "vec_widen_smult_lo_v4si"
5693  [(match_operand:V2DI 0 "register_operand" "")
5694   (match_operand:V4SI 1 "register_operand" "")
5695   (match_operand:V4SI 2 "register_operand" "")]
5696  "TARGET_SSE4_1"
5697{
5698  rtx op1, op2, t1, t2;
5699
5700  op1 = operands[1];
5701  op2 = operands[2];
5702  t1 = gen_reg_rtx (V4SImode);
5703  t2 = gen_reg_rtx (V4SImode);
5704
5705  if (TARGET_XOP)
5706    {
5707      rtx t3 = gen_reg_rtx (V2DImode);
5708
5709      emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5710				    GEN_INT (1), GEN_INT (3)));
5711      emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5712				    GEN_INT (1), GEN_INT (3)));
5713      emit_move_insn (t3, CONST0_RTX (V2DImode));
5714
5715      emit_insn (gen_xop_pmacsdql (operands[0], t1, t2, t3));
5716      DONE;
5717    }
5718
5719  emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5720  emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5721  emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
5722  DONE;
5723})
5724
;; Unsigned widening multiply ("hi" variant) from V4SI to V2DI for SSE2.
;; Each input is interleaved with itself by vec_interleave_highv4si and
;; the two results are multiplied with sse2_umulv2siv2di3 into operand 0.
5725(define_expand "vec_widen_umult_hi_v4si"
5726  [(match_operand:V2DI 0 "register_operand" "")
5727   (match_operand:V4SI 1 "register_operand" "")
5728   (match_operand:V4SI 2 "register_operand" "")]
5729  "TARGET_SSE2"
5730{
5731  rtx op1, op2, t1, t2;
5732
5733  op1 = operands[1];
5734  op2 = operands[2];
5735  t1 = gen_reg_rtx (V4SImode);
5736  t2 = gen_reg_rtx (V4SImode);
5737
5738  emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5739  emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5740  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
5741  DONE;
5742})
5743
;; Unsigned widening multiply ("lo" variant) from V4SI to V2DI for SSE2.
;; Each input is interleaved with itself by vec_interleave_lowv4si and
;; the two results are multiplied with sse2_umulv2siv2di3 into operand 0.
5744(define_expand "vec_widen_umult_lo_v4si"
5745  [(match_operand:V2DI 0 "register_operand" "")
5746   (match_operand:V4SI 1 "register_operand" "")
5747   (match_operand:V4SI 2 "register_operand" "")]
5748  "TARGET_SSE2"
5749{
5750  rtx op1, op2, t1, t2;
5751
5752  op1 = operands[1];
5753  op2 = operands[2];
5754  t1 = gen_reg_rtx (V4SImode);
5755  t2 = gen_reg_rtx (V4SImode);
5756
5757  emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5758  emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5759  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
5760  DONE;
5761})
5762
;; Signed dot product for the VI2_AVX2 modes.  Emits
;; <sse2_avx2>_pmaddwd on operands 1 and 2 into a fresh pseudo in
;; <sseunpackmode>, then sets operand 0 to operand 3 plus that pseudo.
5763(define_expand "sdot_prod<mode>"
5764  [(match_operand:<sseunpackmode> 0 "register_operand" "")
5765   (match_operand:VI2_AVX2 1 "register_operand" "")
5766   (match_operand:VI2_AVX2 2 "register_operand" "")
5767   (match_operand:<sseunpackmode> 3 "register_operand" "")]
5768  "TARGET_SSE2"
5769{
5770  rtx t = gen_reg_rtx (<sseunpackmode>mode);
5771  emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
5772  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5773			  gen_rtx_PLUS (<sseunpackmode>mode,
5774					operands[3], t)));
5775  DONE;
5776})
5777
;; Maps an extension code to an ISA name prefix: "sse2" for zero_extend,
;; "sse4_1" for sign_extend.  Used to build gen_* function names.
5778(define_code_attr sse2_sse4_1
5779   [(zero_extend "sse2") (sign_extend "sse4_1")])
5780
;; Dot product of two V4SI vectors accumulated into V2DI, signed or
;; unsigned per the any_extend iterator.  The unsigned form needs SSE2,
;; the signed form SSE4.1.  Sequence: <u>mulv2siv2di3 on the inputs plus
;; operand 3 (t1); both inputs shifted right by 32 bits as V1TI and
;; multiplied again (t4); operand 0 = t1 + t4.
5781(define_expand "<s>dot_prodv4si"
5782  [(match_operand:V2DI 0 "register_operand" "")
5783   (any_extend:V2DI (match_operand:V4SI 1 "register_operand" ""))
5784   (match_operand:V4SI 2 "register_operand" "")
5785   (match_operand:V2DI 3 "register_operand" "")]
5786  "<CODE> == ZERO_EXTEND ? TARGET_SSE2 : TARGET_SSE4_1"
5787{
5788  rtx t1, t2, t3, t4;
5789
5790  t1 = gen_reg_rtx (V2DImode);
5791  emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t1, operands[1], operands[2]));
5792  emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5793
5794  t2 = gen_reg_rtx (V4SImode);
5795  t3 = gen_reg_rtx (V4SImode);
5796  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5797				 gen_lowpart (V1TImode, operands[1]),
5798				 GEN_INT (32)));
5799  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5800				 gen_lowpart (V1TImode, operands[2]),
5801				 GEN_INT (32)));
5802
5803  t4 = gen_reg_rtx (V2DImode);
5804  emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t4, t2, t3));
5805
5806  emit_insn (gen_addv2di3 (operands[0], t1, t4));
5807  DONE;
5808})
5809
;; Dot product of two V8SI vectors accumulated into V4DI for AVX2, signed
;; or unsigned per the any_extend iterator.  Sequence:
;; avx2_<u>mulv4siv4di3 on the inputs plus operand 3 (t1); both inputs
;; shifted right by 32 bits as V2TI and multiplied again (t4);
;; operand 0 = t1 + t4.
5810(define_expand "<s>dot_prodv8si"
5811  [(match_operand:V4DI 0 "register_operand" "")
5812   (any_extend:V4DI (match_operand:V8SI 1 "register_operand" ""))
5813   (match_operand:V8SI 2 "register_operand" "")
5814   (match_operand:V4DI 3 "register_operand" "")]
5815  "TARGET_AVX2"
5816{
5817  rtx t1, t2, t3, t4;
5818
5819  t1 = gen_reg_rtx (V4DImode);
5820  emit_insn (gen_avx2_<u>mulv4siv4di3 (t1, operands[1], operands[2]));
5821  emit_insn (gen_addv4di3 (t1, t1, operands[3]));
5822
5823  t2 = gen_reg_rtx (V8SImode);
5824  t3 = gen_reg_rtx (V8SImode);
5825  emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t2),
5826				 gen_lowpart (V2TImode, operands[1]),
5827				 GEN_INT (32)));
5828  emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t3),
5829				 gen_lowpart (V2TImode, operands[2]),
5830				 GEN_INT (32)));
5831
5832  t4 = gen_reg_rtx (V4DImode);
5833  emit_insn (gen_avx2_<u>mulv4siv4di3 (t4, t2, t3));
5834
5835  emit_insn (gen_addv4di3 (operands[0], t1, t4));
5836  DONE;
5837})
5838
;; Arithmetic right shift in the VI24_AVX2 modes, output as
;; psra<suffix> (operand 1 tied to the destination) or vpsra<suffix>
;; (three-operand VEX form).  The SImode count is an xmm register or a
;; constant satisfying constraint "N"; length_immediate is 1 only when
;; the count is a const_int.
5839(define_insn "ashr<mode>3"
5840  [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
5841	(ashiftrt:VI24_AVX2
5842	  (match_operand:VI24_AVX2 1 "register_operand" "0,x")
5843	  (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5844  "TARGET_SSE2"
5845  "@
5846   psra<ssemodesuffix>\t{%2, %0|%0, %2}
5847   vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5848  [(set_attr "isa" "noavx,avx")
5849   (set_attr "type" "sseishft")
5850   (set (attr "length_immediate")
5851     (if_then_else (match_operand 2 "const_int_operand" "")
5852       (const_string "1")
5853       (const_string "0")))
5854   (set_attr "prefix_data16" "1,*")
5855   (set_attr "prefix" "orig,vex")
5856   (set_attr "mode" "<sseinsnmode>")])
5857
;; Shifts covered by the any_lshift iterator in the VI248_AVX2 modes,
;; output as p<vshift><suffix> (operand 1 tied to the destination) or
;; vp<vshift><suffix> (three-operand VEX form).  The SImode count is an
;; xmm register or a constant satisfying constraint "N"; length_immediate
;; is 1 only when the count is a const_int.
5858(define_insn "<shift_insn><mode>3"
5859  [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5860	(any_lshift:VI248_AVX2
5861	  (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5862	  (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5863  "TARGET_SSE2"
5864  "@
5865   p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
5866   vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5867  [(set_attr "isa" "noavx,avx")
5868   (set_attr "type" "sseishft")
5869   (set (attr "length_immediate")
5870     (if_then_else (match_operand 2 "const_int_operand" "")
5871       (const_string "1")
5872       (const_string "0")))
5873   (set_attr "prefix_data16" "1,*")
5874   (set_attr "prefix" "orig,vex")
5875   (set_attr "mode" "<sseinsnmode>")])
5876
;; Whole-vector left shift for the VI_128 modes.  Operands 0 and 1 are
;; replaced by their V1TImode lowparts so the emitted RTL is a V1TI
;; ashift; the count must satisfy const_0_to_255_mul_8_operand.
5877(define_expand "vec_shl_<mode>"
5878  [(set (match_operand:VI_128 0 "register_operand" "")
5879	(ashift:V1TI
5880	 (match_operand:VI_128 1 "register_operand" "")
5881	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5882  "TARGET_SSE2"
5883{
5884  operands[0] = gen_lowpart (V1TImode, operands[0]);
5885  operands[1] = gen_lowpart (V1TImode, operands[1]);
5886})
5887
;; Left shift in the VIMAX_AVX2 modes, output as pslldq (alternative 0,
;; operand 1 tied to the destination) or vpslldq (alternative 1).  The
;; count operand satisfies const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the instruction's immediate.
5888(define_insn "<sse2_avx2>_ashl<mode>3"
5889  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5890	(ashift:VIMAX_AVX2
5891	 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5892	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5893  "TARGET_SSE2"
5894{
5895  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5896
5897  switch (which_alternative)
5898    {
5899    case 0:
5900      return "pslldq\t{%2, %0|%0, %2}";
5901    case 1:
5902      return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5903    default:
5904      gcc_unreachable ();
5905    }
5906}
5907  [(set_attr "isa" "noavx,avx")
5908   (set_attr "type" "sseishft")
5909   (set_attr "length_immediate" "1")
5910   (set_attr "prefix_data16" "1,*")
5911   (set_attr "prefix" "orig,vex")
5912   (set_attr "mode" "<sseinsnmode>")])
5913
;; Whole-vector logical right shift for the VI_128 modes.  Operands 0 and
;; 1 are replaced by their V1TImode lowparts so the emitted RTL is a V1TI
;; lshiftrt; the count must satisfy const_0_to_255_mul_8_operand.
5914(define_expand "vec_shr_<mode>"
5915  [(set (match_operand:VI_128 0 "register_operand" "")
5916	(lshiftrt:V1TI
5917	 (match_operand:VI_128 1 "register_operand" "")
5918	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5919  "TARGET_SSE2"
5920{
5921  operands[0] = gen_lowpart (V1TImode, operands[0]);
5922  operands[1] = gen_lowpart (V1TImode, operands[1]);
5923})
5924
;; Logical right shift in the VIMAX_AVX2 modes, output as psrldq
;; (alternative 0, operand 1 tied to the destination) or vpsrldq
;; (alternative 1).  The count operand satisfies
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing it as the instruction's immediate.
5925(define_insn "<sse2_avx2>_lshr<mode>3"
5926  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5927	(lshiftrt:VIMAX_AVX2
5928	 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5929	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5930  "TARGET_SSE2"
5931{
5932  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5933
5934  switch (which_alternative)
5935    {
5936    case 0:
5937      return "psrldq\t{%2, %0|%0, %2}";
5938    case 1:
5939      return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5940    default:
5941      gcc_unreachable ();
5942    }
5943}
5944  [(set_attr "isa" "noavx,avx")
5945   (set_attr "type" "sseishft")
5946   (set_attr "length_immediate" "1")
5947   (set_attr "atom_unit" "sishuf")
5948   (set_attr "prefix_data16" "1,*")
5949   (set_attr "prefix" "orig,vex")
5950   (set_attr "mode" "<sseinsnmode>")])
5951
5952
;; Expander for the maxmin codes in the VI124_256 modes (AVX2 only).
;; It only runs the operands through ix86_fixup_binary_operands_no_copy.
5953(define_expand "<code><mode>3"
5954  [(set (match_operand:VI124_256 0 "register_operand" "")
5955	(maxmin:VI124_256
5956	  (match_operand:VI124_256 1 "nonimmediate_operand" "")
5957	  (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
5958  "TARGET_AVX2"
5959  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
5960
;; Max/min in the VI124_256 modes for AVX2, output as the three-operand
;; VEX instruction vp<maxmin_int><suffix>.  Operand 1 is commutative
;; ("%") and operand 2 may be memory.
5961(define_insn "*avx2_<code><mode>3"
5962  [(set (match_operand:VI124_256 0 "register_operand" "=x")
5963	(maxmin:VI124_256
5964	  (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
5965	  (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
5966  "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5967  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5968  [(set_attr "type" "sseiadd")
5969   (set_attr "prefix_extra" "1")
5970   (set_attr "prefix" "vex")
5971   (set_attr "mode" "OI")])
5972
;; Expander for the maxmin codes in the VI8_AVX2 modes; requires SSE4.2.
;; The operation is expanded as a conditional select through
;; ix86_expand_int_vcond: the comparison is operand 1 > operand 2, using
;; GTU for UMAX/UMIN and GT otherwise.  For SMAX/UMAX the selected values
;; are (operand 1, operand 2); for the min codes they are swapped.
5973(define_expand "<code><mode>3"
5974  [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5975	(maxmin:VI8_AVX2
5976	  (match_operand:VI8_AVX2 1 "register_operand" "")
5977	  (match_operand:VI8_AVX2 2 "register_operand" "")))]
5978  "TARGET_SSE4_2"
5979{
5980  enum rtx_code code;
5981  rtx xops[6];
5982  bool ok;
5983
5984  xops[0] = operands[0];
5985
5986  if (<CODE> == SMAX || <CODE> == UMAX)
5987    {
5988      xops[1] = operands[1];
5989      xops[2] = operands[2];
5990    }
5991  else
5992    {
5993      xops[1] = operands[2];
5994      xops[2] = operands[1];
5995    }
5996
5997  code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
5998
5999  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
6000  xops[4] = operands[1];
6001  xops[5] = operands[2];
6002
6003  ok = ix86_expand_int_vcond (xops);
6004  gcc_assert (ok);
6005  DONE;
6006})
6007
;; Expander for the smaxmin codes in the VI124_128 modes (SSE2).
;; With SSE4.1, or for V8HImode, it only fixes up the operands with
;; ix86_fixup_binary_operands_no_copy.  Otherwise both inputs are forced
;; into registers and the operation is expanded through
;; ix86_expand_int_vcond as a select on operand 1 > operand 2 (GT), with
;; the selected values swapped when the code is not SMAX.
6008(define_expand "<code><mode>3"
6009  [(set (match_operand:VI124_128 0 "register_operand" "")
6010	(smaxmin:VI124_128
6011	  (match_operand:VI124_128 1 "nonimmediate_operand" "")
6012	  (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6013  "TARGET_SSE2"
6014{
6015  if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
6016    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
6017  else
6018    {
6019      rtx xops[6];
6020      bool ok;
6021
6022      xops[0] = operands[0];
6023      operands[1] = force_reg (<MODE>mode, operands[1]);
6024      operands[2] = force_reg (<MODE>mode, operands[2]);
6025
6026      if (<CODE> == SMAX)
6027	{
6028	  xops[1] = operands[1];
6029	  xops[2] = operands[2];
6030	}
6031      else
6032	{
6033	  xops[1] = operands[2];
6034	  xops[2] = operands[1];
6035	}
6036
6037      xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
6038      xops[4] = operands[1];
6039      xops[5] = operands[2];
6040
6041      ok = ix86_expand_int_vcond (xops);
6042      gcc_assert (ok);
6043      DONE;
6044    }
6045})
6046
;; Signed max/min (smaxmin) in the VI14_128 modes for SSE4.1, output as
;; p<maxmin_int><suffix> (operand 1 tied to the destination) or the
;; three-operand VEX form vp<maxmin_int><suffix>.
6047(define_insn "*sse4_1_<code><mode>3"
6048  [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
6049	(smaxmin:VI14_128
6050	  (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
6051	  (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
6052  "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6053  "@
6054   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6055   vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6056  [(set_attr "isa" "noavx,avx")
6057   (set_attr "type" "sseiadd")
6058   (set_attr "prefix_extra" "1,*")
6059   (set_attr "prefix" "orig,vex")
6060   (set_attr "mode" "TI")])
6061
;; Signed max/min (smaxmin) on V8HI for SSE2, output as p<maxmin_int>w
;; (operand 1 tied to the destination) or vp<maxmin_int>w (three-operand
;; VEX form).
6062(define_insn "*<code>v8hi3"
6063  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6064	(smaxmin:V8HI
6065	  (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
6066	  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
6067  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6068  "@
6069   p<maxmin_int>w\t{%2, %0|%0, %2}
6070   vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
6071  [(set_attr "isa" "noavx,avx")
6072   (set_attr "type" "sseiadd")
6073   (set_attr "prefix_data16" "1,*")
6074   (set_attr "prefix_extra" "*,1")
6075   (set_attr "prefix" "orig,vex")
6076   (set_attr "mode" "TI")])
6077
;; Expander for the umaxmin codes in the VI124_128 modes (SSE2).
;; Three cases:
;;  - SSE4.1 available, or V16QImode: only fix up the operands.
;;  - UMAX on V8HImode: emit sse2_ussubv8hi3 (operand 1, operand 2) and
;;    then add operand 2 to the result.  The intermediate goes directly
;;    into operand 0 unless operand 0 is rtx_equal_p to operand 2, in
;;    which case a fresh pseudo is used.
;;  - Otherwise: force both inputs into registers and expand through
;;    ix86_expand_int_vcond as a select on operand 1 > operand 2 (GTU),
;;    with the selected values swapped when the code is not UMAX.
6078(define_expand "<code><mode>3"
6079  [(set (match_operand:VI124_128 0 "register_operand" "")
6080	(umaxmin:VI124_128
6081	  (match_operand:VI124_128 1 "nonimmediate_operand" "")
6082	  (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6083  "TARGET_SSE2"
6084{
6085  if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
6086    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
6087  else if (<CODE> == UMAX && <MODE>mode == V8HImode)
6088    {
6089      rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6090      operands[1] = force_reg (<MODE>mode, operands[1]);
6091      if (rtx_equal_p (op3, op2))
6092	op3 = gen_reg_rtx (V8HImode);
6093      emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6094      emit_insn (gen_addv8hi3 (op0, op3, op2));
6095      DONE;
6096    }
6097  else
6098    {
6099      rtx xops[6];
6100      bool ok;
6101
6102      operands[1] = force_reg (<MODE>mode, operands[1]);
6103      operands[2] = force_reg (<MODE>mode, operands[2]);
6104
6105      xops[0] = operands[0];
6106
6107      if (<CODE> == UMAX)
6108	{
6109	  xops[1] = operands[1];
6110	  xops[2] = operands[2];
6111	}
6112      else
6113	{
6114	  xops[1] = operands[2];
6115	  xops[2] = operands[1];
6116	}
6117
6118      xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6119      xops[4] = operands[1];
6120      xops[5] = operands[2];
6121
6122      ok = ix86_expand_int_vcond (xops);
6123      gcc_assert (ok);
6124      DONE;
6125    }
6126})
6127
;; Unsigned max/min (umaxmin) in the VI24_128 modes for SSE4.1, output as
;; p<maxmin_int><suffix> (operand 1 tied to the destination) or the
;; three-operand VEX form vp<maxmin_int><suffix>.
6128(define_insn "*sse4_1_<code><mode>3"
6129  [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
6130	(umaxmin:VI24_128
6131	  (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
6132	  (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
6133  "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6134  "@
6135   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6136   vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6137  [(set_attr "isa" "noavx,avx")
6138   (set_attr "type" "sseiadd")
6139   (set_attr "prefix_extra" "1,*")
6140   (set_attr "prefix" "orig,vex")
6141   (set_attr "mode" "TI")])
6142
;; Unsigned max/min (umaxmin) on V16QI for SSE2, output as p<maxmin_int>b
;; (operand 1 tied to the destination) or vp<maxmin_int>b (three-operand
;; VEX form).
6143(define_insn "*<code>v16qi3"
6144  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6145	(umaxmin:V16QI
6146	  (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
6147	  (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
6148  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6149  "@
6150   p<maxmin_int>b\t{%2, %0|%0, %2}
6151   vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
6152  [(set_attr "isa" "noavx,avx")
6153   (set_attr "type" "sseiadd")
6154   (set_attr "prefix_data16" "1,*")
6155   (set_attr "prefix_extra" "*,1")
6156   (set_attr "prefix" "orig,vex")
6157   (set_attr "mode" "TI")])
6158
6159;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6160;;
6161;; Parallel integral comparisons
6162;;
6163;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6164
;; Named expander for element-wise equality in the VI_256 modes (AVX2).
;; It only runs the operands through ix86_fixup_binary_operands_no_copy.
6165(define_expand "avx2_eq<mode>3"
6166  [(set (match_operand:VI_256 0 "register_operand" "")
6167	(eq:VI_256
6168	  (match_operand:VI_256 1 "nonimmediate_operand" "")
6169	  (match_operand:VI_256 2 "nonimmediate_operand" "")))]
6170  "TARGET_AVX2"
6171  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
6172
;; Element-wise equality compare in the VI_256 modes for AVX2, output as
;; the three-operand VEX instruction vpcmpeq<suffix>.  Operand 1 is
;; commutative ("%") and operand 2 may be memory.
6173(define_insn "*avx2_eq<mode>3"
6174  [(set (match_operand:VI_256 0 "register_operand" "=x")
6175	(eq:VI_256
6176	  (match_operand:VI_256 1 "nonimmediate_operand" "%x")
6177	  (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6178  "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6179  "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6180  [(set_attr "type" "ssecmp")
6181   (set_attr "prefix_extra" "1")
6182   (set_attr "prefix" "vex")
6183   (set_attr "mode" "OI")])
6184
;; V2DI equality compare for SSE4.1, output as pcmpeqq (operand 1 tied to
;; the destination) or vpcmpeqq (three-operand VEX form).
6185(define_insn "*sse4_1_eqv2di3"
6186  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6187	(eq:V2DI
6188	  (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
6189	  (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6190  "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6191  "@
6192   pcmpeqq\t{%2, %0|%0, %2}
6193   vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
6194  [(set_attr "isa" "noavx,avx")
6195   (set_attr "type" "ssecmp")
6196   (set_attr "prefix_extra" "1")
6197   (set_attr "prefix" "orig,vex")
6198   (set_attr "mode" "TI")])
6199
;; Equality compare in the VI124_128 modes for SSE2, disabled when XOP is
;; enabled.  Output as pcmpeq<suffix> (operand 1 tied to the destination)
;; or vpcmpeq<suffix> (three-operand VEX form).
6200(define_insn "*sse2_eq<mode>3"
6201  [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6202	(eq:VI124_128
6203	  (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
6204	  (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6205  "TARGET_SSE2 && !TARGET_XOP
6206   && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6207  "@
6208   pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
6209   vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6210  [(set_attr "isa" "noavx,avx")
6211   (set_attr "type" "ssecmp")
6212   (set_attr "prefix_data16" "1,*")
6213   (set_attr "prefix" "orig,vex")
6214   (set_attr "mode" "TI")])
6215
;; Named expander for equality in the VI124_128 modes (SSE2, not XOP).
;; It only runs the operands through ix86_fixup_binary_operands_no_copy.
6216(define_expand "sse2_eq<mode>3"
6217  [(set (match_operand:VI124_128 0 "register_operand" "")
6218	(eq:VI124_128
6219	  (match_operand:VI124_128 1 "nonimmediate_operand" "")
6220	  (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6221  "TARGET_SSE2 && !TARGET_XOP "
6222  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
6223
;; Named expander for V2DI equality (SSE4.1).  It only runs the operands
;; through ix86_fixup_binary_operands_no_copy.
6224(define_expand "sse4_1_eqv2di3"
6225  [(set (match_operand:V2DI 0 "register_operand" "")
6226	(eq:V2DI
6227	  (match_operand:V2DI 1 "nonimmediate_operand" "")
6228	  (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6229  "TARGET_SSE4_1"
6230  "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
6231
;; V2DI greater-than compare (gt) for SSE4.2, output as pcmpgtq (operand 1
;; tied to the destination) or vpcmpgtq (three-operand VEX form).
;; Operand 1 must be a register and is not marked commutative.
6232(define_insn "sse4_2_gtv2di3"
6233  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6234	(gt:V2DI
6235	  (match_operand:V2DI 1 "register_operand" "0,x")
6236	  (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6237  "TARGET_SSE4_2"
6238  "@
6239   pcmpgtq\t{%2, %0|%0, %2}
6240   vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
6241  [(set_attr "isa" "noavx,avx")
6242   (set_attr "type" "ssecmp")
6243   (set_attr "prefix_extra" "1")
6244   (set_attr "prefix" "orig,vex")
6245   (set_attr "mode" "TI")])
6246
;; Greater-than compare (gt) in the VI_256 modes for AVX2, output as the
;; three-operand VEX instruction vpcmpgt<suffix>.  Operand 1 must be a
;; register; operand 2 may be memory.
6247(define_insn "avx2_gt<mode>3"
6248  [(set (match_operand:VI_256 0 "register_operand" "=x")
6249	(gt:VI_256
6250	  (match_operand:VI_256 1 "register_operand" "x")
6251	  (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6252  "TARGET_AVX2"
6253  "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6254  [(set_attr "type" "ssecmp")
6255   (set_attr "prefix_extra" "1")
6256   (set_attr "prefix" "vex")
6257   (set_attr "mode" "OI")])
6258
;; Greater-than compare (gt) in the VI124_128 modes for SSE2, disabled
;; when XOP is enabled.  Output as pcmpgt<suffix> (operand 1 tied to the
;; destination) or vpcmpgt<suffix> (three-operand VEX form).
6259(define_insn "sse2_gt<mode>3"
6260  [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6261	(gt:VI124_128
6262	  (match_operand:VI124_128 1 "register_operand" "0,x")
6263	  (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6264  "TARGET_SSE2 && !TARGET_XOP"
6265  "@
6266   pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
6267   vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6268  [(set_attr "isa" "noavx,avx")
6269   (set_attr "type" "ssecmp")
6270   (set_attr "prefix_data16" "1,*")
6271   (set_attr "prefix" "orig,vex")
6272   (set_attr "mode" "TI")])
6273
;; Vector conditional select for 256-bit data modes (V_256) driven by a
;; comparison of VI_256 operands 4 and 5.  Enabled for AVX2 only when the
;; data mode and the comparison mode have the same number of elements.
;; Expansion is delegated to ix86_expand_int_vcond, which is asserted to
;; succeed.
6274(define_expand "vcond<V_256:mode><VI_256:mode>"
6275  [(set (match_operand:V_256 0 "register_operand" "")
6276	(if_then_else:V_256
6277	  (match_operator 3 ""
6278	    [(match_operand:VI_256 4 "nonimmediate_operand" "")
6279	     (match_operand:VI_256 5 "general_operand" "")])
6280	  (match_operand:V_256 1 "" "")
6281	  (match_operand:V_256 2 "" "")))]
6282  "TARGET_AVX2
6283   && (GET_MODE_NUNITS (<V_256:MODE>mode)
6284       == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6285{
6286  bool ok = ix86_expand_int_vcond (operands);
6287  gcc_assert (ok);
6288  DONE;
6289})
6290
;; Vector conditional select for 128-bit data modes (V_128) driven by a
;; comparison of VI124_128 operands 4 and 5.  Enabled for SSE2 only when
;; the data mode and the comparison mode have the same number of elements.
;; Expansion is delegated to ix86_expand_int_vcond, which is asserted to
;; succeed.
6291(define_expand "vcond<V_128:mode><VI124_128:mode>"
6292  [(set (match_operand:V_128 0 "register_operand" "")
6293	(if_then_else:V_128
6294	  (match_operator 3 ""
6295	    [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6296	     (match_operand:VI124_128 5 "general_operand" "")])
6297	  (match_operand:V_128 1 "" "")
6298	  (match_operand:V_128 2 "" "")))]
6299  "TARGET_SSE2
6300   && (GET_MODE_NUNITS (<V_128:MODE>mode)
6301       == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6302{
6303  bool ok = ix86_expand_int_vcond (operands);
6304  gcc_assert (ok);
6305  DONE;
6306})
6307
;; Vector conditional select for the VI8F_128 data modes driven by a
;; comparison of V2DI operands 4 and 5; requires SSE4.2.  Expansion is
;; delegated to ix86_expand_int_vcond, which is asserted to succeed.
6308(define_expand "vcond<VI8F_128:mode>v2di"
6309  [(set (match_operand:VI8F_128 0 "register_operand" "")
6310	(if_then_else:VI8F_128
6311	  (match_operator 3 ""
6312	    [(match_operand:V2DI 4 "nonimmediate_operand" "")
6313	     (match_operand:V2DI 5 "general_operand" "")])
6314	  (match_operand:VI8F_128 1 "" "")
6315	  (match_operand:VI8F_128 2 "" "")))]
6316  "TARGET_SSE4_2"
6317{
6318  bool ok = ix86_expand_int_vcond (operands);
6319  gcc_assert (ok);
6320  DONE;
6321})
6322
;; `vcondu' counterpart of the 256-bit vcond expander (the standard
;; pattern name used for unsigned comparisons).  Same operand layout:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; Here operand 5 must be a nonimmediate_operand and the two data
;; operands are general_operands.  Requires AVX2 and matching element
;; counts; expansion is delegated to ix86_expand_int_vcond.
6323(define_expand "vcondu<V_256:mode><VI_256:mode>"
6324  [(set (match_operand:V_256 0 "register_operand" "")
6325	(if_then_else:V_256
6326	  (match_operator 3 ""
6327	    [(match_operand:VI_256 4 "nonimmediate_operand" "")
6328	     (match_operand:VI_256 5 "nonimmediate_operand" "")])
6329	  (match_operand:V_256 1 "general_operand" "")
6330	  (match_operand:V_256 2 "general_operand" "")))]
6331  "TARGET_AVX2
6332   && (GET_MODE_NUNITS (<V_256:MODE>mode)
6333       == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6334{
6335  bool ok = ix86_expand_int_vcond (operands);
6336  gcc_assert (ok);
6337  DONE;
6338})
6339
;; `vcondu' counterpart of the 128-bit vcond expander (the standard
;; pattern name used for unsigned comparisons).  Comparison operands are
;; in a VI124_128 mode, data operands in a V_128 mode.  Requires SSE2 and
;; matching element counts; expansion is delegated to
;; ix86_expand_int_vcond, which is asserted to succeed.
6340(define_expand "vcondu<V_128:mode><VI124_128:mode>"
6341  [(set (match_operand:V_128 0 "register_operand" "")
6342	(if_then_else:V_128
6343	  (match_operator 3 ""
6344	    [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6345	     (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6346	  (match_operand:V_128 1 "general_operand" "")
6347	  (match_operand:V_128 2 "general_operand" "")))]
6348  "TARGET_SSE2
6349   && (GET_MODE_NUNITS (<V_128:MODE>mode)
6350       == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6351{
6352  bool ok = ix86_expand_int_vcond (operands);
6353  gcc_assert (ok);
6354  DONE;
6355})
6356
;; `vcondu' counterpart of vcond<VI8F_128:mode>v2di: the comparison is
;; on V2DI operands, the selected data is in a VI8F_128 mode.  Requires
;; SSE4.2; expansion is delegated to ix86_expand_int_vcond, which is
;; asserted to succeed.
6357(define_expand "vcondu<VI8F_128:mode>v2di"
6358  [(set (match_operand:VI8F_128 0 "register_operand" "")
6359	(if_then_else:VI8F_128
6360	  (match_operator 3 ""
6361	    [(match_operand:V2DI 4 "nonimmediate_operand" "")
6362	     (match_operand:V2DI 5 "nonimmediate_operand" "")])
6363	  (match_operand:VI8F_128 1 "general_operand" "")
6364	  (match_operand:VI8F_128 2 "general_operand" "")))]
6365  "TARGET_SSE4_2"
6366{
6367  bool ok = ix86_expand_int_vcond (operands);
6368  gcc_assert (ok);
6369  DONE;
6370})
6371
;; Modes accepted by the vec_perm<mode> expander.  The six 128-bit modes
;; are listed without a per-mode condition; each 256-bit mode is only
;; enabled when TARGET_AVX2 holds.
6372(define_mode_iterator VEC_PERM_AVX2
6373  [V16QI V8HI V4SI V2DI V4SF V2DF
6374   (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
6375   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
6376   (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
6377
;; Variable vector permutation.  Operand 0 is the destination, operands 1
;; and 2 are the two input vectors, and operand 3 is the selector, held
;; in the integer vector mode given by <sseintvecmode>.  Enabled when any
;; of SSSE3, AVX or XOP is available (in addition to the per-mode
;; conditions of VEC_PERM_AVX2).  Expansion is done entirely by
;; ix86_expand_vec_perm.
6378(define_expand "vec_perm<mode>"
6379  [(match_operand:VEC_PERM_AVX2 0 "register_operand" "")
6380   (match_operand:VEC_PERM_AVX2 1 "register_operand" "")
6381   (match_operand:VEC_PERM_AVX2 2 "register_operand" "")
6382   (match_operand:<sseintvecmode> 3 "register_operand" "")]
6383  "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
6384{
6385  ix86_expand_vec_perm (operands);
6386  DONE;
6387})
6388
;; Modes accepted by the vec_perm_const<mode> expander, each with the
;; ISA level it needs: SSE for the 4- and 2-element 128-bit modes, SSE2
;; for V16QI/V8HI, AVX for the 8- and 4-element 256-bit modes, and AVX2
;; for V32QI/V16HI.
6389(define_mode_iterator VEC_PERM_CONST
6390  [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
6391   (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
6392   (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
6393   (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
6394   (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
6395   (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")])
6396
;; Vector permutation with a constant selector (operand 3, which has no
;; predicate here).  The expander condition is empty, so availability is
;; decided only by the per-mode conditions in VEC_PERM_CONST.  The
;; expansion FAILs when ix86_expand_vec_perm_const returns false.
6397(define_expand "vec_perm_const<mode>"
6398  [(match_operand:VEC_PERM_CONST 0 "register_operand" "")
6399   (match_operand:VEC_PERM_CONST 1 "register_operand" "")
6400   (match_operand:VEC_PERM_CONST 2 "register_operand" "")
6401   (match_operand:<sseintvecmode> 3 "" "")]
6402  ""
6403{
6404  if (ix86_expand_vec_perm_const (operands))
6405    DONE;
6406  else
6407    FAIL;
6408})
6409
6410;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6411;;
6412;; Parallel bitwise logical operations
6413;;
6414;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6415
;; Bitwise complement of an integer vector, expressed as an XOR of
;; operand 1 with an all-ones vector.  The preparation code builds a
;; CONST_VECTOR whose every element is constm1_rtx, forces it into a
;; register, and stores it as operand 2 for the (match_dup 2) in the
;; template.
6416(define_expand "one_cmpl<mode>2"
6417  [(set (match_operand:VI 0 "register_operand" "")
6418	(xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
6419		(match_dup 2)))]
6420  "TARGET_SSE"
6421{
6422  int i, n = GET_MODE_NUNITS (<MODE>mode);
6423  rtvec v = rtvec_alloc (n);
6424
6425  for (i = 0; i < n; ++i)
6426    RTVEC_ELT (v, i) = constm1_rtx;
6427
6428  operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
6429})
6430
;; Named expander for and-not over the VI_AVX2 modes:
;;   operand 0 = (~operand 1) & operand 2
;; There is no preparation code, so the RTL template is emitted as
;; written.  Requires SSE2.
6431(define_expand "<sse2_avx2>_andnot<mode>3"
6432  [(set (match_operand:VI_AVX2 0 "register_operand" "")
6433	(and:VI_AVX2
6434	  (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand" ""))
6435	  (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
6436  "TARGET_SSE2")
6437
;; Matcher for and-not over all VI modes: operand 0 = (~operand 1) &
;; operand 2.
;;
;; The mnemonic depends on the insn's "mode" attribute, which is computed
;; at the bottom of the pattern:
;;   - V8SF when the vector is wider than 16 bytes and AVX2 is missing,
;;   - V4SF when SSE2 is missing,
;;   - <sseinsnmode> otherwise.
;; OI and TI select the integer form "pandn"; V8SF and V4SF select the
;; float form "andnps".  In the first switch MODE_OI falls through into
;; MODE_TI and MODE_V8SF falls through into MODE_V4SF, so both
;; assertions of a pair are evaluated for the wider mode.
;;
;; which_alternative then picks the operand layout: alternative 0 is the
;; two-operand legacy form, alternative 1 the "v"-prefixed three-operand
;; form.  The final template is formatted into a function-static buffer.
;; prefix_data16 is 1 only for alternative 0 with mode TI.
6438(define_insn "*andnot<mode>3"
6439  [(set (match_operand:VI 0 "register_operand" "=x,x")
6440	(and:VI
6441	  (not:VI (match_operand:VI 1 "register_operand" "0,x"))
6442	  (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6443  "TARGET_SSE"
6444{
6445  static char buf[32];
6446  const char *ops;
6447  const char *tmp;
6448
6449  switch (get_attr_mode (insn))
6450    {
6451    case MODE_OI:
6452      gcc_assert (TARGET_AVX2);
6453    case MODE_TI:
6454      gcc_assert (TARGET_SSE2);
6455
6456      tmp = "pandn";
6457      break;
6458
6459   case MODE_V8SF:
6460      gcc_assert (TARGET_AVX);
6461   case MODE_V4SF:
6462      gcc_assert (TARGET_SSE);
6463
6464      tmp = "andnps";
6465      break;
6466
6467   default:
6468      gcc_unreachable ();
6469   }
6470
6471  switch (which_alternative)
6472    {
6473    case 0:
6474      ops = "%s\t{%%2, %%0|%%0, %%2}";
6475      break;
6476    case 1:
6477      ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6478      break;
6479    default:
6480      gcc_unreachable ();
6481    }
6482
6483  snprintf (buf, sizeof (buf), ops, tmp);
6484  return buf;
6485}
6486  [(set_attr "isa" "noavx,avx")
6487   (set_attr "type" "sselog")
6488   (set (attr "prefix_data16")
6489     (if_then_else
6490       (and (eq_attr "alternative" "0")
6491	    (eq_attr "mode" "TI"))
6492       (const_string "1")
6493       (const_string "*")))
6494   (set_attr "prefix" "orig,vex")
6495   (set (attr "mode")
6496     (cond [(and (not (match_test "TARGET_AVX2"))
6497		 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6498	      (const_string "V8SF")
6499	    (not (match_test "TARGET_SSE2"))
6500	      (const_string "V4SF")
6501	   ]
6502	   (const_string "<sseinsnmode>")))])
6503
;; Named expander for the any_logic operations over all VI modes.  Before
;; the template is emitted the operands are passed through
;; ix86_fixup_binary_operands_no_copy (defined outside this file;
;; presumably it legitimizes the operand combination -- confirm there).
6504(define_expand "<code><mode>3"
6505  [(set (match_operand:VI 0 "register_operand" "")
6506	(any_logic:VI
6507	  (match_operand:VI 1 "nonimmediate_operand" "")
6508	  (match_operand:VI 2 "nonimmediate_operand" "")))]
6509  "TARGET_SSE"
6510  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
6511
;; Matcher for the any_logic operations over all VI modes.  Operand 1 is
;; marked commutative with operand 2 (the `%' constraint modifier), and
;; the pattern only matches when ix86_binary_operator_ok accepts the
;; operands.
;;
;; The mnemonic depends on the insn's "mode" attribute, which is computed
;; at the bottom of the pattern:
;;   - V8SF when the vector is wider than 16 bytes and AVX2 is missing,
;;   - V4SF when SSE2 is missing,
;;   - <sseinsnmode> otherwise.
;; OI and TI select the integer form "p<logic>"; V8SF and V4SF select
;; the float form "<logic>ps".  In the first switch MODE_OI falls through
;; into MODE_TI and MODE_V8SF falls through into MODE_V4SF, so both
;; assertions of a pair are evaluated for the wider mode.
;;
;; which_alternative then picks the operand layout: alternative 0 is the
;; two-operand legacy form, alternative 1 the "v"-prefixed three-operand
;; form.  The final template is formatted into a function-static buffer.
;; prefix_data16 is 1 only for alternative 0 with mode TI.
6512(define_insn "*<code><mode>3"
6513  [(set (match_operand:VI 0 "register_operand" "=x,x")
6514	(any_logic:VI
6515	  (match_operand:VI 1 "nonimmediate_operand" "%0,x")
6516	  (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6517  "TARGET_SSE
6518   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6519{
6520  static char buf[32];
6521  const char *ops;
6522  const char *tmp;
6523
6524  switch (get_attr_mode (insn))
6525    {
6526    case MODE_OI:
6527      gcc_assert (TARGET_AVX2);
6528    case MODE_TI:
6529      gcc_assert (TARGET_SSE2);
6530
6531      tmp = "p<logic>";
6532      break;
6533
6534   case MODE_V8SF:
6535      gcc_assert (TARGET_AVX);
6536   case MODE_V4SF:
6537      gcc_assert (TARGET_SSE);
6538
6539      tmp = "<logic>ps";
6540      break;
6541
6542   default:
6543      gcc_unreachable ();
6544   }
6545
6546  switch (which_alternative)
6547    {
6548    case 0:
6549      ops = "%s\t{%%2, %%0|%%0, %%2}";
6550      break;
6551    case 1:
6552      ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6553      break;
6554    default:
6555      gcc_unreachable ();
6556    }
6557
6558  snprintf (buf, sizeof (buf), ops, tmp);
6559  return buf;
6560}
6561  [(set_attr "isa" "noavx,avx")
6562   (set_attr "type" "sselog")
6563   (set (attr "prefix_data16")
6564     (if_then_else
6565       (and (eq_attr "alternative" "0")
6566	    (eq_attr "mode" "TI"))
6567       (const_string "1")
6568       (const_string "*")))
6569   (set_attr "prefix" "orig,vex")
6570   (set (attr "mode")
6571     (cond [(and (not (match_test "TARGET_AVX2"))
6572		 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6573	      (const_string "V8SF")
6574	    (not (match_test "TARGET_SSE2"))
6575	      (const_string "V4SF")
6576	   ]
6577	   (const_string "<sseinsnmode>")))])
6578
;; And-not on a TFmode value held in an SSE register:
;;   operand 0 = (~operand 1) & operand 2
;; Always uses the integer instruction: pandn for the legacy alternative
;; (destination tied to operand 1), vpandn for the AVX alternative.
6579(define_insn "*andnottf3"
6580  [(set (match_operand:TF 0 "register_operand" "=x,x")
6581	(and:TF
6582	  (not:TF (match_operand:TF 1 "register_operand" "0,x"))
6583	  (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6584  "TARGET_SSE2"
6585  "@
6586   pandn\t{%2, %0|%0, %2}
6587   vpandn\t{%2, %1, %0|%0, %1, %2}"
6588  [(set_attr "isa" "noavx,avx")
6589   (set_attr "type" "sselog")
6590   (set_attr "prefix_data16" "1,*")
6591   (set_attr "prefix" "orig,vex")
6592   (set_attr "mode" "TI")])
6593
;; Named expander for the any_logic operations on TFmode.  Before the
;; template is emitted the operands are passed through
;; ix86_fixup_binary_operands_no_copy (defined outside this file;
;; presumably it legitimizes the operand combination -- confirm there).
6594(define_expand "<code>tf3"
6595  [(set (match_operand:TF 0 "register_operand" "")
6596	(any_logic:TF
6597	  (match_operand:TF 1 "nonimmediate_operand" "")
6598	  (match_operand:TF 2 "nonimmediate_operand" "")))]
6599  "TARGET_SSE2"
6600  "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
6601
;; Matcher for the any_logic operations on TFmode.  Operand 1 is marked
;; commutative with operand 2 (the `%' constraint modifier) and the
;; pattern only matches when ix86_binary_operator_ok accepts the
;; operands.  Emits p<logic> for the legacy alternative and vp<logic> for
;; the AVX alternative.
6602(define_insn "*<code>tf3"
6603  [(set (match_operand:TF 0 "register_operand" "=x,x")
6604	(any_logic:TF
6605	  (match_operand:TF 1 "nonimmediate_operand" "%0,x")
6606	  (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6607  "TARGET_SSE2
6608   && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6609  "@
6610   p<logic>\t{%2, %0|%0, %2}
6611   vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6612  [(set_attr "isa" "noavx,avx")
6613   (set_attr "type" "sselog")
6614   (set_attr "prefix_data16" "1,*")
6615   (set_attr "prefix" "orig,vex")
6616   (set_attr "mode" "TI")])
6617
6618;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6619;;
6620;; Parallel integral element swizzling
6621;;
6622;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6623
;; Pack two VI248_AVX2 vectors into one vector of the narrower-element
;; mode <ssepackmode>.  Both inputs are reinterpreted in <ssepackmode>
;; with gen_lowpart and handed to ix86_expand_vec_extract_even_odd with a
;; final argument of 0.
;; NOTE(review): the 0 presumably requests the even-numbered elements,
;; i.e. the low half of every wide element -- confirm against the helper.
6624(define_expand "vec_pack_trunc_<mode>"
6625  [(match_operand:<ssepackmode> 0 "register_operand" "")
6626   (match_operand:VI248_AVX2 1 "register_operand" "")
6627   (match_operand:VI248_AVX2 2 "register_operand" "")]
6628  "TARGET_SSE2"
6629{
6630  rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6631  rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6632  ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
6633  DONE;
6634})
6635
;; packsswb / vpacksswb: the result is the concatenation of operand 1 and
;; operand 2, each narrowed with signed saturation (ss_truncate) from
;; <sseunpackmode> to <ssehalfvecmode>.  The result mode iterates over
;; VI1_AVX2.
6636(define_insn "<sse2_avx2>_packsswb"
6637  [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6638	(vec_concat:VI1_AVX2
6639	  (ss_truncate:<ssehalfvecmode>
6640	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6641	  (ss_truncate:<ssehalfvecmode>
6642	    (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6643  "TARGET_SSE2"
6644  "@
6645   packsswb\t{%2, %0|%0, %2}
6646   vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6647  [(set_attr "isa" "noavx,avx")
6648   (set_attr "type" "sselog")
6649   (set_attr "prefix_data16" "1,*")
6650   (set_attr "prefix" "orig,vex")
6651   (set_attr "mode" "<sseinsnmode>")])
6652
;; packssdw / vpackssdw: the result is the concatenation of operand 1 and
;; operand 2, each narrowed with signed saturation (ss_truncate) from
;; <sseunpackmode> to <ssehalfvecmode>.  The result mode iterates over
;; VI2_AVX2.
6653(define_insn "<sse2_avx2>_packssdw"
6654  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6655	(vec_concat:VI2_AVX2
6656	  (ss_truncate:<ssehalfvecmode>
6657	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6658	  (ss_truncate:<ssehalfvecmode>
6659	    (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6660  "TARGET_SSE2"
6661  "@
6662   packssdw\t{%2, %0|%0, %2}
6663   vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6664  [(set_attr "isa" "noavx,avx")
6665   (set_attr "type" "sselog")
6666   (set_attr "prefix_data16" "1,*")
6667   (set_attr "prefix" "orig,vex")
6668   (set_attr "mode" "<sseinsnmode>")])
6669
;; packuswb / vpackuswb: the result is the concatenation of operand 1 and
;; operand 2, each narrowed with unsigned saturation (us_truncate) from
;; <sseunpackmode> to <ssehalfvecmode>.  The result mode iterates over
;; VI1_AVX2.
6670(define_insn "<sse2_avx2>_packuswb"
6671  [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6672	(vec_concat:VI1_AVX2
6673	  (us_truncate:<ssehalfvecmode>
6674	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6675	  (us_truncate:<ssehalfvecmode>
6676	    (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6677  "TARGET_SSE2"
6678  "@
6679   packuswb\t{%2, %0|%0, %2}
6680   vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6681  [(set_attr "isa" "noavx,avx")
6682   (set_attr "type" "sselog")
6683   (set_attr "prefix_data16" "1,*")
6684   (set_attr "prefix" "orig,vex")
6685   (set_attr "mode" "<sseinsnmode>")])
6686
;; vpunpckhbw on 256-bit vectors.  In the V64QI concatenation, indices
;; 0-31 are operand 1 and 32-63 are operand 2.  The selection alternates
;; operand 1 / operand 2 bytes, taking bytes 8-15 of each operand first
;; and bytes 24-31 second, i.e. the upper 8 bytes of each 16-byte half
;; are interleaved separately.
6687(define_insn "avx2_interleave_highv32qi"
6688  [(set (match_operand:V32QI 0 "register_operand" "=x")
6689	(vec_select:V32QI
6690	  (vec_concat:V64QI
6691	    (match_operand:V32QI 1 "register_operand" "x")
6692	    (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6693	  (parallel [(const_int 8)  (const_int 40)
6694		     (const_int 9)  (const_int 41)
6695		     (const_int 10) (const_int 42)
6696		     (const_int 11) (const_int 43)
6697		     (const_int 12) (const_int 44)
6698		     (const_int 13) (const_int 45)
6699		     (const_int 14) (const_int 46)
6700		     (const_int 15) (const_int 47)
6701		     (const_int 24) (const_int 56)
6702		     (const_int 25) (const_int 57)
6703		     (const_int 26) (const_int 58)
6704		     (const_int 27) (const_int 59)
6705		     (const_int 28) (const_int 60)
6706		     (const_int 29) (const_int 61)
6707		     (const_int 30) (const_int 62)
6708		     (const_int 31) (const_int 63)])))]
6709  "TARGET_AVX2"
6710  "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6711  [(set_attr "type" "sselog")
6712   (set_attr "prefix" "vex")
6713   (set_attr "mode" "OI")])
6714
;; punpckhbw / vpunpckhbw on 128-bit vectors.  In the V32QI
;; concatenation, indices 0-15 are operand 1 and 16-31 are operand 2; the
;; selection interleaves bytes 8-15 of operand 1 with bytes 8-15 of
;; operand 2, operand 1 first.
6715(define_insn "vec_interleave_highv16qi"
6716  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6717	(vec_select:V16QI
6718	  (vec_concat:V32QI
6719	    (match_operand:V16QI 1 "register_operand" "0,x")
6720	    (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6721	  (parallel [(const_int 8)  (const_int 24)
6722		     (const_int 9)  (const_int 25)
6723		     (const_int 10) (const_int 26)
6724		     (const_int 11) (const_int 27)
6725		     (const_int 12) (const_int 28)
6726		     (const_int 13) (const_int 29)
6727		     (const_int 14) (const_int 30)
6728		     (const_int 15) (const_int 31)])))]
6729  "TARGET_SSE2"
6730  "@
6731   punpckhbw\t{%2, %0|%0, %2}
6732   vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6733  [(set_attr "isa" "noavx,avx")
6734   (set_attr "type" "sselog")
6735   (set_attr "prefix_data16" "1,*")
6736   (set_attr "prefix" "orig,vex")
6737   (set_attr "mode" "TI")])
6738
;; vpunpcklbw on 256-bit vectors.  In the V64QI concatenation, indices
;; 0-31 are operand 1 and 32-63 are operand 2.  The selection alternates
;; operand 1 / operand 2 bytes, taking bytes 0-7 of each operand first
;; and bytes 16-23 second, i.e. the lower 8 bytes of each 16-byte half
;; are interleaved separately.
6739(define_insn "avx2_interleave_lowv32qi"
6740  [(set (match_operand:V32QI 0 "register_operand" "=x")
6741	(vec_select:V32QI
6742	  (vec_concat:V64QI
6743	    (match_operand:V32QI 1 "register_operand" "x")
6744	    (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6745	  (parallel [(const_int 0) (const_int 32)
6746		     (const_int 1) (const_int 33)
6747		     (const_int 2) (const_int 34)
6748		     (const_int 3) (const_int 35)
6749		     (const_int 4) (const_int 36)
6750		     (const_int 5) (const_int 37)
6751		     (const_int 6) (const_int 38)
6752		     (const_int 7) (const_int 39)
6753		     (const_int 16) (const_int 48)
6754		     (const_int 17) (const_int 49)
6755		     (const_int 18) (const_int 50)
6756		     (const_int 19) (const_int 51)
6757		     (const_int 20) (const_int 52)
6758		     (const_int 21) (const_int 53)
6759		     (const_int 22) (const_int 54)
6760		     (const_int 23) (const_int 55)])))]
6761  "TARGET_AVX2"
6762  "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6763  [(set_attr "type" "sselog")
6764   (set_attr "prefix" "vex")
6765   (set_attr "mode" "OI")])
6766
;; punpcklbw / vpunpcklbw on 128-bit vectors.  In the V32QI
;; concatenation, indices 0-15 are operand 1 and 16-31 are operand 2; the
;; selection interleaves bytes 0-7 of operand 1 with bytes 0-7 of
;; operand 2, operand 1 first.
6767(define_insn "vec_interleave_lowv16qi"
6768  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6769	(vec_select:V16QI
6770	  (vec_concat:V32QI
6771	    (match_operand:V16QI 1 "register_operand" "0,x")
6772	    (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6773	  (parallel [(const_int 0) (const_int 16)
6774		     (const_int 1) (const_int 17)
6775		     (const_int 2) (const_int 18)
6776		     (const_int 3) (const_int 19)
6777		     (const_int 4) (const_int 20)
6778		     (const_int 5) (const_int 21)
6779		     (const_int 6) (const_int 22)
6780		     (const_int 7) (const_int 23)])))]
6781  "TARGET_SSE2"
6782  "@
6783   punpcklbw\t{%2, %0|%0, %2}
6784   vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6785  [(set_attr "isa" "noavx,avx")
6786   (set_attr "type" "sselog")
6787   (set_attr "prefix_data16" "1,*")
6788   (set_attr "prefix" "orig,vex")
6789   (set_attr "mode" "TI")])
6790
;; vpunpckhwd on 256-bit vectors.  In the V32HI concatenation, indices
;; 0-15 are operand 1 and 16-31 are operand 2.  The selection alternates
;; operand 1 / operand 2 elements, taking elements 4-7 of each operand
;; first and elements 12-15 second, i.e. the upper four words of each
;; 8-element half are interleaved separately.
6791(define_insn "avx2_interleave_highv16hi"
6792  [(set (match_operand:V16HI 0 "register_operand" "=x")
6793	(vec_select:V16HI
6794	  (vec_concat:V32HI
6795	    (match_operand:V16HI 1 "register_operand" "x")
6796	    (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6797	  (parallel [(const_int 4) (const_int 20)
6798		     (const_int 5) (const_int 21)
6799		     (const_int 6) (const_int 22)
6800		     (const_int 7) (const_int 23)
6801		     (const_int 12) (const_int 28)
6802		     (const_int 13) (const_int 29)
6803		     (const_int 14) (const_int 30)
6804		     (const_int 15) (const_int 31)])))]
6805  "TARGET_AVX2"
6806  "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6807  [(set_attr "type" "sselog")
6808   (set_attr "prefix" "vex")
6809   (set_attr "mode" "OI")])
6810
;; punpckhwd / vpunpckhwd on 128-bit vectors.  In the V16HI
;; concatenation, indices 0-7 are operand 1 and 8-15 are operand 2; the
;; selection interleaves elements 4-7 of operand 1 with elements 4-7 of
;; operand 2, operand 1 first.
6811(define_insn "vec_interleave_highv8hi"
6812  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6813	(vec_select:V8HI
6814	  (vec_concat:V16HI
6815	    (match_operand:V8HI 1 "register_operand" "0,x")
6816	    (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6817	  (parallel [(const_int 4) (const_int 12)
6818		     (const_int 5) (const_int 13)
6819		     (const_int 6) (const_int 14)
6820		     (const_int 7) (const_int 15)])))]
6821  "TARGET_SSE2"
6822  "@
6823   punpckhwd\t{%2, %0|%0, %2}
6824   vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6825  [(set_attr "isa" "noavx,avx")
6826   (set_attr "type" "sselog")
6827   (set_attr "prefix_data16" "1,*")
6828   (set_attr "prefix" "orig,vex")
6829   (set_attr "mode" "TI")])
6830
;; vpunpcklwd on 256-bit vectors.  In the V32HI concatenation, indices
;; 0-15 are operand 1 and 16-31 are operand 2.  The selection alternates
;; operand 1 / operand 2 elements, taking elements 0-3 of each operand
;; first and elements 8-11 second, i.e. the lower four words of each
;; 8-element half are interleaved separately.
6831(define_insn "avx2_interleave_lowv16hi"
6832  [(set (match_operand:V16HI 0 "register_operand" "=x")
6833	(vec_select:V16HI
6834	  (vec_concat:V32HI
6835	    (match_operand:V16HI 1 "register_operand" "x")
6836	    (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6837	  (parallel [(const_int 0) (const_int 16)
6838		     (const_int 1) (const_int 17)
6839		     (const_int 2) (const_int 18)
6840		     (const_int 3) (const_int 19)
6841		     (const_int 8) (const_int 24)
6842		     (const_int 9) (const_int 25)
6843		     (const_int 10) (const_int 26)
6844		     (const_int 11) (const_int 27)])))]
6845  "TARGET_AVX2"
6846  "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6847  [(set_attr "type" "sselog")
6848   (set_attr "prefix" "vex")
6849   (set_attr "mode" "OI")])
6850
;; punpcklwd / vpunpcklwd on 128-bit vectors.  In the V16HI
;; concatenation, indices 0-7 are operand 1 and 8-15 are operand 2; the
;; selection interleaves elements 0-3 of operand 1 with elements 0-3 of
;; operand 2, operand 1 first.
6851(define_insn "vec_interleave_lowv8hi"
6852  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6853	(vec_select:V8HI
6854	  (vec_concat:V16HI
6855	    (match_operand:V8HI 1 "register_operand" "0,x")
6856	    (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6857	  (parallel [(const_int 0) (const_int 8)
6858		     (const_int 1) (const_int 9)
6859		     (const_int 2) (const_int 10)
6860		     (const_int 3) (const_int 11)])))]
6861  "TARGET_SSE2"
6862  "@
6863   punpcklwd\t{%2, %0|%0, %2}
6864   vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6865  [(set_attr "isa" "noavx,avx")
6866   (set_attr "type" "sselog")
6867   (set_attr "prefix_data16" "1,*")
6868   (set_attr "prefix" "orig,vex")
6869   (set_attr "mode" "TI")])
6870
;; vpunpckhdq on 256-bit vectors.  In the V16SI concatenation, indices
;; 0-7 are operand 1 and 8-15 are operand 2.  The selection alternates
;; operand 1 / operand 2 elements, taking elements 2-3 of each operand
;; first and elements 6-7 second, i.e. the upper two dwords of each
;; 4-element half are interleaved separately.
6871(define_insn "avx2_interleave_highv8si"
6872  [(set (match_operand:V8SI 0 "register_operand" "=x")
6873	(vec_select:V8SI
6874	  (vec_concat:V16SI
6875	    (match_operand:V8SI 1 "register_operand" "x")
6876	    (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6877	  (parallel [(const_int 2) (const_int 10)
6878		     (const_int 3) (const_int 11)
6879		     (const_int 6) (const_int 14)
6880		     (const_int 7) (const_int 15)])))]
6881  "TARGET_AVX2"
6882  "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6883  [(set_attr "type" "sselog")
6884   (set_attr "prefix" "vex")
6885   (set_attr "mode" "OI")])
6886
;; punpckhdq / vpunpckhdq on 128-bit vectors.  In the V8SI concatenation,
;; indices 0-3 are operand 1 and 4-7 are operand 2; the selection
;; interleaves elements 2-3 of operand 1 with elements 2-3 of operand 2,
;; operand 1 first.
6887(define_insn "vec_interleave_highv4si"
6888  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6889	(vec_select:V4SI
6890	  (vec_concat:V8SI
6891	    (match_operand:V4SI 1 "register_operand" "0,x")
6892	    (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6893	  (parallel [(const_int 2) (const_int 6)
6894		     (const_int 3) (const_int 7)])))]
6895  "TARGET_SSE2"
6896  "@
6897   punpckhdq\t{%2, %0|%0, %2}
6898   vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6899  [(set_attr "isa" "noavx,avx")
6900   (set_attr "type" "sselog")
6901   (set_attr "prefix_data16" "1,*")
6902   (set_attr "prefix" "orig,vex")
6903   (set_attr "mode" "TI")])
6904
;; vpunpckldq on 256-bit vectors.  In the V16SI concatenation, indices
;; 0-7 are operand 1 and 8-15 are operand 2.  The selection alternates
;; operand 1 / operand 2 elements, taking elements 0-1 of each operand
;; first and elements 4-5 second, i.e. the lower two dwords of each
;; 4-element half are interleaved separately.
6905(define_insn "avx2_interleave_lowv8si"
6906  [(set (match_operand:V8SI 0 "register_operand" "=x")
6907	(vec_select:V8SI
6908	  (vec_concat:V16SI
6909	    (match_operand:V8SI 1 "register_operand" "x")
6910	    (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6911	  (parallel [(const_int 0) (const_int 8)
6912		     (const_int 1) (const_int 9)
6913		     (const_int 4) (const_int 12)
6914		     (const_int 5) (const_int 13)])))]
6915  "TARGET_AVX2"
6916  "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6917  [(set_attr "type" "sselog")
6918   (set_attr "prefix" "vex")
6919   (set_attr "mode" "OI")])
6920
;; punpckldq / vpunpckldq on 128-bit vectors.  In the V8SI concatenation,
;; indices 0-3 are operand 1 and 4-7 are operand 2; the selection
;; interleaves elements 0-1 of operand 1 with elements 0-1 of operand 2,
;; operand 1 first.
6921(define_insn "vec_interleave_lowv4si"
6922  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6923	(vec_select:V4SI
6924	  (vec_concat:V8SI
6925	    (match_operand:V4SI 1 "register_operand" "0,x")
6926	    (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6927	  (parallel [(const_int 0) (const_int 4)
6928		     (const_int 1) (const_int 5)])))]
6929  "TARGET_SSE2"
6930  "@
6931   punpckldq\t{%2, %0|%0, %2}
6932   vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6933  [(set_attr "isa" "noavx,avx")
6934   (set_attr "type" "sselog")
6935   (set_attr "prefix_data16" "1,*")
6936   (set_attr "prefix" "orig,vex")
6937   (set_attr "mode" "TI")])
6938
;; Standard-name high interleave for the 256-bit integer modes (VI_256).
;; The avx2_interleave_{low,high}<mode> patterns interleave each 128-bit
;; half separately, so the result is assembled in three steps:
;;   t1 = avx2_interleave_low  (operand 1, operand 2)
;;   t2 = avx2_interleave_high (operand 1, operand 2)
;;   operand 0 = avx2_permv2ti (t1, t2, 1 + (3 << 4)), all viewed as V4DI.
;; NOTE(review): immediate 0x31 presumably selects the upper 128 bits of
;; t1 and the upper 128 bits of t2 -- confirm against avx2_permv2ti.
;; NOTE(review): the constraint strings on the operands below have no
;; effect in a define_expand.
6939(define_expand "vec_interleave_high<mode>"
6940  [(match_operand:VI_256 0 "register_operand" "=x")
6941   (match_operand:VI_256 1 "register_operand" "x")
6942   (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
6943 "TARGET_AVX2"
6944{
6945  rtx t1 = gen_reg_rtx (<MODE>mode);
6946  rtx t2 = gen_reg_rtx (<MODE>mode);
6947  emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6948  emit_insn (gen_avx2_interleave_high<mode> (t2,  operands[1], operands[2]));
6949  emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
6950				gen_lowpart (V4DImode, t1),
6951				gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
6952  DONE;
6953})
6954
;; Standard-name low interleave for the 256-bit integer modes (VI_256).
;; The avx2_interleave_{low,high}<mode> patterns interleave each 128-bit
;; half separately, so the result is assembled in three steps:
;;   t1 = avx2_interleave_low  (operand 1, operand 2)
;;   t2 = avx2_interleave_high (operand 1, operand 2)
;;   operand 0 = avx2_permv2ti (t1, t2, 0 + (2 << 4)), all viewed as V4DI.
;; NOTE(review): immediate 0x20 presumably selects the lower 128 bits of
;; t1 and the lower 128 bits of t2 -- confirm against avx2_permv2ti.
;; NOTE(review): the constraint strings on the operands below have no
;; effect in a define_expand.
6955(define_expand "vec_interleave_low<mode>"
6956  [(match_operand:VI_256 0 "register_operand" "=x")
6957   (match_operand:VI_256 1 "register_operand" "x")
6958   (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
6959 "TARGET_AVX2"
6960{
6961  rtx t1 = gen_reg_rtx (<MODE>mode);
6962  rtx t2 = gen_reg_rtx (<MODE>mode);
6963  emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6964  emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6965  emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
6966				gen_lowpart (V4DImode, t1),
6967				gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
6968  DONE;
6969})
6970
6971;; Modes handled by pinsr patterns.
;; V8HI carries no per-mode condition; V16QI and V4SI additionally need
;; SSE4.1, and V2DI needs SSE4.1 together with a 64-bit target.
6972(define_mode_iterator PINSR_MODE
6973  [(V16QI "TARGET_SSE4_1") V8HI
6974   (V4SI "TARGET_SSE4_1")
6975   (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
6976
;; ISA prefix used in the name of the pinsr pattern for each PINSR_MODE
;; mode: "sse2" for V8HI, "sse4_1" for V16QI, V4SI and V2DI.
6977(define_mode_attr sse2p4_1
6978  [(V16QI "sse4_1") (V8HI "sse2")
6979   (V4SI "sse4_1") (V2DI "sse4_1")])
6980
6981;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert scalar operand 2 into one element of vector operand 1.
;;
;; Operand 3 is the vec_merge mask.  The insn condition requires
;; exact_log2 of it, cast to unsigned, to be below the element count, so
;; only a single-bit mask naming a valid element is accepted (the
;; unsigned cast makes a negative exact_log2 result fail the comparison).
;; The output code then overwrites operands[3] with that bit index, which
;; becomes the instruction's immediate.
;;
;; Alternatives: 0 = legacy, register source; 1 = legacy, memory source;
;; 2 = AVX, register source; 3 = AVX, memory source.  For the register
;; alternatives, a scalar narrower than SImode is printed with the %k
;; operand modifier; otherwise control falls through to the plain %2
;; template of the following case.
;;
;; Attributes (all only differ for non-AVX targets): prefix_rex is 1 for
;; V2DI, prefix_data16 is 1 for V8HI, and prefix_extra is 1 for every
;; mode except non-AVX V8HI.
6982(define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
6983  [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
6984	(vec_merge:PINSR_MODE
6985	  (vec_duplicate:PINSR_MODE
6986	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
6987	  (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
6988	  (match_operand:SI 3 "const_int_operand" "")))]
6989  "TARGET_SSE2
6990   && ((unsigned) exact_log2 (INTVAL (operands[3]))
6991       < GET_MODE_NUNITS (<MODE>mode))"
6992{
6993  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6994
6995  switch (which_alternative)
6996    {
6997    case 0:
6998      if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6999	return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
7000      /* FALLTHRU */
7001    case 1:
7002      return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
7003    case 2:
7004      if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
7005	return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
7006      /* FALLTHRU */
7007    case 3:
7008      return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7009    default:
7010      gcc_unreachable ();
7011    }
7012}
7013  [(set_attr "isa" "noavx,noavx,avx,avx")
7014   (set_attr "type" "sselog")
7015   (set (attr "prefix_rex")
7016     (if_then_else
7017       (and (not (match_test "TARGET_AVX"))
7018	    (eq (const_string "<MODE>mode") (const_string "V2DImode")))
7019       (const_string "1")
7020       (const_string "*")))
7021   (set (attr "prefix_data16")
7022     (if_then_else
7023       (and (not (match_test "TARGET_AVX"))
7024	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7025       (const_string "1")
7026       (const_string "*")))
7027   (set (attr "prefix_extra")
7028     (if_then_else
7029       (and (not (match_test "TARGET_AVX"))
7030	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
7031       (const_string "*")
7032       (const_string "1")))
7033   (set_attr "length_immediate" "1")
7034   (set_attr "prefix" "orig,orig,vex,vex")
7035   (set_attr "mode" "TI")])
7036
;; pextrb into a general register: extract byte number operand 2 (0-15)
;; from V16QI operand 1 and zero-extend it to the SWI48 destination.  The
;; destination is always printed with the %k modifier, for both SWI48
;; modes.
7037(define_insn "*sse4_1_pextrb_<mode>"
7038  [(set (match_operand:SWI48 0 "register_operand" "=r")
7039	(zero_extend:SWI48
7040	  (vec_select:QI
7041	    (match_operand:V16QI 1 "register_operand" "x")
7042	    (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
7043  "TARGET_SSE4_1"
7044  "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
7045  [(set_attr "type" "sselog")
7046   (set_attr "prefix_extra" "1")
7047   (set_attr "length_immediate" "1")
7048   (set_attr "prefix" "maybe_vex")
7049   (set_attr "mode" "TI")])
7050
;; pextrb with a memory destination: store byte number operand 2 (0-15)
;; of V16QI operand 1 directly to the QImode memory operand 0, with no
;; extension.
7051(define_insn "*sse4_1_pextrb_memory"
7052  [(set (match_operand:QI 0 "memory_operand" "=m")
7053	(vec_select:QI
7054	  (match_operand:V16QI 1 "register_operand" "x")
7055	  (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
7056  "TARGET_SSE4_1"
7057  "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
7058  [(set_attr "type" "sselog")
7059   (set_attr "prefix_extra" "1")
7060   (set_attr "length_immediate" "1")
7061   (set_attr "prefix" "maybe_vex")
7062   (set_attr "mode" "TI")])
7063
;; pextrw into a general register: extract word number operand 2 (0-7)
;; from V8HI operand 1 and zero-extend it to the SWI48 destination.  This
;; register form only needs SSE2 and carries prefix_data16 rather than
;; prefix_extra.
7064(define_insn "*sse2_pextrw_<mode>"
7065  [(set (match_operand:SWI48 0 "register_operand" "=r")
7066	(zero_extend:SWI48
7067	  (vec_select:HI
7068	    (match_operand:V8HI 1 "register_operand" "x")
7069	    (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7070  "TARGET_SSE2"
7071  "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
7072  [(set_attr "type" "sselog")
7073   (set_attr "prefix_data16" "1")
7074   (set_attr "length_immediate" "1")
7075   (set_attr "prefix" "maybe_vex")
7076   (set_attr "mode" "TI")])
7077
;; pextrw with a memory destination: store word number operand 2 (0-7) of
;; V8HI operand 1 directly to the HImode memory operand 0.  Unlike the
;; register form above, this one requires SSE4.1 and sets prefix_extra.
7078(define_insn "*sse4_1_pextrw_memory"
7079  [(set (match_operand:HI 0 "memory_operand" "=m")
7080	(vec_select:HI
7081	  (match_operand:V8HI 1 "register_operand" "x")
7082	  (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7083  "TARGET_SSE4_1"
7084  "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7085  [(set_attr "type" "sselog")
7086   (set_attr "prefix_extra" "1")
7087   (set_attr "length_immediate" "1")
7088   (set_attr "prefix" "maybe_vex")
7089   (set_attr "mode" "TI")])
7090
;; pextrd: extract dword number operand 2 (0-3) from V4SI operand 1 into
;; an SImode destination, which may be a general register or memory
;; (constraint "rm").
7091(define_insn "*sse4_1_pextrd"
7092  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7093	(vec_select:SI
7094	  (match_operand:V4SI 1 "register_operand" "x")
7095	  (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7096  "TARGET_SSE4_1"
7097  "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7098  [(set_attr "type" "sselog")
7099   (set_attr "prefix_extra" "1")
7100   (set_attr "length_immediate" "1")
7101   (set_attr "prefix" "maybe_vex")
7102   (set_attr "mode" "TI")])
7103
;; pextrd combined with a zero extension to DImode: extract dword number
;; operand 2 (0-3) from V4SI operand 1 into a 64-bit general register.
;; Only for 64-bit targets; the destination is printed with the %k
;; modifier.
7104(define_insn "*sse4_1_pextrd_zext"
7105  [(set (match_operand:DI 0 "register_operand" "=r")
7106	(zero_extend:DI
7107	  (vec_select:SI
7108	    (match_operand:V4SI 1 "register_operand" "x")
7109	    (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7110  "TARGET_64BIT && TARGET_SSE4_1"
7111  "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7112  [(set_attr "type" "sselog")
7113   (set_attr "prefix_extra" "1")
7114   (set_attr "length_immediate" "1")
7115   (set_attr "prefix" "maybe_vex")
7116   (set_attr "mode" "TI")])
7117
7118;; It must come before *vec_extractv2di_1_rex64 since it is preferred.
;; pextrq: extract qword number operand 2 (0-1) from V2DI operand 1 into
;; a DImode general register or memory destination.  Requires SSE4.1 on a
;; 64-bit target and sets prefix_rex.
7119(define_insn "*sse4_1_pextrq"
7120  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7121	(vec_select:DI
7122	  (match_operand:V2DI 1 "register_operand" "x")
7123	  (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7124  "TARGET_SSE4_1 && TARGET_64BIT"
7125  "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7126  [(set_attr "type" "sselog")
7127   (set_attr "prefix_rex" "1")
7128   (set_attr "prefix_extra" "1")
7129   (set_attr "length_immediate" "1")
7130   (set_attr "prefix" "maybe_vex")
7131   (set_attr "mode" "TI")])
7132
;; Expander that turns the 8-bit pshufd immediate (operand 2) into the
;; explicit element selectors expected by avx2_pshufd_1.  The immediate
;; is split into four 2-bit fields; they are passed once unchanged
;; (selectors for result elements 0-3) and once with 4 added (selectors
;; for result elements 4-7), so both 128-bit halves use the same shuffle.
7133(define_expand "avx2_pshufdv3"
7134  [(match_operand:V8SI 0 "register_operand" "")
7135   (match_operand:V8SI 1 "nonimmediate_operand" "")
7136   (match_operand:SI 2 "const_0_to_255_operand" "")]
7137  "TARGET_AVX2"
7138{
7139  int mask = INTVAL (operands[2]);
7140  emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
7141				GEN_INT ((mask >> 0) & 3),
7142				GEN_INT ((mask >> 2) & 3),
7143				GEN_INT ((mask >> 4) & 3),
7144				GEN_INT ((mask >> 6) & 3),
7145				GEN_INT (((mask >> 0) & 3) + 4),
7146				GEN_INT (((mask >> 2) & 3) + 4),
7147				GEN_INT (((mask >> 4) & 3) + 4),
7148				GEN_INT (((mask >> 6) & 3) + 4)));
7149  DONE;
7150})
7151
;; vpshufd on a 256-bit vector, written as a vec_select with eight
;; explicit selectors.  Operands 2-5 (each 0-3) choose the elements of
;; the low half and operands 6-9 (each 4-7) those of the high half.  The
;; insn condition requires every high-half selector to equal the
;; corresponding low-half selector plus 4, since a single immediate
;; controls both halves.  The output code packs operands 2-5 into the
;; 8-bit immediate (two bits each, operand 2 in the lowest bits) and
;; overwrites operands[2] with it.
7152(define_insn "avx2_pshufd_1"
7153  [(set (match_operand:V8SI 0 "register_operand" "=x")
7154	(vec_select:V8SI
7155	  (match_operand:V8SI 1 "nonimmediate_operand" "xm")
7156	  (parallel [(match_operand 2 "const_0_to_3_operand" "")
7157		     (match_operand 3 "const_0_to_3_operand" "")
7158		     (match_operand 4 "const_0_to_3_operand" "")
7159		     (match_operand 5 "const_0_to_3_operand" "")
7160		     (match_operand 6 "const_4_to_7_operand" "")
7161		     (match_operand 7 "const_4_to_7_operand" "")
7162		     (match_operand 8 "const_4_to_7_operand" "")
7163		     (match_operand 9 "const_4_to_7_operand" "")])))]
7164  "TARGET_AVX2
7165   && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
7166   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
7167   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
7168   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
7169{
7170  int mask = 0;
7171  mask |= INTVAL (operands[2]) << 0;
7172  mask |= INTVAL (operands[3]) << 2;
7173  mask |= INTVAL (operands[4]) << 4;
7174  mask |= INTVAL (operands[5]) << 6;
7175  operands[2] = GEN_INT (mask);
7176
7177  return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
7178}
7179  [(set_attr "type" "sselog1")
7180   (set_attr "prefix" "vex")
7181   (set_attr "length_immediate" "1")
7182   (set_attr "mode" "OI")])
7183
;; Expander that turns the pshufd immediate (operand 2) into the four
;; explicit element selectors expected by sse2_pshufd_1: bits 0-1, 2-3,
;; 4-5 and 6-7 of the immediate, in that order.  Operand 2 is only
;; required to be a const_int; bits above bit 7 are ignored by the
;; masking below.
7184(define_expand "sse2_pshufd"
7185  [(match_operand:V4SI 0 "register_operand" "")
7186   (match_operand:V4SI 1 "nonimmediate_operand" "")
7187   (match_operand:SI 2 "const_int_operand" "")]
7188  "TARGET_SSE2"
7189{
7190  int mask = INTVAL (operands[2]);
7191  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7192				GEN_INT ((mask >> 0) & 3),
7193				GEN_INT ((mask >> 2) & 3),
7194				GEN_INT ((mask >> 4) & 3),
7195				GEN_INT ((mask >> 6) & 3)));
7196  DONE;
7197})
7198
;; pshufd / vpshufd on a 128-bit vector, written as a vec_select with
;; four explicit selectors (operands 2-5, each 0-3).  The output code
;; packs them into the 8-bit immediate (two bits each, operand 2 in the
;; lowest bits) and overwrites operands[2] with it.
7199(define_insn "sse2_pshufd_1"
7200  [(set (match_operand:V4SI 0 "register_operand" "=x")
7201	(vec_select:V4SI
7202	  (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7203	  (parallel [(match_operand 2 "const_0_to_3_operand" "")
7204		     (match_operand 3 "const_0_to_3_operand" "")
7205		     (match_operand 4 "const_0_to_3_operand" "")
7206		     (match_operand 5 "const_0_to_3_operand" "")])))]
7207  "TARGET_SSE2"
7208{
7209  int mask = 0;
7210  mask |= INTVAL (operands[2]) << 0;
7211  mask |= INTVAL (operands[3]) << 2;
7212  mask |= INTVAL (operands[4]) << 4;
7213  mask |= INTVAL (operands[5]) << 6;
7214  operands[2] = GEN_INT (mask);
7215
7216  return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7217}
7218  [(set_attr "type" "sselog1")
7219   (set_attr "prefix_data16" "1")
7220   (set_attr "prefix" "maybe_vex")
7221   (set_attr "length_immediate" "1")
7222   (set_attr "mode" "TI")])
7223
;; Expander for VPSHUFLW on V16HI taking the shuffle control as an
;; immediate in the range 0-255 (operand 2).  Each 2-bit field of the
;; control yields two selectors for avx2_pshuflw_1: the field itself
;; (elements 0-3) and the field plus 8 (elements 8-11).
(define_expand "avx2_pshuflwv3"
  [(match_operand:V16HI 0 "register_operand" "")
   (match_operand:V16HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_0_to_255_operand" "")]
  "TARGET_AVX2"
{
  const int imm8 = INTVAL (operands[2]);
  rtx lo_sel[4], hi_sel[4];
  int k;

  for (k = 0; k < 4; k++)
    {
      /* Field k is taken from bits [2k+1:2k] of the control value.  */
      int field = (imm8 >> (2 * k)) & 3;

      lo_sel[k] = GEN_INT (field);
      hi_sel[k] = GEN_INT (field + 8);
    }

  emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
				 lo_sel[0], lo_sel[1], lo_sel[2], lo_sel[3],
				 hi_sel[0], hi_sel[1], hi_sel[2], hi_sel[3]));
  DONE;
})
7242
;; VPSHUFLW on a 256-bit V16HI value.  Result elements 0-3 and 8-11 are
;; chosen by operands 2-5 and 6-9 respectively; elements 4-7 and 12-15
;; are copied through unchanged.  The insn condition requires operand
;; 6+k to equal operand 2+k plus 8, so one immediate covers both groups.
(define_insn "avx2_pshuflw_1"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
	(vec_select:V16HI
	  (match_operand:V16HI 1 "nonimmediate_operand" "xm")
	  (parallel [(match_operand 2 "const_0_to_3_operand" "")
		     (match_operand 3 "const_0_to_3_operand" "")
		     (match_operand 4 "const_0_to_3_operand" "")
		     (match_operand 5 "const_0_to_3_operand" "")
		     (const_int 4)
		     (const_int 5)
		     (const_int 6)
		     (const_int 7)
		     (match_operand 6 "const_8_to_11_operand" "")
		     (match_operand 7 "const_8_to_11_operand" "")
		     (match_operand 8 "const_8_to_11_operand" "")
		     (match_operand 9 "const_8_to_11_operand" "")
		     (const_int 12)
		     (const_int 13)
		     (const_int 14)
		     (const_int 15)])))]
  "TARGET_AVX2
   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
{
  /* Pack the four 2-bit selectors of the first group into the
     immediate byte; selector k occupies bits [2k+1:2k].  */
  int imm8 = 0;
  int k;

  for (k = 0; k < 4; k++)
    imm8 |= INTVAL (operands[2 + k]) << (2 * k);

  /* Operand 2 is reused to carry the packed immediate to the template.  */
  operands[2] = GEN_INT (imm8);

  return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
7282
;; Expander for PSHUFLW on V8HI taking the shuffle control as one
;; integer (operand 2).  The control is broken into its four 2-bit
;; selectors, which are passed to sse2_pshuflw_1 as separate constants.
(define_expand "sse2_pshuflw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  const int imm8 = INTVAL (operands[2]);
  rtx sel[4];
  int k;

  /* Selector k is taken from bits [2k+1:2k] of the control value.  */
  for (k = 0; k < 4; k++)
    sel[k] = GEN_INT ((imm8 >> (2 * k)) & 3);

  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
				 sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
7297
;; PSHUFLW on a 128-bit V8HI value: result elements 0-3 are chosen by
;; operands 2-5 (each in the range 0-3); elements 4-7 are copied through
;; unchanged.
(define_insn "sse2_pshuflw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
	(vec_select:V8HI
	  (match_operand:V8HI 1 "nonimmediate_operand" "xm")
	  (parallel [(match_operand 2 "const_0_to_3_operand" "")
		     (match_operand 3 "const_0_to_3_operand" "")
		     (match_operand 4 "const_0_to_3_operand" "")
		     (match_operand 5 "const_0_to_3_operand" "")
		     (const_int 4)
		     (const_int 5)
		     (const_int 6)
		     (const_int 7)])))]
  "TARGET_SSE2"
{
  /* Rebuild the immediate byte from the four 2-bit selectors;
     selector k occupies bits [2k+1:2k].  */
  int imm8 = 0;
  int k;

  for (k = 0; k < 4; k++)
    imm8 |= INTVAL (operands[2 + k]) << (2 * k);

  /* Operand 2 is reused to carry the packed immediate to the template.  */
  operands[2] = GEN_INT (imm8);

  return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
7327
;; Expander for VPSHUFHW on V16HI taking the shuffle control as an
;; immediate in the range 0-255 (operand 2).  Each 2-bit field of the
;; control yields two selectors for avx2_pshufhw_1: the field plus 4
;; (elements 4-7) and the field plus 12 (elements 12-15).
(define_expand "avx2_pshufhwv3"
  [(match_operand:V16HI 0 "register_operand" "")
   (match_operand:V16HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_0_to_255_operand" "")]
  "TARGET_AVX2"
{
  const int imm8 = INTVAL (operands[2]);
  rtx lo_sel[4], hi_sel[4];
  int k;

  for (k = 0; k < 4; k++)
    {
      /* Field k is taken from bits [2k+1:2k] of the control value.  */
      int field = (imm8 >> (2 * k)) & 3;

      lo_sel[k] = GEN_INT (field + 4);
      hi_sel[k] = GEN_INT (field + 12);
    }

  emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
				 lo_sel[0], lo_sel[1], lo_sel[2], lo_sel[3],
				 hi_sel[0], hi_sel[1], hi_sel[2], hi_sel[3]));
  DONE;
})
7346
;; VPSHUFHW on a 256-bit V16HI value.  Result elements 4-7 and 12-15 are
;; chosen by operands 2-5 and 6-9 respectively; elements 0-3 and 8-11
;; are copied through unchanged.  The insn condition requires operand
;; 6+k to equal operand 2+k plus 8, so one immediate covers both groups.
(define_insn "avx2_pshufhw_1"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
	(vec_select:V16HI
	  (match_operand:V16HI 1 "nonimmediate_operand" "xm")
	  (parallel [(const_int 0)
		     (const_int 1)
		     (const_int 2)
		     (const_int 3)
		     (match_operand 2 "const_4_to_7_operand" "")
		     (match_operand 3 "const_4_to_7_operand" "")
		     (match_operand 4 "const_4_to_7_operand" "")
		     (match_operand 5 "const_4_to_7_operand" "")
		     (const_int 8)
		     (const_int 9)
		     (const_int 10)
		     (const_int 11)
		     (match_operand 6 "const_12_to_15_operand" "")
		     (match_operand 7 "const_12_to_15_operand" "")
		     (match_operand 8 "const_12_to_15_operand" "")
		     (match_operand 9 "const_12_to_15_operand" "")])))]
  "TARGET_AVX2
   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
{
  /* Operands 2-5 are element numbers in the range 4-7; subtracting 4
     turns each into a 2-bit field, and field k occupies bits
     [2k+1:2k] of the immediate byte.  */
  int imm8 = 0;
  int k;

  for (k = 0; k < 4; k++)
    imm8 |= (INTVAL (operands[2 + k]) - 4) << (2 * k);

  /* Operand 2 is reused to carry the packed immediate to the template.  */
  operands[2] = GEN_INT (imm8);

  return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
7386
;; Expander for PSHUFHW on V8HI taking the shuffle control as one
;; integer (operand 2).  Each 2-bit field of the control, plus 4, becomes
;; one element selector (range 4-7) handed to sse2_pshufhw_1.
(define_expand "sse2_pshufhw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  const int imm8 = INTVAL (operands[2]);
  rtx sel[4];
  int k;

  /* Field k is taken from bits [2k+1:2k] of the control value.  */
  for (k = 0; k < 4; k++)
    sel[k] = GEN_INT (((imm8 >> (2 * k)) & 3) + 4);

  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
				 sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
7401
;; PSHUFHW on a 128-bit V8HI value: result elements 4-7 are chosen by
;; operands 2-5 (each in the range 4-7); elements 0-3 are copied through
;; unchanged.
(define_insn "sse2_pshufhw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
	(vec_select:V8HI
	  (match_operand:V8HI 1 "nonimmediate_operand" "xm")
	  (parallel [(const_int 0)
		     (const_int 1)
		     (const_int 2)
		     (const_int 3)
		     (match_operand 2 "const_4_to_7_operand" "")
		     (match_operand 3 "const_4_to_7_operand" "")
		     (match_operand 4 "const_4_to_7_operand" "")
		     (match_operand 5 "const_4_to_7_operand" "")])))]
  "TARGET_SSE2"
{
  /* Operands 2-5 are element numbers in the range 4-7; subtracting 4
     turns each into a 2-bit field, and field k occupies bits
     [2k+1:2k] of the immediate byte.  */
  int imm8 = 0;
  int k;

  for (k = 0; k < 4; k++)
    imm8 |= (INTVAL (operands[2 + k]) - 4) << (2 * k);

  /* Operand 2 is reused to carry the packed immediate to the template.  */
  operands[2] = GEN_INT (imm8);

  return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
7431
;; Expander: build a V4SI from SI operand 1.  The vec_merge mask of 1
;; takes element 0 from the duplicated scalar and the remaining elements
;; from operand 2, which the preparation statement sets to the V4SI zero
;; constant.
7432(define_expand "sse2_loadd"
7433  [(set (match_operand:V4SI 0 "register_operand" "")
7434	(vec_merge:V4SI
7435	  (vec_duplicate:V4SI
7436	    (match_operand:SI 1 "nonimmediate_operand" ""))
7437	  (match_dup 2)
7438	  (const_int 1)))]
7439  "TARGET_SSE"
7440  "operands[2] = CONST0_RTX (V4SImode);")
7441
;; Put SI operand 2 into element 0 of a V4SI; the other elements come
;; from operand 1 (reg_or_0_operand).  Five alternatives:
;;   0, 1: operand 1 matches constraint "C"; movd from memory or from a
;;         general register.
;;   2:    operand 1 matches "C"; movss from memory (non-AVX).
;;   3:    operand 1 tied to the destination; movss register to register
;;         (non-AVX).
;;   4:    three-operand vmovss (AVX).
;; NOTE(review): "C" is presumably the constant-zero vector constraint and
;; "Yi" an SSE register usable for inter-unit moves -- confirm in
;; constraints.md.
7442(define_insn "sse2_loadld"
7443  [(set (match_operand:V4SI 0 "register_operand"       "=x,Yi,x,x,x")
7444	(vec_merge:V4SI
7445	  (vec_duplicate:V4SI
7446	    (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
7447	  (match_operand:V4SI 1 "reg_or_0_operand"     "C ,C ,C,0,x")
7448	  (const_int 1)))]
7449  "TARGET_SSE"
7450  "@
7451   %vmovd\t{%2, %0|%0, %2}
7452   %vmovd\t{%2, %0|%0, %2}
7453   movss\t{%2, %0|%0, %2}
7454   movss\t{%2, %0|%0, %2}
7455   vmovss\t{%2, %1, %0|%0, %1, %2}"
7456  [(set_attr "isa" "sse2,*,noavx,noavx,avx")
7457   (set_attr "type" "ssemov")
7458   (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
7459   (set_attr "mode" "TI,TI,V4SF,SF,SF")])
7460
;; Extract element 0 of a V4SI register into an SI destination.  The
;; insn itself is always output as "#"; after reload it is split into a
;; plain SImode move whose source is the same hard register viewed in
;; SImode.  The split is allowed only when inter-unit moves are enabled,
;; the destination is memory, or the destination is not a general
;; register.
7461(define_insn_and_split "sse2_stored"
7462  [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
7463	(vec_select:SI
7464	  (match_operand:V4SI 1 "register_operand" "x,Yi")
7465	  (parallel [(const_int 0)])))]
7466  "TARGET_SSE"
7467  "#"
7468  "&& reload_completed
7469   && (TARGET_INTER_UNIT_MOVES
7470       || MEM_P (operands [0])
7471       || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7472  [(set (match_dup 0) (match_dup 1))]
7473  "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
7474
;; Extract element N (operand 2, range 0-3) of a V4SI that lives in
;; offsettable memory into a general register.  The insn has an empty
;; condition and is always output as "#"; once reload has completed it
;; is split into a single SImode load from the vector's address plus
;; 4*N bytes.
7475(define_insn_and_split "*vec_ext_v4si_mem"
7476  [(set (match_operand:SI 0 "register_operand" "=r")
7477	(vec_select:SI
7478	  (match_operand:V4SI 1 "memory_operand" "o")
7479	  (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7480  ""
7481  "#"
7482  "reload_completed"
7483  [(const_int 0)]
7484{
7485  int i = INTVAL (operands[2]);
7486
7487  emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
7488  DONE;
7489})
7490
;; Expander for extracting element 0 of a V2DI register into a DI
;; destination.  There is no preparation code, so the pattern below is
;; emitted exactly as written whenever TARGET_SSE holds.
7491(define_expand "sse_storeq"
7492  [(set (match_operand:DI 0 "nonimmediate_operand" "")
7493	(vec_select:DI
7494	  (match_operand:V2DI 1 "register_operand" "")
7495	  (parallel [(const_int 0)])))]
7496  "TARGET_SSE")
7497
;; 64-bit form of the element-0 extract from V2DI.  The condition
;; rejects the memory-to-memory combination.  The first two alternatives
;; (SSE register source) are output as "#", i.e. they have no assembler
;; template of their own and must be split; the third alternative reads
;; the value from offsettable memory with an integer mov.
7498(define_insn "*sse2_storeq_rex64"
7499  [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
7500	(vec_select:DI
7501	  (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7502	  (parallel [(const_int 0)])))]
7503  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7504  "@
7505   #
7506   #
7507   mov{q}\t{%1, %0|%0, %1}"
7508  [(set_attr "type" "*,*,imov")
7509   (set_attr "mode" "*,*,DI")])
7510
;; Element-0 extract from a V2DI register into an SSE register or
;; memory.  Always output as "#": the insn has no assembler template of
;; its own and must be split before final output.
7511(define_insn "*sse2_storeq"
7512  [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
7513	(vec_select:DI
7514	  (match_operand:V2DI 1 "register_operand" "x")
7515	  (parallel [(const_int 0)])))]
7516  "TARGET_SSE"
7517  "#")
7518
;; After reload, turn an element-0 extract from a V2DI register into a
;; plain DImode move whose source is the same hard register viewed in
;; DImode.  Applies only when inter-unit moves are enabled, the
;; destination is memory, or the destination is not a general register.
7519(define_split
7520  [(set (match_operand:DI 0 "nonimmediate_operand" "")
7521	(vec_select:DI
7522	  (match_operand:V2DI 1 "register_operand" "")
7523	  (parallel [(const_int 0)])))]
7524  "TARGET_SSE
7525   && reload_completed
7526   && (TARGET_INTER_UNIT_MOVES
7527       || MEM_P (operands [0])
7528       || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7529  [(set (match_dup 0) (match_dup 1))]
7530  "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
7531
;; Extract element 1 of a V2DI on 64-bit targets (memory-to-memory is
;; rejected by the condition).  Alternatives:
;;   0: store to memory with movhps.
;;   1: psrldq by 8 bytes in place (non-AVX, source tied to destination).
;;   2: vpsrldq by 8 bytes (AVX).
;;   3: movq from offsettable memory, printed through %H1, into an SSE
;;      register.
;;   4: integer mov from offsettable memory, printed through %H1, into a
;;      general register.
;; NOTE(review): %H1 presumably prints the memory operand offset to its
;; upper 8 bytes -- confirm against print_operand in i386.c.
7532(define_insn "*vec_extractv2di_1_rex64"
7533  [(set (match_operand:DI 0 "nonimmediate_operand"     "=m,x,x,x,r")
7534	(vec_select:DI
7535	  (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
7536	  (parallel [(const_int 1)])))]
7537  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7538  "@
7539   %vmovhps\t{%1, %0|%0, %1}
7540   psrldq\t{$8, %0|%0, 8}
7541   vpsrldq\t{$8, %1, %0|%0, %1, 8}
7542   %vmovq\t{%H1, %0|%0, %H1}
7543   mov{q}\t{%H1, %0|%0, %H1}"
7544  [(set_attr "isa" "*,noavx,avx,*,*")
7545   (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
7546   (set_attr "length_immediate" "*,1,1,*,*")
7547   (set_attr "memory" "*,none,none,*,*")
7548   (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
7549   (set_attr "mode" "V2SF,TI,TI,TI,DI")])
7550
;; Extract element 1 of a V2DI on 32-bit targets with SSE
;; (memory-to-memory is rejected by the condition).  Alternatives:
;;   0: store to memory with movhps.
;;   1: psrldq by 8 bytes in place (SSE2, non-AVX).
;;   2: vpsrldq by 8 bytes (AVX).
;;   3: movq from offsettable memory, printed through %H1 (SSE2).
;;   4: movhlps register to register (non-AVX).
;;   5: movlps from offsettable memory, printed through %H1 (non-AVX).
;; NOTE(review): %H1 presumably prints the memory operand offset to its
;; upper 8 bytes -- confirm against print_operand in i386.c.
7551(define_insn "*vec_extractv2di_1"
7552  [(set (match_operand:DI 0 "nonimmediate_operand"     "=m,x,x,x,x,x")
7553	(vec_select:DI
7554	  (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
7555	  (parallel [(const_int 1)])))]
7556  "!TARGET_64BIT && TARGET_SSE
7557   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7558  "@
7559   %vmovhps\t{%1, %0|%0, %1}
7560   psrldq\t{$8, %0|%0, 8}
7561   vpsrldq\t{$8, %1, %0|%0, %1, 8}
7562   %vmovq\t{%H1, %0|%0, %H1}
7563   movhlps\t{%1, %0|%0, %1}
7564   movlps\t{%H1, %0|%0, %H1}"
7565  [(set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx")
7566   (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
7567   (set_attr "length_immediate" "*,1,1,*,*,*")
7568   (set_attr "memory" "*,none,none,*,*,*")
7569   (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
7570   (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
7571
;; Broadcast SI operand 1 into all four elements of a V4SI.
;; Alternatives:
;;   0: pshufd with immediate 0 from an SSE register (SSE2).
;;   1: vbroadcastss from memory (AVX).
;;   2: shufps with immediate 0 in place, source tied to the destination
;;      (non-AVX).
7572(define_insn "*vec_dupv4si"
7573  [(set (match_operand:V4SI 0 "register_operand"     "=x,x,x")
7574	(vec_duplicate:V4SI
7575	  (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
7576  "TARGET_SSE"
7577  "@
7578   %vpshufd\t{$0, %1, %0|%0, %1, 0}
7579   vbroadcastss\t{%1, %0|%0, %1}
7580   shufps\t{$0, %0, %0|%0, %0, 0}"
7581  [(set_attr "isa" "sse2,avx,noavx")
7582   (set_attr "type" "sselog1,ssemov,sselog1")
7583   (set_attr "length_immediate" "1,0,1")
7584   (set_attr "prefix_extra" "0,1,*")
7585   (set_attr "prefix" "maybe_vex,vex,orig")
7586   (set_attr "mode" "TI,V4SF,V4SF")])
7587
;; Broadcast DI operand 1 into both elements of a V2DI.  Alternatives:
;;   0: punpcklqdq of the destination with itself (SSE2, non-AVX; source
;;      tied to the destination).
;;   1: vpunpcklqdq, with the source printed through %d1 (AVX).
;;   2: movddup from memory (SSE3).
;;   3: movlhps of the destination with itself (non-AVX; source tied to
;;      the destination).
;; NOTE(review): %d1 presumably prints the operand twice to form the two
;; source operands of the VEX instruction -- confirm against
;; print_operand in i386.c.
7588(define_insn "*vec_dupv2di"
7589  [(set (match_operand:V2DI 0 "register_operand"     "=x,x,x,x")
7590	(vec_duplicate:V2DI
7591	  (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
7592  "TARGET_SSE"
7593  "@
7594   punpcklqdq\t%0, %0
7595   vpunpcklqdq\t{%d1, %0|%0, %d1}
7596   %vmovddup\t{%1, %0|%0, %1}
7597   movlhps\t%0, %0"
7598  [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
7599   (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
7600   (set_attr "prefix" "orig,vex,maybe_vex,orig")
7601   (set_attr "mode" "TI,TI,DF,V4SF")])
7602
;; Concatenate SI operands 1 (element 0) and 2 (element 1) into a V2SI
;; when SSE4.1 is available.  Alternatives:
;;   0, 1: pinsrd / vpinsrd of operand 2 (register or memory) into
;;         position 1.
;;   2, 3: punpckldq / vpunpckldq when operand 2 is an SSE register.
;;   4:    movd of operand 1 when operand 2 matches constraint "C".
;;   5, 6: MMX-register variants (punpckldq, movd).
7603(define_insn "*vec_concatv2si_sse4_1"
7604  [(set (match_operand:V2SI 0 "register_operand"     "=x, x,x,x, x, *y,*y")
7605	(vec_concat:V2SI
7606	  (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm,  0,rm")
7607	  (match_operand:SI 2 "vector_move_operand"  "rm,rm,x,x, C,*ym, C")))]
7608  "TARGET_SSE4_1"
7609  "@
7610   pinsrd\t{$1, %2, %0|%0, %2, 1}
7611   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
7612   punpckldq\t{%2, %0|%0, %2}
7613   vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7614   %vmovd\t{%1, %0|%0, %1}
7615   punpckldq\t{%2, %0|%0, %2}
7616   movd\t{%1, %0|%0, %1}"
7617  [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7618   (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
7619   (set_attr "prefix_extra" "1,1,*,*,*,*,*")
7620   (set_attr "length_immediate" "1,1,*,*,*,*,*")
7621   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
7622   (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
7623
7624;; ??? In theory we can match memory for the MMX alternative, but allowing
7625;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7626;; alternatives pretty much forces the MMX alternative to be chosen.
;; SSE2 form of the V2SI concatenation.  Operand 2 must be a register or
;; match constraint "C" (reg_or_0_operand).  Alternatives 0-1 use SSE
;; registers (punpckldq, or movd of operand 1 when operand 2 matches
;; "C"); alternatives 2-3 are the same operations on MMX registers.
7627(define_insn "*vec_concatv2si_sse2"
7628  [(set (match_operand:V2SI 0 "register_operand"     "=x,x ,*y,*y")
7629	(vec_concat:V2SI
7630	  (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7631	  (match_operand:SI 2 "reg_or_0_operand"     " x,C ,*y, C")))]
7632  "TARGET_SSE2"
7633  "@
7634   punpckldq\t{%2, %0|%0, %2}
7635   movd\t{%1, %0|%0, %1}
7636   punpckldq\t{%2, %0|%0, %2}
7637   movd\t{%1, %0|%0, %1}"
7638  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7639   (set_attr "mode" "TI,TI,DI,DI")])
7640
;; SSE-only form of the V2SI concatenation.  The SSE-register
;; alternatives use unpcklps (both inputs in registers) or movss from
;; memory (operand 2 matches "C"); alternatives 2-3 are the MMX-register
;; variants (punpckldq, movd).
7641(define_insn "*vec_concatv2si_sse"
7642  [(set (match_operand:V2SI 0 "register_operand"     "=x,x,*y,*y")
7643	(vec_concat:V2SI
7644	  (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7645	  (match_operand:SI 2 "reg_or_0_operand"     " x,C,*y,C")))]
7646  "TARGET_SSE"
7647  "@
7648   unpcklps\t{%2, %0|%0, %2}
7649   movss\t{%1, %0|%0, %1}
7650   punpckldq\t{%2, %0|%0, %2}
7651   movd\t{%1, %0|%0, %1}"
7652  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7653   (set_attr "mode" "V4SF,V4SF,DI,DI")])
7654
;; Concatenate two V2SI halves into a V4SI: operand 1 becomes the low
;; half, operand 2 the high half.  Alternatives:
;;   0, 1: punpcklqdq / vpunpcklqdq, both halves in registers.
;;   2:    movlhps, both halves in registers (non-AVX).
;;   3, 4: movhps / vmovhps, high half loaded from memory.
7655(define_insn "*vec_concatv4si"
7656  [(set (match_operand:V4SI 0 "register_operand"       "=x,x,x,x,x")
7657	(vec_concat:V4SI
7658	  (match_operand:V2SI 1 "register_operand"     " 0,x,0,0,x")
7659	  (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
7660  "TARGET_SSE"
7661  "@
7662   punpcklqdq\t{%2, %0|%0, %2}
7663   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7664   movlhps\t{%2, %0|%0, %2}
7665   movhps\t{%2, %0|%0, %2}
7666   vmovhps\t{%2, %1, %0|%0, %1, %2}"
7667  [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
7668   (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
7669   (set_attr "prefix" "orig,vex,orig,orig,vex")
7670   (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
7671
7672;; movd instead of movq is required to handle broken assemblers.
;; Concatenate DI operands 1 (element 0) and 2 (element 1) into a V2DI
;; on 64-bit targets.  Nine alternatives, in order:
;;   0, 1: pinsrq / vpinsrq of operand 2 into position 1.
;;   2:    movq of operand 1 (SSE register or memory) when operand 2
;;         matches constraint "C".
;;   3:    movd of operand 1 from a general register when operand 2
;;         matches "C".
;;   4:    movq2dq of operand 1 from an MMX register when operand 2
;;         matches "C".
;;   5, 6: punpcklqdq / vpunpcklqdq, both inputs in SSE registers.
;;   7, 8: movhps / vmovhps, operand 2 in memory.
;; The "type" attribute is sselog for alternatives 0, 1, 5 and 6 and
;; ssemov otherwise; "prefix_rex" is forced to 1 for alternatives 0 and 3
;; when AVX is not enabled.
7673(define_insn "*vec_concatv2di_rex64"
7674  [(set (match_operand:V2DI 0 "register_operand"
7675	  "=x,x ,x ,Yi,!x,x,x,x,x")
7676	(vec_concat:V2DI
7677	  (match_operand:DI 1 "nonimmediate_operand"
7678	  " 0,x ,xm,r ,*y,0,x,0,x")
7679	  (match_operand:DI 2 "vector_move_operand"
7680	  "rm,rm,C ,C ,C ,x,x,m,m")))]
7681  "TARGET_64BIT"
7682  "@
7683   pinsrq\t{$1, %2, %0|%0, %2, 1}
7684   vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
7685   %vmovq\t{%1, %0|%0, %1}
7686   %vmovd\t{%1, %0|%0, %1}
7687   movq2dq\t{%1, %0|%0, %1}
7688   punpcklqdq\t{%2, %0|%0, %2}
7689   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7690   movhps\t{%2, %0|%0, %2}
7691   vmovhps\t{%2, %1, %0|%0, %1, %2}"
7692  [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx")
7693   (set (attr "type")
7694     (if_then_else
7695       (eq_attr "alternative" "0,1,5,6")
7696       (const_string "sselog")
7697       (const_string "ssemov")))
7698   (set (attr "prefix_rex")
7699     (if_then_else
7700       (and (eq_attr "alternative" "0,3")
7701	    (not (match_test "TARGET_AVX")))
7702       (const_string "1")
7703       (const_string "*")))
7704   (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
7705   (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
7706   (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
7707   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
7708
;; Concatenate DI operands 1 (element 0) and 2 (element 1) into a V2DI
;; on 32-bit targets with SSE.  Alternatives:
;;   0:    movq of operand 1 when operand 2 matches constraint "C" (SSE2).
;;   1:    movq2dq of operand 1 from an MMX register when operand 2
;;         matches "C" (SSE2).
;;   2, 3: punpcklqdq / vpunpcklqdq, both inputs in SSE registers.
;;   4:    movlhps, both inputs in SSE registers (non-AVX).
;;   5, 6: movhps / vmovhps, operand 2 in memory.
7709(define_insn "vec_concatv2di"
7710  [(set (match_operand:V2DI 0 "register_operand"     "=x,?x,x,x,x,x,x")
7711	(vec_concat:V2DI
7712	  (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x")
7713	  (match_operand:DI 2 "vector_move_operand"  " C, C,x,x,x,m,m")))]
7714  "!TARGET_64BIT && TARGET_SSE"
7715  "@
7716   %vmovq\t{%1, %0|%0, %1}
7717   movq2dq\t{%1, %0|%0, %1}
7718   punpcklqdq\t{%2, %0|%0, %2}
7719   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7720   movlhps\t{%2, %0|%0, %2}
7721   movhps\t{%2, %0|%0, %2}
7722   vmovhps\t{%2, %1, %0|%0, %1, %2}"
7723  [(set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
7724   (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
7725   (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
7726   (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
7727
;; vec_unpacks_lo expander for the VI124_AVX2 modes; the result has mode
;; <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called here with (operands, false, false).
;; NOTE(review): judging by the four vec_unpack* expanders, the first
;; flag presumably selects unsigned extension and the second the high
;; half -- confirm against the helper's definition in i386.c.
7728(define_expand "vec_unpacks_lo_<mode>"
7729  [(match_operand:<sseunpackmode> 0 "register_operand" "")
7730   (match_operand:VI124_AVX2 1 "register_operand" "")]
7731  "TARGET_SSE2"
7732  "ix86_expand_sse_unpack (operands, false, false); DONE;")
7733
;; vec_unpacks_hi expander for the VI124_AVX2 modes; the result has mode
;; <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called here with (operands, false, true).
;; NOTE(review): judging by the four vec_unpack* expanders, the first
;; flag presumably selects unsigned extension and the second the high
;; half -- confirm against the helper's definition in i386.c.
7734(define_expand "vec_unpacks_hi_<mode>"
7735  [(match_operand:<sseunpackmode> 0 "register_operand" "")
7736   (match_operand:VI124_AVX2 1 "register_operand" "")]
7737  "TARGET_SSE2"
7738  "ix86_expand_sse_unpack (operands, false, true); DONE;")
7739
;; vec_unpacku_lo expander for the VI124_AVX2 modes; the result has mode
;; <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called here with (operands, true, false).
;; NOTE(review): judging by the four vec_unpack* expanders, the first
;; flag presumably selects unsigned extension and the second the high
;; half -- confirm against the helper's definition in i386.c.
7740(define_expand "vec_unpacku_lo_<mode>"
7741  [(match_operand:<sseunpackmode> 0 "register_operand" "")
7742   (match_operand:VI124_AVX2 1 "register_operand" "")]
7743  "TARGET_SSE2"
7744  "ix86_expand_sse_unpack (operands, true, false); DONE;")
7745
;; vec_unpacku_hi expander for the VI124_AVX2 modes; the result has mode
;; <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called here with (operands, true, true).
;; NOTE(review): judging by the four vec_unpack* expanders, the first
;; flag presumably selects unsigned extension and the second the high
;; half -- confirm against the helper's definition in i386.c.
7746(define_expand "vec_unpacku_hi_<mode>"
7747  [(match_operand:<sseunpackmode> 0 "register_operand" "")
7748   (match_operand:VI124_AVX2 1 "register_operand" "")]
7749  "TARGET_SSE2"
7750  "ix86_expand_sse_unpack (operands, true, true); DONE;")
7751
7752;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7753;;
7754;; Miscellaneous
7755;;
7756;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7757
;; Expander for the unsigned rounding average of two V32QI vectors.  As
;; written in RTL: both inputs are zero-extended to V32HI, added
;; together with a vector of ones, shifted right logically by one and
;; truncated back to V32QI.  Before the pattern is emitted the operands
;; are passed to ix86_fixup_binary_operands_no_copy (PLUS, ...).
;; NOTE(review): the vector of ones is spelled with mode V32QI although
;; it is added in V32HI; the matching insn pattern uses the same
;; spelling, so the two must be changed together if at all.
7758(define_expand "avx2_uavgv32qi3"
7759  [(set (match_operand:V32QI 0 "register_operand" "")
7760	(truncate:V32QI
7761	  (lshiftrt:V32HI
7762	    (plus:V32HI
7763	      (plus:V32HI
7764		(zero_extend:V32HI
7765		  (match_operand:V32QI 1 "nonimmediate_operand" ""))
7766		(zero_extend:V32HI
7767		  (match_operand:V32QI 2 "nonimmediate_operand" "")))
7768	      (const_vector:V32QI [(const_int 1) (const_int 1)
7769				   (const_int 1) (const_int 1)
7770				   (const_int 1) (const_int 1)
7771				   (const_int 1) (const_int 1)
7772				   (const_int 1) (const_int 1)
7773				   (const_int 1) (const_int 1)
7774				   (const_int 1) (const_int 1)
7775				   (const_int 1) (const_int 1)
7776				   (const_int 1) (const_int 1)
7777				   (const_int 1) (const_int 1)
7778				   (const_int 1) (const_int 1)
7779				   (const_int 1) (const_int 1)
7780				   (const_int 1) (const_int 1)
7781				   (const_int 1) (const_int 1)
7782				   (const_int 1) (const_int 1)
7783				   (const_int 1) (const_int 1)]))
7784	    (const_int 1))))]
7785  "TARGET_AVX2"
7786  "ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);")
7787
;; Expander for the unsigned rounding average of two V16QI vectors.  As
;; written in RTL: both inputs are zero-extended to V16HI, added
;; together with a vector of ones, shifted right logically by one and
;; truncated back to V16QI.  Before the pattern is emitted the operands
;; are passed to ix86_fixup_binary_operands_no_copy (PLUS, ...).
;; NOTE(review): the vector of ones is spelled with mode V16QI although
;; it is added in V16HI; the matching insn pattern uses the same
;; spelling, so the two must be changed together if at all.
7788(define_expand "sse2_uavgv16qi3"
7789  [(set (match_operand:V16QI 0 "register_operand" "")
7790	(truncate:V16QI
7791	  (lshiftrt:V16HI
7792	    (plus:V16HI
7793	      (plus:V16HI
7794		(zero_extend:V16HI
7795		  (match_operand:V16QI 1 "nonimmediate_operand" ""))
7796		(zero_extend:V16HI
7797		  (match_operand:V16QI 2 "nonimmediate_operand" "")))
7798	      (const_vector:V16QI [(const_int 1) (const_int 1)
7799				   (const_int 1) (const_int 1)
7800				   (const_int 1) (const_int 1)
7801				   (const_int 1) (const_int 1)
7802				   (const_int 1) (const_int 1)
7803				   (const_int 1) (const_int 1)
7804				   (const_int 1) (const_int 1)
7805				   (const_int 1) (const_int 1)]))
7806	    (const_int 1))))]
7807  "TARGET_SSE2"
7808  "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
7809
;; vpavgb on 256-bit vectors: matches the zero-extend / add / add-one /
;; shift-right-by-one / truncate form of the unsigned rounding average
;; of two V32QI values.  Operand 1 is marked commutative ("%") and the
;; condition additionally requires ix86_binary_operator_ok (PLUS, ...).
7810(define_insn "*avx2_uavgv32qi3"
7811  [(set (match_operand:V32QI 0 "register_operand" "=x")
7812	(truncate:V32QI
7813	  (lshiftrt:V32HI
7814	    (plus:V32HI
7815	      (plus:V32HI
7816		(zero_extend:V32HI
7817		  (match_operand:V32QI 1 "nonimmediate_operand" "%x"))
7818		(zero_extend:V32HI
7819		  (match_operand:V32QI 2 "nonimmediate_operand" "xm")))
7820	      (const_vector:V32QI [(const_int 1) (const_int 1)
7821				   (const_int 1) (const_int 1)
7822				   (const_int 1) (const_int 1)
7823				   (const_int 1) (const_int 1)
7824				   (const_int 1) (const_int 1)
7825				   (const_int 1) (const_int 1)
7826				   (const_int 1) (const_int 1)
7827				   (const_int 1) (const_int 1)
7828				   (const_int 1) (const_int 1)
7829				   (const_int 1) (const_int 1)
7830				   (const_int 1) (const_int 1)
7831				   (const_int 1) (const_int 1)
7832				   (const_int 1) (const_int 1)
7833				   (const_int 1) (const_int 1)
7834				   (const_int 1) (const_int 1)
7835				   (const_int 1) (const_int 1)]))
7836	    (const_int 1))))]
7837  "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)"
7838  "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7839  [(set_attr "type" "sseiadd")
7840   (set_attr "prefix" "vex")
7841   (set_attr "mode" "OI")])
7842
;; pavgb / vpavgb on 128-bit vectors: matches the zero-extend / add /
;; add-one / shift-right-by-one / truncate form of the unsigned rounding
;; average of two V16QI values.  Alternative 0 is the two-operand
;; non-AVX form (operand 1 tied to the destination); alternative 1 is
;; the three-operand AVX form.  Operand 1 is marked commutative ("%").
7843(define_insn "*sse2_uavgv16qi3"
7844  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7845	(truncate:V16QI
7846	  (lshiftrt:V16HI
7847	    (plus:V16HI
7848	      (plus:V16HI
7849		(zero_extend:V16HI
7850		  (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
7851		(zero_extend:V16HI
7852		  (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
7853	      (const_vector:V16QI [(const_int 1) (const_int 1)
7854				   (const_int 1) (const_int 1)
7855				   (const_int 1) (const_int 1)
7856				   (const_int 1) (const_int 1)
7857				   (const_int 1) (const_int 1)
7858				   (const_int 1) (const_int 1)
7859				   (const_int 1) (const_int 1)
7860				   (const_int 1) (const_int 1)]))
7861	    (const_int 1))))]
7862  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7863  "@
7864   pavgb\t{%2, %0|%0, %2}
7865   vpavgb\t{%2, %1, %0|%0, %1, %2}"
7866  [(set_attr "isa" "noavx,avx")
7867   (set_attr "type" "sseiadd")
7868   (set_attr "prefix_data16" "1,*")
7869   (set_attr "prefix" "orig,vex")
7870   (set_attr "mode" "TI")])
7871
;; Expander for the unsigned rounding average of two V16HI vectors.  As
;; written in RTL: both inputs are zero-extended to V16SI, added
;; together with a vector of ones, shifted right logically by one and
;; truncated back to V16HI.  Before the pattern is emitted the operands
;; are passed to ix86_fixup_binary_operands_no_copy (PLUS, ...).
;; NOTE(review): the vector of ones is spelled with mode V16HI although
;; it is added in V16SI; the matching insn pattern uses the same
;; spelling, so the two must be changed together if at all.
7872(define_expand "avx2_uavgv16hi3"
7873  [(set (match_operand:V16HI 0 "register_operand" "")
7874	(truncate:V16HI
7875	  (lshiftrt:V16SI
7876	    (plus:V16SI
7877	      (plus:V16SI
7878		(zero_extend:V16SI
7879		  (match_operand:V16HI 1 "nonimmediate_operand" ""))
7880		(zero_extend:V16SI
7881		  (match_operand:V16HI 2 "nonimmediate_operand" "")))
7882	      (const_vector:V16HI [(const_int 1) (const_int 1)
7883				   (const_int 1) (const_int 1)
7884				   (const_int 1) (const_int 1)
7885				   (const_int 1) (const_int 1)
7886				   (const_int 1) (const_int 1)
7887				   (const_int 1) (const_int 1)
7888				   (const_int 1) (const_int 1)
7889				   (const_int 1) (const_int 1)]))
7890	    (const_int 1))))]
7891  "TARGET_AVX2"
7892  "ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);")
7893
;; Expander for the unsigned rounding average of two V8HI vectors.  As
;; written in RTL: both inputs are zero-extended to V8SI, added together
;; with a vector of ones, shifted right logically by one and truncated
;; back to V8HI.  Before the pattern is emitted the operands are passed
;; to ix86_fixup_binary_operands_no_copy (PLUS, ...).
;; NOTE(review): the vector of ones is spelled with mode V8HI although
;; it is added in V8SI; the matching insn pattern uses the same
;; spelling, so the two must be changed together if at all.
7894(define_expand "sse2_uavgv8hi3"
7895  [(set (match_operand:V8HI 0 "register_operand" "")
7896	(truncate:V8HI
7897	  (lshiftrt:V8SI
7898	    (plus:V8SI
7899	      (plus:V8SI
7900		(zero_extend:V8SI
7901		  (match_operand:V8HI 1 "nonimmediate_operand" ""))
7902		(zero_extend:V8SI
7903		  (match_operand:V8HI 2 "nonimmediate_operand" "")))
7904	      (const_vector:V8HI [(const_int 1) (const_int 1)
7905				  (const_int 1) (const_int 1)
7906				  (const_int 1) (const_int 1)
7907				  (const_int 1) (const_int 1)]))
7908	    (const_int 1))))]
7909  "TARGET_SSE2"
7910  "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
7911
;; vpavgw on 256-bit vectors: matches the zero-extend / add / add-one /
;; shift-right-by-one / truncate form of the unsigned rounding average
;; of two V16HI values.  Operand 1 is marked commutative ("%") and the
;; condition additionally requires ix86_binary_operator_ok (PLUS, ...).
7912(define_insn "*avx2_uavgv16hi3"
7913  [(set (match_operand:V16HI 0 "register_operand" "=x")
7914	(truncate:V16HI
7915	  (lshiftrt:V16SI
7916	    (plus:V16SI
7917	      (plus:V16SI
7918		(zero_extend:V16SI
7919		  (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
7920		(zero_extend:V16SI
7921		  (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
7922	      (const_vector:V16HI [(const_int 1) (const_int 1)
7923				   (const_int 1) (const_int 1)
7924				   (const_int 1) (const_int 1)
7925				   (const_int 1) (const_int 1)
7926				   (const_int 1) (const_int 1)
7927				   (const_int 1) (const_int 1)
7928				   (const_int 1) (const_int 1)
7929				   (const_int 1) (const_int 1)]))
7930	    (const_int 1))))]
7931  "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)"
7932  "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7933  [(set_attr "type" "sseiadd")
7934   (set_attr "prefix" "vex")
7935   (set_attr "mode" "OI")])
7936
;; pavgw / vpavgw on 128-bit vectors: matches the zero-extend / add /
;; add-one / shift-right-by-one / truncate form of the unsigned rounding
;; average of two V8HI values.  Alternative 0 is the two-operand non-AVX
;; form (operand 1 tied to the destination); alternative 1 is the
;; three-operand AVX form.  Operand 1 is marked commutative ("%").
7937(define_insn "*sse2_uavgv8hi3"
7938  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7939	(truncate:V8HI
7940	  (lshiftrt:V8SI
7941	    (plus:V8SI
7942	      (plus:V8SI
7943		(zero_extend:V8SI
7944		  (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
7945		(zero_extend:V8SI
7946		  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
7947	      (const_vector:V8HI [(const_int 1) (const_int 1)
7948				  (const_int 1) (const_int 1)
7949				  (const_int 1) (const_int 1)
7950				  (const_int 1) (const_int 1)]))
7951	    (const_int 1))))]
7952  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7953  "@
7954   pavgw\t{%2, %0|%0, %2}
7955   vpavgw\t{%2, %1, %0|%0, %1, %2}"
7956  [(set_attr "isa" "noavx,avx")
7957   (set_attr "type" "sseiadd")
7958   (set_attr "prefix_data16" "1,*")
7959   (set_attr "prefix" "orig,vex")
7960   (set_attr "mode" "TI")])
7961
7962;; The correct representation for this is absolutely enormous, and
7963;; surely not generally useful.
;; psadbw / vpsadbw, modelled as an opaque UNSPEC_PSADBW.  The result
;; has one of the VI8_AVX2 modes while both inputs use the corresponding
;; <ssebytemode>.  Alternative 0 is the two-operand non-AVX form
;; (operand 1 tied to the destination); alternative 1 is the
;; three-operand AVX form.
7964(define_insn "<sse2_avx2>_psadbw"
7965  [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
7966	(unspec:VI8_AVX2 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
7967			  (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
7968			  UNSPEC_PSADBW))]
7969  "TARGET_SSE2"
7970  "@
7971   psadbw\t{%2, %0|%0, %2}
7972   vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7973  [(set_attr "isa" "noavx,avx")
7974   (set_attr "type" "sseiadd")
7975   (set_attr "atom_unit" "simul")
7976   (set_attr "prefix_data16" "1,*")
7977   (set_attr "prefix" "orig,vex")
7978   (set_attr "mode" "<sseinsnmode>")])
7979
;; movmskps / movmskpd family (mnemonic suffix taken from
;; <ssemodesuffix>): produce an SI value in a general register from a
;; floating-point vector register of any VF mode.  Modelled as an opaque
;; UNSPEC_MOVMSK.
7980(define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
7981  [(set (match_operand:SI 0 "register_operand" "=r")
7982	(unspec:SI
7983	  [(match_operand:VF 1 "register_operand" "x")]
7984	  UNSPEC_MOVMSK))]
7985  "TARGET_SSE"
7986  "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
7987  [(set_attr "type" "ssemov")
7988   (set_attr "prefix" "maybe_vex")
7989   (set_attr "mode" "<MODE>")])
7990
;; vpmovmskb on a 256-bit V32QI register, producing an SI value in a
;; general register.  Modelled as an opaque UNSPEC_MOVMSK.
7991(define_insn "avx2_pmovmskb"
7992  [(set (match_operand:SI 0 "register_operand" "=r")
7993	(unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
7994		   UNSPEC_MOVMSK))]
7995  "TARGET_AVX2"
7996  "vpmovmskb\t{%1, %0|%0, %1}"
7997  [(set_attr "type" "ssemov")
7998   (set_attr "prefix" "vex")
7999   (set_attr "mode" "DI")])
8000
;; pmovmskb on a 128-bit V16QI register, producing an SI value in a
;; general register.  Modelled as an opaque UNSPEC_MOVMSK; the %v prefix
;; in the template pairs with the "maybe_vex" prefix attribute.
8001(define_insn "sse2_pmovmskb"
8002  [(set (match_operand:SI 0 "register_operand" "=r")
8003	(unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
8004		   UNSPEC_MOVMSK))]
8005  "TARGET_SSE2"
8006  "%vpmovmskb\t{%1, %0|%0, %1}"
8007  [(set_attr "type" "ssemov")
8008   (set_attr "prefix_data16" "1")
8009   (set_attr "prefix" "maybe_vex")
8010   (set_attr "mode" "TI")])
8011
;; Expander for maskmovdqu.  The memory destination (operand 0) also
;; appears as an input of the UNSPEC_MASKMOV via match_dup, alongside
;; the two V16QI register operands.  There is no preparation code.
8012(define_expand "sse2_maskmovdqu"
8013  [(set (match_operand:V16QI 0 "memory_operand" "")
8014	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
8015		       (match_operand:V16QI 2 "register_operand" "")
8016		       (match_dup 0)]
8017		      UNSPEC_MASKMOV))]
8018  "TARGET_SSE2")
8019
;; maskmovdqu insn.  The destination address is operand 0, constrained
;; to class "D"; it is not printed in the assembler template, which
;; names only the two V16QI registers.  The old memory contents are an
;; input of the UNSPEC_MASKMOV.  length_vex is computed explicitly as 3,
;; plus 1 when operand 2 satisfies REX_SSE_REGNO_P.
8020(define_insn "*sse2_maskmovdqu"
8021  [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
8022	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8023		       (match_operand:V16QI 2 "register_operand" "x")
8024		       (mem:V16QI (match_dup 0))]
8025		      UNSPEC_MASKMOV))]
8026  "TARGET_SSE2"
8027  "%vmaskmovdqu\t{%2, %1|%1, %2}"
8028  [(set_attr "type" "ssemov")
8029   (set_attr "prefix_data16" "1")
8030   ;; The implicit %rdi operand confuses default length_vex computation.
8031   (set (attr "length_vex")
8032     (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
8033   (set_attr "prefix" "maybe_vex")
8034   (set_attr "mode" "TI")])
8035
;; ldmxcsr: takes an SI memory operand.  Modelled as unspec_volatile
;; (UNSPECV_LDMXCSR); the "memory" attribute is set to "load".
8036(define_insn "sse_ldmxcsr"
8037  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
8038		    UNSPECV_LDMXCSR)]
8039  "TARGET_SSE"
8040  "%vldmxcsr\t%0"
8041  [(set_attr "type" "sse")
8042   (set_attr "atom_sse_attr" "mxcsr")
8043   (set_attr "prefix" "maybe_vex")
8044   (set_attr "memory" "load")])
8045
;; stmxcsr: writes an SI memory operand.  The stored value is modelled
;; as unspec_volatile (UNSPECV_STMXCSR) with a dummy (const_int 0)
;; input; the "memory" attribute is set to "store".
8046(define_insn "sse_stmxcsr"
8047  [(set (match_operand:SI 0 "memory_operand" "=m")
8048	(unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
8049  "TARGET_SSE"
8050  "%vstmxcsr\t%0"
8051  [(set_attr "type" "sse")
8052   (set_attr "atom_sse_attr" "mxcsr")
8053   (set_attr "prefix" "maybe_vex")
8054   (set_attr "memory" "store")])
8055
;; clflush: operand 0 is an address (address_operand, constraint "p")
;; and is printed with the %a modifier.  Modelled as unspec_volatile
;; (UNSPECV_CLFLUSH); the "memory" attribute is "unknown".
8056(define_insn "sse2_clflush"
8057  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8058		    UNSPECV_CLFLUSH)]
8059  "TARGET_SSE2"
8060  "clflush\t%a0"
8061  [(set_attr "type" "sse")
8062   (set_attr "atom_sse_attr" "fence")
8063   (set_attr "memory" "unknown")])
8064
8065
;; mwait.  Both inputs are SI registers pinned by constraint: operand 0 to
;; "a" and operand 1 to "c".  They are implicit in the instruction, so the
;; template prints only the bare mnemonic.  The length is fixed at 3 bytes.
;; Enabled for TARGET_SSE3; the same pattern serves 32-bit and 64-bit
;; targets (see the comment before the template).
8066(define_insn "sse3_mwait"
8067  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8068		     (match_operand:SI 1 "register_operand" "c")]
8069		    UNSPECV_MWAIT)]
8070  "TARGET_SSE3"
8071;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8072;; Since 32bit register operands are implicitly zero extended to 64bit,
8073;; we only need to set up 32bit registers.
8074  "mwait"
8075  [(set_attr "length" "3")])
8076
;; monitor for 32-bit targets (TARGET_SSE3 && !TARGET_64BIT).  All three
;; inputs are SI registers pinned by constraint to "a", "c" and "d", and
;; this variant prints them explicitly in the template.  The length is fixed
;; at 3 bytes.
8077(define_insn "sse3_monitor"
8078  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8079		     (match_operand:SI 1 "register_operand" "c")
8080		     (match_operand:SI 2 "register_operand" "d")]
8081		    UNSPECV_MONITOR)]
8082  "TARGET_SSE3 && !TARGET_64BIT"
8083  "monitor\t%0, %1, %2"
8084  [(set_attr "length" "3")])
8085
;; monitor for 64-bit targets (TARGET_SSE3 && TARGET_64BIT).  It differs
;; from sse3_monitor in that operand 0 is a DI register (constraint "a");
;; operands 1 and 2 stay SI ("c" and "d").  The operands are implicit, so
;; only the bare mnemonic is printed.  The length is fixed at 3 bytes.
8086(define_insn "sse3_monitor64"
8087  [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8088		     (match_operand:SI 1 "register_operand" "c")
8089		     (match_operand:SI 2 "register_operand" "d")]
8090		    UNSPECV_MONITOR)]
8091  "TARGET_SSE3 && TARGET_64BIT"
8092;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8093;; RCX and RDX are used.  Since 32bit register operands are implicitly
8094;; zero extended to 64bit, we only need to set up 32bit registers.
8095  "monitor"
8096  [(set_attr "length" "3")])
8097
8098;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8099;;
8100;; SSSE3 instructions
8101;;
8102;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8103
;; AVX2 256-bit horizontal add of adjacent 16-bit elements (vphaddw on ymm).
;; Operand 0: V16HI destination register.  Operand 1: V16HI register.
;; Operand 2: V16HI register or memory.
;;
;; The 256-bit instruction works on each 128-bit lane independently, so the
;; result is laid out as
;;   { pair sums of op1[0..7],  pair sums of op2[0..7],
;;     pair sums of op1[8..15], pair sums of op2[8..15] }
;; and NOT as all pair sums of operand 1 followed by all pair sums of
;; operand 2.  The RTL is written in lane order so that it describes what
;; the emitted instruction actually computes.
8104(define_insn "avx2_phaddwv16hi3"
8105  [(set (match_operand:V16HI 0 "register_operand" "=x")
8106	(vec_concat:V16HI
;; Low 128-bit lane of the result: op1[0..7] pairs, then op2[0..7] pairs.
8107	  (vec_concat:V8HI
8108	    (vec_concat:V4HI
8109	      (vec_concat:V2HI
8110		(plus:HI
8111		  (vec_select:HI
8112		    (match_operand:V16HI 1 "register_operand" "x")
8113		    (parallel [(const_int 0)]))
8114		  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8115		(plus:HI
8116		  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8117		  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8118	      (vec_concat:V2HI
8119		(plus:HI
8120		  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8121		  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8122		(plus:HI
8123		  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8124		  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8125	    (vec_concat:V4HI
8126	      (vec_concat:V2HI
8127		(plus:HI
8128		  (vec_select:HI
8129		    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8130		    (parallel [(const_int 0)]))
8131		  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8132		(plus:HI
8133		  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8134		  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8135	      (vec_concat:V2HI
8136		(plus:HI
8137		  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8138		  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8139		(plus:HI
8140		  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8141		  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
;; High 128-bit lane of the result: op1[8..15] pairs, then op2[8..15] pairs.
8142	  (vec_concat:V8HI
8143	    (vec_concat:V4HI
8144	      (vec_concat:V2HI
8145		(plus:HI
8146		  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8147		  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8148		(plus:HI
8149		  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8150		  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8151	      (vec_concat:V2HI
8152		(plus:HI
8153		  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8154		  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8155		(plus:HI
8156		  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8157		  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
8158	    (vec_concat:V4HI
8159	      (vec_concat:V2HI
8160		(plus:HI
8161		  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8162		  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8163		(plus:HI
8164		  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8165		  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8166	      (vec_concat:V2HI
8167		(plus:HI
8168		  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8169		  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8170		(plus:HI
8171		  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8172		  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8173  "TARGET_AVX2"
8174  "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8175  [(set_attr "type" "sseiadd")
8176   (set_attr "prefix_extra" "1")
8177   (set_attr "prefix" "vex")
8178   (set_attr "mode" "OI")])
8179
;; SSSE3 128-bit horizontal add of adjacent 16-bit elements.
;; Result elements 0-3 are op1[0]+op1[1], op1[2]+op1[3], op1[4]+op1[5] and
;; op1[6]+op1[7]; elements 4-7 are the same pair sums taken from operand 2.
;; Alternative 0 (isa "noavx") ties operand 1 to operand 0 and prints the
;; two-operand phaddw; alternative 1 (isa "avx") prints the three-operand
;; vphaddw.
8180(define_insn "ssse3_phaddwv8hi3"
8181  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8182	(vec_concat:V8HI
8183	  (vec_concat:V4HI
8184	    (vec_concat:V2HI
8185	      (plus:HI
8186		(vec_select:HI
8187		  (match_operand:V8HI 1 "register_operand" "0,x")
8188		  (parallel [(const_int 0)]))
8189		(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8190	      (plus:HI
8191		(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8192		(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8193	    (vec_concat:V2HI
8194	      (plus:HI
8195		(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8196		(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8197	      (plus:HI
8198		(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8199		(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8200	  (vec_concat:V4HI
8201	    (vec_concat:V2HI
8202	      (plus:HI
8203		(vec_select:HI
8204		  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8205		  (parallel [(const_int 0)]))
8206		(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8207	      (plus:HI
8208		(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8209		(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8210	    (vec_concat:V2HI
8211	      (plus:HI
8212		(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8213		(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8214	      (plus:HI
8215		(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8216		(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8217  "TARGET_SSSE3"
8218  "@
8219   phaddw\t{%2, %0|%0, %2}
8220   vphaddw\t{%2, %1, %0|%0, %1, %2}"
;; Per-alternative attributes below are listed in the order "noavx,avx".
8221  [(set_attr "isa" "noavx,avx")
8222   (set_attr "type" "sseiadd")
8223   (set_attr "atom_unit" "complex")
8224   (set_attr "prefix_data16" "1,*")
8225   (set_attr "prefix_extra" "1")
8226   (set_attr "prefix" "orig,vex")
8227   (set_attr "mode" "TI")])
8228
;; SSSE3 64-bit (V4HI) horizontal add of adjacent 16-bit elements.
;; Result elements 0-1 are op1[0]+op1[1] and op1[2]+op1[3]; elements 2-3 are
;; the same pair sums taken from operand 2.  Registers use the "y"
;; constraint (presumably the MMX register class -- confirm in
;; constraints.md).  Operand 1 is tied to operand 0, matching the
;; two-operand template.  prefix_rex is computed at run time by
;; x86_extended_reg_mentioned_p.
8229(define_insn "ssse3_phaddwv4hi3"
8230  [(set (match_operand:V4HI 0 "register_operand" "=y")
8231	(vec_concat:V4HI
8232	  (vec_concat:V2HI
8233	    (plus:HI
8234	      (vec_select:HI
8235		(match_operand:V4HI 1 "register_operand" "0")
8236		(parallel [(const_int 0)]))
8237	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8238	    (plus:HI
8239	      (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8240	      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8241	  (vec_concat:V2HI
8242	    (plus:HI
8243	      (vec_select:HI
8244		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
8245		(parallel [(const_int 0)]))
8246	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8247	    (plus:HI
8248	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8249	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8250  "TARGET_SSSE3"
8251  "phaddw\t{%2, %0|%0, %2}"
8252  [(set_attr "type" "sseiadd")
8253   (set_attr "atom_unit" "complex")
8254   (set_attr "prefix_extra" "1")
8255   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8256   (set_attr "mode" "DI")])
8257
;; AVX2 256-bit horizontal add of adjacent 32-bit elements (vphaddd on ymm).
;; Operand 0: V8SI destination register.  Operand 1: V8SI register.
;; Operand 2: V8SI register or memory.
;;
;; The 256-bit instruction works on each 128-bit lane independently, so the
;; result is laid out as
;;   { op1[0]+op1[1], op1[2]+op1[3], op2[0]+op2[1], op2[2]+op2[3],
;;     op1[4]+op1[5], op1[6]+op1[7], op2[4]+op2[5], op2[6]+op2[7] }
;; and NOT as all pair sums of operand 1 followed by all pair sums of
;; operand 2.  The RTL is written in lane order so that it describes what
;; the emitted instruction actually computes.
8258(define_insn "avx2_phadddv8si3"
8259  [(set (match_operand:V8SI 0 "register_operand" "=x")
8260	(vec_concat:V8SI
;; Low 128-bit lane of the result: op1[0..3] pairs, then op2[0..3] pairs.
8261	  (vec_concat:V4SI
8262	    (vec_concat:V2SI
8263	      (plus:SI
8264		(vec_select:SI
8265		  (match_operand:V8SI 1 "register_operand" "x")
8266		  (parallel [(const_int 0)]))
8267		(vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8268	      (plus:SI
8269		(vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8270		(vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8271	    (vec_concat:V2SI
8272	      (plus:SI
8273		(vec_select:SI
8274		  (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8275		  (parallel [(const_int 0)]))
8276		(vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8277	      (plus:SI
8278		(vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8279		(vec_select:SI (match_dup 2) (parallel [(const_int 3)])))))
;; High 128-bit lane of the result: op1[4..7] pairs, then op2[4..7] pairs.
8280	  (vec_concat:V4SI
8281	    (vec_concat:V2SI
8282	      (plus:SI
8283		(vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8284		(vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8285	      (plus:SI
8286		(vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8287		(vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))
8288	    (vec_concat:V2SI
8289	      (plus:SI
8290		(vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8291		(vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8292	      (plus:SI
8293		(vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8294		(vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8295  "TARGET_AVX2"
8296  "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8297  [(set_attr "type" "sseiadd")
8298   (set_attr "prefix_extra" "1")
8299   (set_attr "prefix" "vex")
8300   (set_attr "mode" "OI")])
8301
;; SSSE3 128-bit horizontal add of adjacent 32-bit elements.
;; Result elements 0-1 are op1[0]+op1[1] and op1[2]+op1[3]; elements 2-3 are
;; the same pair sums taken from operand 2.
;; Alternative 0 (isa "noavx") ties operand 1 to operand 0 and prints the
;; two-operand phaddd; alternative 1 (isa "avx") prints the three-operand
;; vphaddd.
8302(define_insn "ssse3_phadddv4si3"
8303  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8304	(vec_concat:V4SI
8305	  (vec_concat:V2SI
8306	    (plus:SI
8307	      (vec_select:SI
8308		(match_operand:V4SI 1 "register_operand" "0,x")
8309		(parallel [(const_int 0)]))
8310	      (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8311	    (plus:SI
8312	      (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8313	      (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8314	  (vec_concat:V2SI
8315	    (plus:SI
8316	      (vec_select:SI
8317		(match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8318		(parallel [(const_int 0)]))
8319	      (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8320	    (plus:SI
8321	      (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8322	      (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8323  "TARGET_SSSE3"
8324  "@
8325   phaddd\t{%2, %0|%0, %2}
8326   vphaddd\t{%2, %1, %0|%0, %1, %2}"
;; Per-alternative attributes below are listed in the order "noavx,avx".
8327  [(set_attr "isa" "noavx,avx")
8328   (set_attr "type" "sseiadd")
8329   (set_attr "atom_unit" "complex")
8330   (set_attr "prefix_data16" "1,*")
8331   (set_attr "prefix_extra" "1")
8332   (set_attr "prefix" "orig,vex")
8333   (set_attr "mode" "TI")])
8334
;; SSSE3 64-bit (V2SI) horizontal add of adjacent 32-bit elements.
;; Result element 0 is op1[0]+op1[1]; element 1 is op2[0]+op2[1].
;; Registers use the "y" constraint (presumably the MMX register class --
;; confirm in constraints.md).  Operand 1 is tied to operand 0, matching the
;; two-operand template.  prefix_rex is computed at run time by
;; x86_extended_reg_mentioned_p.
8335(define_insn "ssse3_phadddv2si3"
8336  [(set (match_operand:V2SI 0 "register_operand" "=y")
8337	(vec_concat:V2SI
8338	  (plus:SI
8339	    (vec_select:SI
8340	      (match_operand:V2SI 1 "register_operand" "0")
8341	      (parallel [(const_int 0)]))
8342	    (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8343	  (plus:SI
8344	    (vec_select:SI
8345	      (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8346	      (parallel [(const_int 0)]))
8347	    (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8348  "TARGET_SSSE3"
8349  "phaddd\t{%2, %0|%0, %2}"
8350  [(set_attr "type" "sseiadd")
8351   (set_attr "atom_unit" "complex")
8352   (set_attr "prefix_extra" "1")
8353   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8354   (set_attr "mode" "DI")])
8355
;; AVX2 256-bit horizontal saturating add of adjacent 16-bit elements
;; (vphaddsw on ymm); each pair is combined with ss_plus.
;; Operand 0: V16HI destination register.  Operand 1: V16HI register.
;; Operand 2: V16HI register or memory.
;;
;; The 256-bit instruction works on each 128-bit lane independently, so the
;; result is laid out as
;;   { pair sums of op1[0..7],  pair sums of op2[0..7],
;;     pair sums of op1[8..15], pair sums of op2[8..15] }
;; and NOT as all pair sums of operand 1 followed by all pair sums of
;; operand 2.  The RTL is written in lane order so that it describes what
;; the emitted instruction actually computes.
8356(define_insn "avx2_phaddswv16hi3"
8357  [(set (match_operand:V16HI 0 "register_operand" "=x")
8358	(vec_concat:V16HI
;; Low 128-bit lane of the result: op1[0..7] pairs, then op2[0..7] pairs.
8359	  (vec_concat:V8HI
8360	    (vec_concat:V4HI
8361	      (vec_concat:V2HI
8362		(ss_plus:HI
8363		  (vec_select:HI
8364		    (match_operand:V16HI 1 "register_operand" "x")
8365		    (parallel [(const_int 0)]))
8366		  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8367		(ss_plus:HI
8368		  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8369		  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8370	      (vec_concat:V2HI
8371		(ss_plus:HI
8372		  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8373		  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8374		(ss_plus:HI
8375		  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8376		  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8377	    (vec_concat:V4HI
8378	      (vec_concat:V2HI
8379		(ss_plus:HI
8380		  (vec_select:HI
8381		    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8382		    (parallel [(const_int 0)]))
8383		  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8384		(ss_plus:HI
8385		  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8386		  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8387	      (vec_concat:V2HI
8388		(ss_plus:HI
8389		  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8390		  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8391		(ss_plus:HI
8392		  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8393		  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
;; High 128-bit lane of the result: op1[8..15] pairs, then op2[8..15] pairs.
8394	  (vec_concat:V8HI
8395	    (vec_concat:V4HI
8396	      (vec_concat:V2HI
8397		(ss_plus:HI
8398		  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8399		  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8400		(ss_plus:HI
8401		  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8402		  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8403	      (vec_concat:V2HI
8404		(ss_plus:HI
8405		  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8406		  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8407		(ss_plus:HI
8408		  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8409		  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
8410	    (vec_concat:V4HI
8411	      (vec_concat:V2HI
8412		(ss_plus:HI
8413		  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8414		  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8415		(ss_plus:HI
8416		  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8417		  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8418	      (vec_concat:V2HI
8419		(ss_plus:HI
8420		  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8421		  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8422		(ss_plus:HI
8423		  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8424		  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8425  "TARGET_AVX2"
8426  "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8427  [(set_attr "type" "sseiadd")
8428   (set_attr "prefix_extra" "1")
8429   (set_attr "prefix" "vex")
8430   (set_attr "mode" "OI")])
8431
;; SSSE3 128-bit horizontal saturating add of adjacent 16-bit elements; each
;; pair is combined with ss_plus.
;; Result elements 0-3 are the pair sums of op1[0..7]; elements 4-7 are the
;; pair sums of op2[0..7].
;; Alternative 0 (isa "noavx") ties operand 1 to operand 0 and prints the
;; two-operand phaddsw; alternative 1 (isa "avx") prints the three-operand
;; vphaddsw.
8432(define_insn "ssse3_phaddswv8hi3"
8433  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8434	(vec_concat:V8HI
8435	  (vec_concat:V4HI
8436	    (vec_concat:V2HI
8437	      (ss_plus:HI
8438		(vec_select:HI
8439		  (match_operand:V8HI 1 "register_operand" "0,x")
8440		  (parallel [(const_int 0)]))
8441		(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8442	      (ss_plus:HI
8443		(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8444		(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8445	    (vec_concat:V2HI
8446	      (ss_plus:HI
8447		(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8448		(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8449	      (ss_plus:HI
8450		(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8451		(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8452	  (vec_concat:V4HI
8453	    (vec_concat:V2HI
8454	      (ss_plus:HI
8455		(vec_select:HI
8456		  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8457		  (parallel [(const_int 0)]))
8458		(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8459	      (ss_plus:HI
8460		(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8461		(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8462	    (vec_concat:V2HI
8463	      (ss_plus:HI
8464		(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8465		(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8466	      (ss_plus:HI
8467		(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8468		(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8469  "TARGET_SSSE3"
8470  "@
8471   phaddsw\t{%2, %0|%0, %2}
8472   vphaddsw\t{%2, %1, %0|%0, %1, %2}"
;; Per-alternative attributes below are listed in the order "noavx,avx".
8473  [(set_attr "isa" "noavx,avx")
8474   (set_attr "type" "sseiadd")
8475   (set_attr "atom_unit" "complex")
8476   (set_attr "prefix_data16" "1,*")
8477   (set_attr "prefix_extra" "1")
8478   (set_attr "prefix" "orig,vex")
8479   (set_attr "mode" "TI")])
8480
;; SSSE3 64-bit (V4HI) horizontal saturating add of adjacent 16-bit
;; elements; each pair is combined with ss_plus.
;; Result elements 0-1 are the pair sums of op1[0..3]; elements 2-3 are the
;; pair sums of op2[0..3].  Registers use the "y" constraint (presumably the
;; MMX register class -- confirm in constraints.md).  Operand 1 is tied to
;; operand 0.  prefix_rex is computed at run time by
;; x86_extended_reg_mentioned_p.
8481(define_insn "ssse3_phaddswv4hi3"
8482  [(set (match_operand:V4HI 0 "register_operand" "=y")
8483	(vec_concat:V4HI
8484	  (vec_concat:V2HI
8485	    (ss_plus:HI
8486	      (vec_select:HI
8487		(match_operand:V4HI 1 "register_operand" "0")
8488		(parallel [(const_int 0)]))
8489	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8490	    (ss_plus:HI
8491	      (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8492	      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8493	  (vec_concat:V2HI
8494	    (ss_plus:HI
8495	      (vec_select:HI
8496		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
8497		(parallel [(const_int 0)]))
8498	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8499	    (ss_plus:HI
8500	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8501	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8502  "TARGET_SSSE3"
8503  "phaddsw\t{%2, %0|%0, %2}"
8504  [(set_attr "type" "sseiadd")
8505   (set_attr "atom_unit" "complex")
8506   (set_attr "prefix_extra" "1")
8507   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8508   (set_attr "mode" "DI")])
8509
;; AVX2 256-bit horizontal subtract of adjacent 16-bit elements (vphsubw on
;; ymm); each result element is (even element) - (following odd element).
;; Operand 0: V16HI destination register.  Operand 1: V16HI register.
;; Operand 2: V16HI register or memory.
;;
;; The 256-bit instruction works on each 128-bit lane independently, so the
;; result is laid out as
;;   { pair differences of op1[0..7],  pair differences of op2[0..7],
;;     pair differences of op1[8..15], pair differences of op2[8..15] }
;; and NOT as all pair differences of operand 1 followed by all pair
;; differences of operand 2.  The RTL is written in lane order so that it
;; describes what the emitted instruction actually computes.
8510(define_insn "avx2_phsubwv16hi3"
8511  [(set (match_operand:V16HI 0 "register_operand" "=x")
8512	(vec_concat:V16HI
;; Low 128-bit lane of the result: op1[0..7] pairs, then op2[0..7] pairs.
8513	  (vec_concat:V8HI
8514	    (vec_concat:V4HI
8515	      (vec_concat:V2HI
8516		(minus:HI
8517		  (vec_select:HI
8518		    (match_operand:V16HI 1 "register_operand" "x")
8519		    (parallel [(const_int 0)]))
8520		  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8521		(minus:HI
8522		  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8523		  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8524	      (vec_concat:V2HI
8525		(minus:HI
8526		  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8527		  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8528		(minus:HI
8529		  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8530		  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8531	    (vec_concat:V4HI
8532	      (vec_concat:V2HI
8533		(minus:HI
8534		  (vec_select:HI
8535		    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8536		    (parallel [(const_int 0)]))
8537		  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8538		(minus:HI
8539		  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8540		  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8541	      (vec_concat:V2HI
8542		(minus:HI
8543		  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8544		  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8545		(minus:HI
8546		  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8547		  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
;; High 128-bit lane of the result: op1[8..15] pairs, then op2[8..15] pairs.
8548	  (vec_concat:V8HI
8549	    (vec_concat:V4HI
8550	      (vec_concat:V2HI
8551		(minus:HI
8552		  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8553		  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8554		(minus:HI
8555		  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8556		  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8557	      (vec_concat:V2HI
8558		(minus:HI
8559		  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8560		  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8561		(minus:HI
8562		  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8563		  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
8564	    (vec_concat:V4HI
8565	      (vec_concat:V2HI
8566		(minus:HI
8567		  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8568		  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8569		(minus:HI
8570		  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8571		  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8572	      (vec_concat:V2HI
8573		(minus:HI
8574		  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8575		  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8576		(minus:HI
8577		  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8578		  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8579  "TARGET_AVX2"
8580  "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8581  [(set_attr "type" "sseiadd")
8582   (set_attr "prefix_extra" "1")
8583   (set_attr "prefix" "vex")
8584   (set_attr "mode" "OI")])
8585
;; SSSE3 128-bit horizontal subtract of adjacent 16-bit elements; each
;; result element is (even element) - (following odd element).
;; Result elements 0-3 come from op1[0..7]; elements 4-7 from op2[0..7].
;; Alternative 0 (isa "noavx") ties operand 1 to operand 0 and prints the
;; two-operand phsubw; alternative 1 (isa "avx") prints the three-operand
;; vphsubw.
8586(define_insn "ssse3_phsubwv8hi3"
8587  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8588	(vec_concat:V8HI
8589	  (vec_concat:V4HI
8590	    (vec_concat:V2HI
8591	      (minus:HI
8592		(vec_select:HI
8593		  (match_operand:V8HI 1 "register_operand" "0,x")
8594		  (parallel [(const_int 0)]))
8595		(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8596	      (minus:HI
8597		(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8598		(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8599	    (vec_concat:V2HI
8600	      (minus:HI
8601		(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8602		(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8603	      (minus:HI
8604		(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8605		(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8606	  (vec_concat:V4HI
8607	    (vec_concat:V2HI
8608	      (minus:HI
8609		(vec_select:HI
8610		  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8611		  (parallel [(const_int 0)]))
8612		(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8613	      (minus:HI
8614		(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8615		(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8616	    (vec_concat:V2HI
8617	      (minus:HI
8618		(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8619		(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8620	      (minus:HI
8621		(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8622		(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8623  "TARGET_SSSE3"
8624  "@
8625   phsubw\t{%2, %0|%0, %2}
8626   vphsubw\t{%2, %1, %0|%0, %1, %2}"
;; Per-alternative attributes below are listed in the order "noavx,avx".
8627  [(set_attr "isa" "noavx,avx")
8628   (set_attr "type" "sseiadd")
8629   (set_attr "atom_unit" "complex")
8630   (set_attr "prefix_data16" "1,*")
8631   (set_attr "prefix_extra" "1")
8632   (set_attr "prefix" "orig,vex")
8633   (set_attr "mode" "TI")])
8634
;; SSSE3 64-bit (V4HI) horizontal subtract of adjacent 16-bit elements; each
;; result element is (even element) - (following odd element).
;; Result elements 0-1 come from op1[0..3]; elements 2-3 from op2[0..3].
;; Registers use the "y" constraint (presumably the MMX register class --
;; confirm in constraints.md).  Operand 1 is tied to operand 0.
;; prefix_rex is computed at run time by x86_extended_reg_mentioned_p.
8635(define_insn "ssse3_phsubwv4hi3"
8636  [(set (match_operand:V4HI 0 "register_operand" "=y")
8637	(vec_concat:V4HI
8638	  (vec_concat:V2HI
8639	    (minus:HI
8640	      (vec_select:HI
8641		(match_operand:V4HI 1 "register_operand" "0")
8642		(parallel [(const_int 0)]))
8643	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8644	    (minus:HI
8645	      (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8646	      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8647	  (vec_concat:V2HI
8648	    (minus:HI
8649	      (vec_select:HI
8650		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
8651		(parallel [(const_int 0)]))
8652	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8653	    (minus:HI
8654	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8655	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8656  "TARGET_SSSE3"
8657  "phsubw\t{%2, %0|%0, %2}"
8658  [(set_attr "type" "sseiadd")
8659   (set_attr "atom_unit" "complex")
8660   (set_attr "prefix_extra" "1")
8661   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8662   (set_attr "mode" "DI")])
8663
;; AVX2 256-bit horizontal subtract of adjacent 32-bit elements (vphsubd on
;; ymm); each result element is (even element) - (following odd element).
;; Operand 0: V8SI destination register.  Operand 1: V8SI register.
;; Operand 2: V8SI register or memory.
;;
;; The 256-bit instruction works on each 128-bit lane independently, so the
;; result is laid out as
;;   { op1[0]-op1[1], op1[2]-op1[3], op2[0]-op2[1], op2[2]-op2[3],
;;     op1[4]-op1[5], op1[6]-op1[7], op2[4]-op2[5], op2[6]-op2[7] }
;; and NOT as all pair differences of operand 1 followed by all pair
;; differences of operand 2.  The RTL is written in lane order so that it
;; describes what the emitted instruction actually computes.
8664(define_insn "avx2_phsubdv8si3"
8665  [(set (match_operand:V8SI 0 "register_operand" "=x")
8666	(vec_concat:V8SI
;; Low 128-bit lane of the result: op1[0..3] pairs, then op2[0..3] pairs.
8667	  (vec_concat:V4SI
8668	    (vec_concat:V2SI
8669	      (minus:SI
8670		(vec_select:SI
8671		  (match_operand:V8SI 1 "register_operand" "x")
8672		  (parallel [(const_int 0)]))
8673		(vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8674	      (minus:SI
8675		(vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8676		(vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8677	    (vec_concat:V2SI
8678	      (minus:SI
8679		(vec_select:SI
8680		  (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8681		  (parallel [(const_int 0)]))
8682		(vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8683	      (minus:SI
8684		(vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8685		(vec_select:SI (match_dup 2) (parallel [(const_int 3)])))))
;; High 128-bit lane of the result: op1[4..7] pairs, then op2[4..7] pairs.
8686	  (vec_concat:V4SI
8687	    (vec_concat:V2SI
8688	      (minus:SI
8689		(vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8690		(vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8691	      (minus:SI
8692		(vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8693		(vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))
8694	    (vec_concat:V2SI
8695	      (minus:SI
8696		(vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8697		(vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8698	      (minus:SI
8699		(vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8700		(vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8701  "TARGET_AVX2"
8702  "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8703  [(set_attr "type" "sseiadd")
8704   (set_attr "prefix_extra" "1")
8705   (set_attr "prefix" "vex")
8706   (set_attr "mode" "OI")])
8707
;; SSSE3 128-bit horizontal subtract of adjacent 32-bit elements; each
;; result element is (even element) - (following odd element).
;; Result elements 0-1 come from op1[0..3]; elements 2-3 from op2[0..3].
;; Alternative 0 (isa "noavx") ties operand 1 to operand 0 and prints the
;; two-operand phsubd; alternative 1 (isa "avx") prints the three-operand
;; vphsubd.
8708(define_insn "ssse3_phsubdv4si3"
8709  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8710	(vec_concat:V4SI
8711	  (vec_concat:V2SI
8712	    (minus:SI
8713	      (vec_select:SI
8714		(match_operand:V4SI 1 "register_operand" "0,x")
8715		(parallel [(const_int 0)]))
8716	      (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8717	    (minus:SI
8718	      (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8719	      (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8720	  (vec_concat:V2SI
8721	    (minus:SI
8722	      (vec_select:SI
8723		(match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8724		(parallel [(const_int 0)]))
8725	      (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8726	    (minus:SI
8727	      (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8728	      (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8729  "TARGET_SSSE3"
8730  "@
8731   phsubd\t{%2, %0|%0, %2}
8732   vphsubd\t{%2, %1, %0|%0, %1, %2}"
8733
;; Per-alternative attributes below are listed in the order "noavx,avx".
8734  [(set_attr "isa" "noavx,avx")
8735   (set_attr "type" "sseiadd")
8736   (set_attr "atom_unit" "complex")
8737   (set_attr "prefix_data16" "1,*")
8738   (set_attr "prefix_extra" "1")
8739   (set_attr "prefix" "orig,vex")
8740   (set_attr "mode" "TI")])
8741
;; SSSE3 64-bit (V2SI) horizontal subtract of adjacent 32-bit elements.
;; Result element 0 is op1[0]-op1[1]; element 1 is op2[0]-op2[1].
;; Registers use the "y" constraint (presumably the MMX register class --
;; confirm in constraints.md).  Operand 1 is tied to operand 0.
;; prefix_rex is computed at run time by x86_extended_reg_mentioned_p.
8742(define_insn "ssse3_phsubdv2si3"
8743  [(set (match_operand:V2SI 0 "register_operand" "=y")
8744	(vec_concat:V2SI
8745	  (minus:SI
8746	    (vec_select:SI
8747	      (match_operand:V2SI 1 "register_operand" "0")
8748	      (parallel [(const_int 0)]))
8749	    (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8750	  (minus:SI
8751	    (vec_select:SI
8752	      (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8753	      (parallel [(const_int 0)]))
8754	    (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8755  "TARGET_SSSE3"
8756  "phsubd\t{%2, %0|%0, %2}"
8757  [(set_attr "type" "sseiadd")
8758   (set_attr "atom_unit" "complex")
8759   (set_attr "prefix_extra" "1")
8760   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8761   (set_attr "mode" "DI")])
8762
;; AVX2 256-bit horizontal saturating subtract of adjacent 16-bit elements
;; (vphsubsw on ymm); each result element is
;; ss_minus (even element, following odd element).
;; Operand 0: V16HI destination register.  Operand 1: V16HI register.
;; Operand 2: V16HI register or memory.
;;
;; The 256-bit instruction works on each 128-bit lane independently, so the
;; result is laid out as
;;   { pair differences of op1[0..7],  pair differences of op2[0..7],
;;     pair differences of op1[8..15], pair differences of op2[8..15] }
;; and NOT as all pair differences of operand 1 followed by all pair
;; differences of operand 2.  The RTL is written in lane order so that it
;; describes what the emitted instruction actually computes.
8763(define_insn "avx2_phsubswv16hi3"
8764  [(set (match_operand:V16HI 0 "register_operand" "=x")
8765	(vec_concat:V16HI
;; Low 128-bit lane of the result: op1[0..7] pairs, then op2[0..7] pairs.
8766	  (vec_concat:V8HI
8767	    (vec_concat:V4HI
8768	      (vec_concat:V2HI
8769		(ss_minus:HI
8770		  (vec_select:HI
8771		    (match_operand:V16HI 1 "register_operand" "x")
8772		    (parallel [(const_int 0)]))
8773		  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8774		(ss_minus:HI
8775		  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8776		  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8777	      (vec_concat:V2HI
8778		(ss_minus:HI
8779		  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8780		  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8781		(ss_minus:HI
8782		  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8783		  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8784	    (vec_concat:V4HI
8785	      (vec_concat:V2HI
8786		(ss_minus:HI
8787		  (vec_select:HI
8788		    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8789		    (parallel [(const_int 0)]))
8790		  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8791		(ss_minus:HI
8792		  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8793		  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8794	      (vec_concat:V2HI
8795		(ss_minus:HI
8796		  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8797		  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8798		(ss_minus:HI
8799		  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8800		  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
;; High 128-bit lane of the result: op1[8..15] pairs, then op2[8..15] pairs.
8801	  (vec_concat:V8HI
8802	    (vec_concat:V4HI
8803	      (vec_concat:V2HI
8804		(ss_minus:HI
8805		  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8806		  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8807		(ss_minus:HI
8808		  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8809		  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8810	      (vec_concat:V2HI
8811		(ss_minus:HI
8812		  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8813		  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8814		(ss_minus:HI
8815		  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8816		  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
8817	    (vec_concat:V4HI
8818	      (vec_concat:V2HI
8819		(ss_minus:HI
8820		  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8821		  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8822		(ss_minus:HI
8823		  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8824		  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8825	      (vec_concat:V2HI
8826		(ss_minus:HI
8827		  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8828		  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8829		(ss_minus:HI
8830		  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8831		  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8832  "TARGET_AVX2"
8833  "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8834  [(set_attr "type" "sseiadd")
8835   (set_attr "prefix_extra" "1")
8836   (set_attr "prefix" "vex")
8837   (set_attr "mode" "OI")])
8838
;; SSSE3 128-bit horizontal saturating subtract of adjacent 16-bit elements;
;; each result element is ss_minus (even element, following odd element).
;; Result elements 0-3 come from op1[0..7]; elements 4-7 from op2[0..7].
;; Alternative 0 (isa "noavx") ties operand 1 to operand 0 and prints the
;; two-operand phsubsw; alternative 1 (isa "avx") prints the three-operand
;; vphsubsw.
8839(define_insn "ssse3_phsubswv8hi3"
8840  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8841	(vec_concat:V8HI
8842	  (vec_concat:V4HI
8843	    (vec_concat:V2HI
8844	      (ss_minus:HI
8845		(vec_select:HI
8846		  (match_operand:V8HI 1 "register_operand" "0,x")
8847		  (parallel [(const_int 0)]))
8848		(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8849	      (ss_minus:HI
8850		(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8851		(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8852	    (vec_concat:V2HI
8853	      (ss_minus:HI
8854		(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8855		(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8856	      (ss_minus:HI
8857		(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8858		(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8859	  (vec_concat:V4HI
8860	    (vec_concat:V2HI
8861	      (ss_minus:HI
8862		(vec_select:HI
8863		  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8864		  (parallel [(const_int 0)]))
8865		(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8866	      (ss_minus:HI
8867		(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8868		(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8869	    (vec_concat:V2HI
8870	      (ss_minus:HI
8871		(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8872		(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8873	      (ss_minus:HI
8874		(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8875		(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8876  "TARGET_SSSE3"
8877  "@
8878   phsubsw\t{%2, %0|%0, %2}
8879   vphsubsw\t{%2, %1, %0|%0, %1, %2}"
;; Per-alternative attributes below are listed in the order "noavx,avx".
8880  [(set_attr "isa" "noavx,avx")
8881   (set_attr "type" "sseiadd")
8882   (set_attr "atom_unit" "complex")
8883   (set_attr "prefix_data16" "1,*")
8884   (set_attr "prefix_extra" "1")
8885   (set_attr "prefix" "orig,vex")
8886   (set_attr "mode" "TI")])
8887
;; V4HI variant of phsubsw using "y"-constraint registers.  Result
;; element i is ss_minus (src[2i], src[2i+1]); elements 0-1 come from
;; operand 1 (tied to the destination) and elements 2-3 from operand 2.
(define_insn "ssse3_phsubswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Elements 0-1: adjacent pairs of operand 1.
          (vec_concat:V2HI
            (ss_minus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (ss_minus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Elements 2-3: adjacent pairs of operand 2.
          (vec_concat:V2HI
            (ss_minus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (ss_minus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))])
8916
;; AVX2 vpmaddubsw on 256-bit vectors.  Each V16HI result element is the
;; saturating sum (ss_plus) of two products: the zero-extended even byte
;; of operand 1 times the sign-extended even byte of operand 2, and the
;; same for the following odd byte.
(define_insn "avx2_pmaddubsw256"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (ss_plus:V16HI
          ;; Products of the even-indexed bytes.
          (mult:V16HI
            (zero_extend:V16HI
              (vec_select:V16QI
                (match_operand:V32QI 1 "register_operand" "x")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)
                           (const_int 16) (const_int 18)
                           (const_int 20) (const_int 22)
                           (const_int 24) (const_int 26)
                           (const_int 28) (const_int 30)])))
            (sign_extend:V16HI
              (vec_select:V16QI
                (match_operand:V32QI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)
                           (const_int 16) (const_int 18)
                           (const_int 20) (const_int 22)
                           (const_int 24) (const_int 26)
                           (const_int 28) (const_int 30)]))))
          ;; Products of the odd-indexed bytes.
          (mult:V16HI
            (zero_extend:V16HI
              (vec_select:V16QI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)
                           (const_int 17) (const_int 19)
                           (const_int 21) (const_int 23)
                           (const_int 25) (const_int 27)
                           (const_int 29) (const_int 31)])))
            (sign_extend:V16HI
              (vec_select:V16QI
                (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)
                           (const_int 17) (const_int 19)
                           (const_int 21) (const_int 23)
                           (const_int 25) (const_int 27)
                           (const_int 29) (const_int 31)]))))))]
  "TARGET_AVX2"
  "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")])
9002
;; 128-bit pmaddubsw / vpmaddubsw.  Each V8HI result element is the
;; saturating sum (ss_plus) of two products: the zero-extended even byte
;; of operand 1 times the sign-extended even byte of operand 2, and the
;; same for the following odd byte.  Alternative 0 (isa noavx) ties
;; operand 1 to the destination; alternative 1 (isa avx) does not.
(define_insn "ssse3_pmaddubsw128"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (ss_plus:V8HI
          ;; Products of the even-indexed bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 1 "register_operand" "0,x")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)])))
            (sign_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)]))))
          ;; Products of the odd-indexed bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)])))
            (sign_extend:V8HI
              (vec_select:V8QI
                (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)]))))))]
  "TARGET_SSSE3"
  "@
   pmaddubsw\t{%2, %0|%0, %2}
   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "mode" "TI")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")])
9061
;; V8QI -> V4HI pmaddubsw using "y"-constraint registers.  Each result
;; element is the saturating sum (ss_plus) of two products: the
;; zero-extended even byte of operand 1 times the sign-extended even byte
;; of operand 2, and the same for the following odd byte.  Operand 1 is
;; tied to the destination.
(define_insn "ssse3_pmaddubsw"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (ss_plus:V4HI
          ;; Products of the even-indexed bytes.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 1 "register_operand" "0")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)])))
            (sign_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)]))))
          ;; Products of the odd-indexed bytes.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4HI
              (vec_select:V4QI
                (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))])
9100
;; Expander for the V16HI rounding high multiply.  The RTL computes, per
;; element, truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1) with
;; logical right shifts.  The preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy before the matching insn is emitted.
(define_expand "avx2_umulhrswv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "")
        (truncate:V16HI
          (lshiftrt:V16SI
            (plus:V16SI
              (lshiftrt:V16SI
                (mult:V16SI
                  (sign_extend:V16SI
                    (match_operand:V16HI 1 "nonimmediate_operand" ""))
                  (sign_extend:V16SI
                    (match_operand:V16HI 2 "nonimmediate_operand" "")))
                (const_int 14))
              ;; Rounding increment: a vector of sixteen ones.
              (const_vector:V16HI [(const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX2"
{
  ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);
})
9124
;; Matcher for the V16HI rounding high multiply, emitted as vpmulhrsw.
;; Per element: truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1).
;; Operands 1 and 2 are marked commutative ("%"); the insn condition
;; additionally requires ix86_binary_operator_ok.
(define_insn "*avx2_umulhrswv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (truncate:V16HI
          (lshiftrt:V16SI
            (plus:V16SI
              (lshiftrt:V16SI
                (mult:V16SI
                  (sign_extend:V16SI
                    (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
                  (sign_extend:V16SI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
                (const_int 14))
              ;; Rounding increment: a vector of sixteen ones.
              (const_vector:V16HI [(const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
  "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")])
9152
;; Expander for the V8HI rounding high multiply.  The RTL computes, per
;; element, truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1) with
;; logical right shifts.  The preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy before the matching insn is emitted.
(define_expand "ssse3_pmulhrswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (lshiftrt:V8SI
                (mult:V8SI
                  (sign_extend:V8SI
                    (match_operand:V8HI 1 "nonimmediate_operand" ""))
                  (sign_extend:V8SI
                    (match_operand:V8HI 2 "nonimmediate_operand" "")))
                (const_int 14))
              ;; Rounding increment: a vector of eight ones.
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3"
{
  ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);
})
9172
;; Matcher for the V8HI rounding high multiply, emitted as pmulhrsw
;; (alternative 0, isa noavx, operand 1 tied to the destination) or
;; vpmulhrsw (alternative 1, isa avx).  Per element:
;; truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1).
;; Operands 1 and 2 are marked commutative ("%").
(define_insn "*ssse3_pmulhrswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (lshiftrt:V8SI
                (mult:V8SI
                  (sign_extend:V8SI
                    (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
                  (sign_extend:V8SI
                    (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
                (const_int 14))
              ;; Rounding increment: a vector of eight ones.
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "@
   pmulhrsw\t{%2, %0|%0, %2}
   vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "mode" "TI")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")])
9200
;; Expander for the V4HI rounding high multiply.  The RTL computes, per
;; element, truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1) with
;; logical right shifts.  The preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy before the matching insn is emitted.
(define_expand "ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "")
        (truncate:V4HI
          (lshiftrt:V4SI
            (plus:V4SI
              (lshiftrt:V4SI
                (mult:V4SI
                  (sign_extend:V4SI
                    (match_operand:V4HI 1 "nonimmediate_operand" ""))
                  (sign_extend:V4SI
                    (match_operand:V4HI 2 "nonimmediate_operand" "")))
                (const_int 14))
              ;; Rounding increment: a vector of four ones.
              (const_vector:V4HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3"
{
  ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
})
9218
;; Matcher for the V4HI rounding high multiply on "y"-constraint
;; registers, emitted as pmulhrsw.  Per element:
;; truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1).
;; Operand 1 is tied to the destination and marked commutative ("%").
(define_insn "*ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (truncate:V4HI
          (lshiftrt:V4SI
            (plus:V4SI
              (lshiftrt:V4SI
                (mult:V4SI
                  (sign_extend:V4SI
                    (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
                  (sign_extend:V4SI
                    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
                (const_int 14))
              ;; Rounding increment: a vector of four ones.
              (const_vector:V4HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "DI")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))])
9240
;; pshufb / vpshufb over the VI1_AVX2 modes.  The operation is opaque to
;; the RTL optimizers: it is modelled as UNSPEC_PSHUFB of operands 1 and 2.
;; Alternative 0 (isa noavx) ties operand 1 to the destination;
;; alternative 1 (isa avx) is the three-operand form.
(define_insn "<ssse3_avx2>_pshufb<mode>3"
  [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
        (unspec:VI1_AVX2
          [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
           (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
          UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "@
   pshufb\t{%2, %0|%0, %2}
   vpshufb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")])
9256
;; V8QI pshufb on "y"-constraint registers, modelled as UNSPEC_PSHUFB of
;; operands 1 and 2.  Operand 1 is tied to the destination.
;; The output template is a plain string: the stray ";" that used to follow
;; it was only an empty md comment and has been dropped.
(define_insn "ssse3_pshufbv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (unspec:V8QI
          [(match_operand:V8QI 1 "register_operand" "0")
           (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
9268
;; psign{b,w,d} / vpsign{b,w,d} over the VI124_AVX2 modes; the mnemonic
;; suffix comes from <ssemodesuffix>.  Modelled as UNSPEC_PSIGN of
;; operands 1 and 2.  Alternative 0 (isa noavx) ties operand 1 to the
;; destination; alternative 1 (isa avx) is the three-operand form.
(define_insn "<ssse3_avx2>_psign<mode>3"
  [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
        (unspec:VI124_AVX2
          [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
           (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "@
   psign<ssemodesuffix>\t{%2, %0|%0, %2}
   vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")])
9285
;; psign over the MMXMODEI modes on "y"-constraint registers; the mnemonic
;; suffix comes from <mmxvecsize>.  Modelled as UNSPEC_PSIGN of operands 1
;; and 2, with operand 1 tied to the destination.
;; The output template is a plain string: the stray ";" that used to follow
;; it was only an empty md comment and has been dropped.
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (unspec:MMXMODEI
          [(match_operand:MMXMODEI 1 "register_operand" "0")
           (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "psign<mmxvecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
9298
;; palignr / vpalignr over the SSESCALARMODE modes, modelled as
;; UNSPEC_PALIGNR of operands 1, 2 and the immediate operand 3.
;; The output code rewrites operand 3 to its value divided by 8 before
;; printing (the predicate name suggests operand 3 is a multiple of 8).
;; Alternative 0 (isa noavx) ties operand 1 to the destination;
;; alternative 1 (isa avx) is the three-operand form.
(define_insn "<ssse3_avx2>_palignr<mode>"
  [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
        (unspec:SSESCALARMODE
          [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
           (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
           (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
          UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* The instruction immediate is operand 3 divided by 8.  */
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);

  if (which_alternative == 0)
    return "palignr\t{%3, %2, %0|%0, %2, %3}";
  if (which_alternative == 1)
    return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";

  /* Only two alternatives exist.  */
  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
9327
;; DImode palignr on "y"-constraint registers, modelled as UNSPEC_PALIGNR
;; of operands 1, 2 and the immediate operand 3.  Operand 1 is tied to the
;; destination.  The output code rewrites operand 3 to its value divided
;; by 8 before printing.
(define_insn "ssse3_palignrdi"
  [(set (match_operand:DI 0 "register_operand" "=y")
        (unspec:DI
          [(match_operand:DI 1 "register_operand" "0")
           (match_operand:DI 2 "nonimmediate_operand" "ym")
           (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
          UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* The instruction immediate is operand 3 divided by 8.  */
  HOST_WIDE_INT imm = INTVAL (operands[3]) / 8;

  operands[3] = GEN_INT (imm);
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "mode" "DI")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))])
9345
;; Vector absolute value (RTL code abs) over the VI124_AVX2 modes, emitted
;; as pabs{b,w,d}.  The "%v" in the template together with the
;; "maybe_vex" prefix attribute lets one pattern serve both encodings.
(define_insn "abs<mode>2"
  [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
        (abs:VI124_AVX2
          (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSSE3"
  "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")])
9357
;; Vector absolute value (RTL code abs) over the MMXMODEI modes on
;; "y"-constraint registers, emitted as pabs with the <mmxvecsize> suffix.
;; The output template is a plain string: the stray ";" that used to follow
;; it was only an empty md comment and has been dropped.
(define_insn "abs<mode>2"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (abs:MMXMODEI
          (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
  "TARGET_SSSE3"
  "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_rep" "0")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
9369
9370;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9371;;
9372;; AMD SSE4A instructions
9373;;
9374;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9375
;; SSE4A scalar store over the MODEF modes: register operand 1 is written
;; to memory operand 0, wrapped in UNSPEC_MOVNT.  The mnemonic suffix comes
;; from <ssemodesuffix>.
(define_insn "sse4a_movnt<mode>"
  [(set (match_operand:MODEF 0 "memory_operand" "=m")
        (unspec:MODEF [(match_operand:MODEF 1 "register_operand" "x")]
                      UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "mode" "<MODE>")
   (set_attr "type" "ssemov")])
9385
;; SSE4A store of element 0 of a VF_128 vector register (operand 1) to a
;; scalar memory operand 0, wrapped in UNSPEC_MOVNT.  The mnemonic suffix
;; comes from <ssescalarmodesuffix>.
(define_insn "sse4a_vmmovnt<mode>"
  [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
        (unspec:<ssescalarmode>
          [(vec_select:<ssescalarmode>
             (match_operand:VF_128 1 "register_operand" "x")
             ;; Only the lowest element is stored.
             (parallel [(const_int 0)]))]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "mode" "<ssescalarmode>")
   (set_attr "type" "ssemov")])
9397
;; SSE4A extrq, immediate form: UNSPEC_EXTRQI of the V2DI operand 1 (tied
;; to the destination) and two 0..255 immediates, operands 2 and 3.
;; length_immediate is 2 because both immediates are printed.
(define_insn "sse4a_extrqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand 2 "const_0_to_255_operand" "")
           (match_operand 3 "const_0_to_255_operand" "")]
          UNSPEC_EXTRQI))]
  "TARGET_SSE4A"
  "extrq\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse")
   (set_attr "mode" "TI")
   (set_attr "prefix_data16" "1")
   (set_attr "length_immediate" "2")])
9410
;; SSE4A extrq, register form: UNSPEC_EXTRQ of the V2DI operand 1 (tied to
;; the destination) and the V16QI register operand 2.
(define_insn "sse4a_extrq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V16QI 2 "register_operand" "x")]
          UNSPEC_EXTRQ))]
  "TARGET_SSE4A"
  "extrq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "TI")
   (set_attr "prefix_data16" "1")])
9421
;; SSE4A insertq, immediate form: UNSPEC_INSERTQI of the V2DI operand 1
;; (tied to the destination), the V2DI register operand 2 and two 0..255
;; immediates, operands 3 and 4.  length_immediate is 2 because both
;; immediates are printed.
(define_insn "sse4a_insertqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "register_operand" "x")
           (match_operand 3 "const_0_to_255_operand" "")
           (match_operand 4 "const_0_to_255_operand" "")]
          UNSPEC_INSERTQI))]
  "TARGET_SSE4A"
  "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
  [(set_attr "type" "sseins")
   (set_attr "mode" "TI")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_data16" "0")
   (set_attr "length_immediate" "2")])
9436
;; SSE4A insertq, register form: UNSPEC_INSERTQ of the V2DI operand 1
;; (tied to the destination) and the V2DI register operand 2.
(define_insn "sse4a_insertq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "register_operand" "x")]
          UNSPEC_INSERTQ))]
  "TARGET_SSE4A"
  "insertq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseins")
   (set_attr "mode" "TI")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_data16" "0")])
9448
9449;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9450;;
9451;; Intel SSE4.1 instructions
9452;;
9453;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9454
;; Immediate-controlled blend over the VF modes, expressed as a vec_merge:
;; the mask in operand 3 chooses, per element, between operand 2 (first
;; vec_merge arm) and operand 1 (second arm).  Alternative 0 (isa noavx)
;; ties operand 1 to the destination; alternative 1 (isa avx) is the
;; three-operand form.
(define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (vec_merge:VF
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
          (match_operand:VF 1 "register_operand" "0,x")
          (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
  "TARGET_SSE4_1"
  "@
   blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "mode" "<MODE>")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
9472
;; Variable blend over the VF modes, modelled as UNSPEC_BLENDV of
;; operands 1, 2 and the selector register operand 3.
;; Alternative 0 (isa noavx): operand 1 is tied to the destination and the
;; selector must satisfy constraint "Yz"; the operand predicates keep the
;; other operands out of xmm0.  Alternative 1 (isa avx): four-operand form
;; with the selector in any SSE register.
;;
;; length_immediate is "*,1": the legacy blendv encoding carries no
;; immediate byte (its selector register is implicit), whereas the VEX
;; encoding names the selector register in a trailing byte.  This matches
;; the <sse4_1_avx2>_pblendvb pattern in this file.
(define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
        (unspec:VF
          [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
           (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
           (match_operand:VF 3 "register_operand" "Yz,x")]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "@
   blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "length_immediate" "*,1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
9491
;; dpps/dppd and their VEX forms over the VF modes, modelled as UNSPEC_DP
;; of operands 1, 2 and the 0..255 immediate operand 3.  Operands 1 and 2
;; are marked commutative ("%").  Alternative 0 (isa noavx) ties operand 1
;; to the destination; alternative 1 (isa avx) is the three-operand form.
(define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
           (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_DP))]
  "TARGET_SSE4_1"
  "@
   dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemul")
   (set_attr "mode" "<MODE>")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
9510
;; movntdqa load over the VI8_AVX2 modes: memory operand 1 is loaded into
;; register operand 0, wrapped in UNSPEC_MOVNTDQA.  "%v" plus the
;; "maybe_vex" prefix attribute cover both encodings.
(define_insn "<sse4_1_avx2>_movntdqa"
  [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
        (unspec:VI8_AVX2
          [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
          UNSPEC_MOVNTDQA))]
  "TARGET_SSE4_1"
  "%vmovntdqa\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_extra" "1")])
9521
;; mpsadbw / vmpsadbw over the VI1_AVX2 modes, modelled as UNSPEC_MPSADBW
;; of operands 1, 2 and the 0..255 immediate operand 3.  Alternative 0
;; (isa noavx) ties operand 1 to the destination; alternative 1 (isa avx)
;; is the three-operand form.
(define_insn "<sse4_1_avx2>_mpsadbw"
  [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
        (unspec:VI1_AVX2
          [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
           (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_MPSADBW))]
  "TARGET_SSE4_1"
  "@
   mpsadbw\t{%3, %2, %0|%0, %2, %3}
   vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
9538
;; AVX2 vpackusdw: the V16HI result is the concatenation of the
;; unsigned-saturating truncations (us_truncate) of the V8SI operands 1
;; and 2, in that order.
(define_insn "avx2_packusdw"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          (us_truncate:V8HI (match_operand:V8SI 1 "register_operand" "x"))
          (us_truncate:V8HI (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_AVX2"
  "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")])
9552
;; packusdw / vpackusdw: the V8HI result is the concatenation of the
;; unsigned-saturating truncations (us_truncate) of the V4SI operands 1
;; and 2, in that order.  Alternative 0 (isa noavx) ties operand 1 to the
;; destination; alternative 1 (isa avx) is the three-operand form.
(define_insn "sse4_1_packusdw"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          (us_truncate:V4HI (match_operand:V4SI 1 "register_operand" "0,x"))
          (us_truncate:V4HI (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
  "TARGET_SSE4_1"
  "@
   packusdw\t{%2, %0|%0, %2}
   vpackusdw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_extra" "1")])
9569
;; pblendvb / vpblendvb over the VI1_AVX2 modes, modelled as UNSPEC_BLENDV
;; of operands 1, 2 and the selector register operand 3.
;; Alternative 0 (isa noavx): operand 1 is tied to the destination and the
;; selector must satisfy constraint "Yz"; the operand predicates keep the
;; other operands out of xmm0.  Alternative 1 (isa avx): four-operand form.
;; length_immediate is set only for the avx alternative.
(define_insn "<sse4_1_avx2>_pblendvb"
  [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
        (unspec:VI1_AVX2
          [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
           (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
           (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "@
   pblendvb\t{%3, %2, %0|%0, %2, %3}
   vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "*,1")])
9587
;; pblendw / vpblendw on V8HI, expressed as a vec_merge: the 0..255 mask in
;; operand 3 chooses, per element, between operand 2 (first vec_merge arm)
;; and operand 1 (second arm).  Alternative 0 (isa noavx) ties operand 1 to
;; the destination; alternative 1 (isa avx) is the three-operand form.
(define_insn "sse4_1_pblendw"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_merge:V8HI
          (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
          (match_operand:V8HI 1 "register_operand" "0,x")
          (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
  "TARGET_SSE4_1"
  "@
   pblendw\t{%3, %2, %0|%0, %2, %3}
   vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "mode" "TI")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
9604
;; The builtin supplies an 8-bit immediate.  The expander copies those
;; eight bits into bits 8-15 as well, so the vec_merge mask carries one
;; bit for each of the sixteen HI elements.
(define_expand "avx2_pblendw"
  [(set (match_operand:V16HI 0 "register_operand" "")
        (vec_merge:V16HI
          (match_operand:V16HI 2 "nonimmediate_operand" "")
          (match_operand:V16HI 1 "register_operand" "")
          (match_operand:SI 3 "const_0_to_255_operand" "")))]
  "TARGET_AVX2"
{
  HOST_WIDE_INT imm8 = INTVAL (operands[3]) & 0xff;

  /* Duplicate the low byte into the next byte of the mask.  */
  operands[3] = GEN_INT ((imm8 << 8) | imm8);
})
9617
;; Matcher for the V16HI vpblendw vec_merge.  Operand 3 must satisfy
;; avx2_pblendw_operand; only its low eight bits are printed as the
;; instruction immediate.
(define_insn "*avx2_pblendw"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_merge:V16HI
          (match_operand:V16HI 2 "nonimmediate_operand" "xm")
          (match_operand:V16HI 1 "register_operand" "x")
          (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
  "TARGET_AVX2"
{
  /* Reduce the mask to the 8-bit immediate the instruction takes.  */
  HOST_WIDE_INT low8 = INTVAL (operands[3]) & 0xff;

  operands[3] = GEN_INT (low8);
  return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "ssemov")
   (set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
9634
;; AVX2 vpblendd over the VI4_AVX2 modes, expressed as a vec_merge: the
;; 0..255 mask in operand 3 chooses, per element, between operand 2 (first
;; vec_merge arm) and operand 1 (second arm).
(define_insn "avx2_pblendd<mode>"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
        (vec_merge:VI4_AVX2
          (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
          (match_operand:VI4_AVX2 1 "register_operand" "x")
          (match_operand:SI 3 "const_0_to_255_operand" "n")))]
  "TARGET_AVX2"
  "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
9648
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of operand 1.
;; "%v" plus the "maybe_vex" prefix attribute cover both encodings.
(define_insn "sse4_1_phminposuw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (unspec:V8HI
          [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
          UNSPEC_PHMINPOSUW))]
  "TARGET_SSE4_1"
  "%vphminposuw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_extra" "1")])
9659
;; AVX2 byte -> word extension of a whole V16QI operand into V16HI.
;; any_extend is a code iterator (defined outside this section);
;; <extsuffix> picks the matching vpmov..bw mnemonic.
(define_insn "avx2_<code>v16qiv16hi2"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (any_extend:V16HI
          (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2"
  "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")])
9670
;; SSE4.1 byte -> word extension: bytes 0-7 of the V16QI operand 1 are
;; selected and extended to V8HI.  any_extend is a code iterator (defined
;; outside this section); <extsuffix> picks the mnemonic.  The Intel-syntax
;; half of the template prints operand 1 with the "q" modifier.
(define_insn "sse4_1_<code>v8qiv8hi2"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (any_extend:V8HI
          (vec_select:V8QI
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "TI")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_extra" "1")])
9690
;; AVX2 byte -> doubleword extension: bytes 0-7 of the V16QI operand 1 are
;; selected and extended to V8SI.  any_extend is a code iterator (defined
;; outside this section); <extsuffix> picks the mnemonic.  The Intel-syntax
;; half of the template prints operand 1 with the "q" modifier.
(define_insn "avx2_<code>v8qiv8si2"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (any_extend:V8SI
          (vec_select:V8QI
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX2"
  "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")])
9710
;; SSE4.1 byte -> doubleword extension: bytes 0-3 of the V16QI operand 1
;; are selected and extended to V4SI.  any_extend is a code iterator
;; (defined outside this section); <extsuffix> picks the mnemonic.  The
;; Intel-syntax half of the template prints operand 1 with the "k" modifier.
(define_insn "sse4_1_<code>v4qiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (any_extend:V4SI
          (vec_select:V4QI
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "TI")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_extra" "1")])
9726
;; AVX2 word -> doubleword extension of a whole V8HI operand into V8SI.
;; any_extend is a code iterator (defined outside this section);
;; <extsuffix> picks the matching vpmov..wd mnemonic.
(define_insn "avx2_<code>v8hiv8si2"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (any_extend:V8SI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2"
  "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")])
9737
;; SSE4.1 word -> doubleword extension: words 0-3 of the V8HI operand 1 are
;; selected and extended to V4SI.  any_extend is a code iterator (defined
;; outside this section); <extsuffix> picks the mnemonic.  The Intel-syntax
;; half of the template prints operand 1 with the "q" modifier.
(define_insn "sse4_1_<code>v4hiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (any_extend:V4SI
          (vec_select:V4HI
            (match_operand:V8HI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "TI")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_extra" "1")])
9753
;; AVX2: extend (any_extend) the low four QImode elements of operand 1
;; to DImode, giving a V4DI result in operand 0.
(define_insn "avx2_<code>v4qiv4di2"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (any_extend:V4DI
          (vec_select:V4QI
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX2"
  "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")])
9769
;; SSE4.1: extend (any_extend) the low two QImode elements of operand 1
;; to DImode, giving a V2DI result in operand 0.
(define_insn "sse4_1_<code>v2qiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (any_extend:V2DI
          (vec_select:V2QI
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "TI")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_extra" "1")])
9783
;; AVX2: extend (any_extend) the low four HImode elements of operand 1
;; to DImode, giving a V4DI result in operand 0.
(define_insn "avx2_<code>v4hiv4di2"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (any_extend:V4DI
          (vec_select:V4HI
            (match_operand:V8HI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX2"
  "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")])
9799
;; SSE4.1: extend (any_extend) the low two HImode elements of operand 1
;; to DImode, giving a V2DI result in operand 0.
(define_insn "sse4_1_<code>v2hiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (any_extend:V2DI
          (vec_select:V2HI
            (match_operand:V8HI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "TI")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_extra" "1")])
9813
;; AVX2: extend (any_extend) all four SImode elements of operand 1 to
;; DImode.  The whole V4SI source is used, so no vec_select is needed.
;; The "prefix" attribute is stated explicitly as "vex", matching the
;; other avx2_<code>* extension patterns in this group.
(define_insn "avx2_<code>v4siv4di2"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
	(any_extend:V4DI
	    (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2"
  "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
9823
;; SSE4.1: extend (any_extend) the low two SImode elements of operand 1
;; to DImode, giving a V2DI result in operand 0.
(define_insn "sse4_1_<code>v2siv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (any_extend:V2DI
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "TI")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_extra" "1")])
9837
;; vtestps/vtestpd are very similar to comiss and ucomiss when
;; setting FLAGS_REG, but they are not really compare instructions.
;; Set FLAGS_REG from the two VF-mode operands via UNSPEC_VTESTP.
;; No general-purpose or vector register is written.
(define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:VF 0 "register_operand" "x")
           (match_operand:VF 1 "nonimmediate_operand" "xm")]
          UNSPEC_VTESTP))]
  "TARGET_AVX"
  "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "<MODE>")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")])
9851
;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
;; but it is not really a compare instruction.
;; 256-bit form: set FLAGS_REG from two V4DI operands via UNSPEC_PTEST.
(define_insn "avx_ptest256"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V4DI 0 "register_operand" "x")
           (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
          UNSPEC_PTEST))]
  "TARGET_AVX"
  "vptest\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "OI")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")])
9865
;; 128-bit form: set FLAGS_REG from two V2DI operands via UNSPEC_PTEST.
(define_insn "sse4_1_ptest"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V2DI 0 "register_operand" "x")
           (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
          UNSPEC_PTEST))]
  "TARGET_SSE4_1"
  "%vptest\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "TI")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_extra" "1")])
9877
;; Packed round: operand 0 = UNSPEC_ROUND of operand 1 with the 0..15
;; immediate in operand 2.  prefix_data16 is "*" when TARGET_AVX is set
;; and "1" otherwise.
(define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "register_operand" "=x")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand" "xm")
           (match_operand:SI 2 "const_0_to_15_operand" "n")]
          UNSPEC_ROUND))]
  "TARGET_ROUND"
  "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set (attr "prefix_data16")
        (if_then_else (match_test "TARGET_AVX")
          (const_string "*")
          (const_string "1")))
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
9896
;; Round operand 1 using the 0..15 immediate in operand 2, then convert
;; the rounded vector into the integer vector operand 0 with fix_trunc.
(define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
  [(match_operand:<sseintvecmode> 0 "register_operand" "")
   (match_operand:VF1 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_0_to_15_operand" "")]
  "TARGET_ROUND"
{
  /* Fresh pseudo carrying the rounded value between the two insns.  */
  rtx rounded = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix>
	     (rounded, operands[1], operands[2]));
  emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2
	     (operands[0], rounded));
  DONE;
})
9912
;; Round operands 1 and 2 using the 0..15 immediate in operand 3 and pack
;; the truncated integer results into operand 0.
(define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
  [(match_operand:<ssepackfltmode> 0 "register_operand" "")
   (match_operand:VF2 1 "nonimmediate_operand" "")
   (match_operand:VF2 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_0_to_15_operand" "")]
  "TARGET_ROUND"
{
  if (<MODE>mode == V2DFmode
      && TARGET_AVX && !TARGET_PREFER_AVX128)
    {
      /* Concatenate both V2DF inputs into one V4DF, round that once,
	 and convert it with a single V4DF->V4SI truncation.  */
      rtx rounded = gen_reg_rtx (V4DFmode);
      rtx joined = gen_reg_rtx (V4DFmode);
      rtx first = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (joined, first, operands[2]));
      emit_insn (gen_avx_roundpd256 (rounded, joined, operands[3]));
      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], rounded));
      DONE;
    }

  /* Otherwise round each input separately, then pack and truncate.  */
  {
    rtx rounded_a = gen_reg_rtx (<MODE>mode);
    rtx rounded_b = gen_reg_rtx (<MODE>mode);

    emit_insn (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix>
	       (rounded_a, operands[1], operands[3]));
    emit_insn (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix>
	       (rounded_b, operands[2], operands[3]));
    emit_insn (gen_vec_pack_sfix_trunc_<mode>
	       (operands[0], rounded_a, rounded_b));
  }
  DONE;
})
9950
;; Scalar round: element 0 of the result (vec_merge mask 1) comes from
;; UNSPEC_ROUND of operand 2 with the 0..15 immediate in operand 3; the
;; remaining elements come from operand 1.  Alternative 0 (noavx) ties
;; operand 1 to the destination; alternative 1 (avx) takes it separately.
(define_insn "sse4_1_round<ssescalarmodesuffix>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 2 "register_operand" "x,x")
             (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
            UNSPEC_ROUND)
          (match_operand:VF_128 1 "register_operand" "0,x")
          (const_int 1)))]
  "TARGET_ROUND"
  "@
   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecvt")
   (set_attr "mode" "<MODE>")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
9971
;; Vector round expander, enabled only without -ftrapping-math.
;; Adds a constant slightly below 0.5, carrying the sign of the input,
;; to operand 1 and then applies UNSPEC_ROUND with ROUND_TRUNC.
(define_expand "round<mode>2"
  [(set (match_dup 4)
        (plus:VF
          (match_operand:VF 1 "register_operand" "")
          (match_dup 3)))
   (set (match_operand:VF 0 "register_operand" "")
        (unspec:VF
          [(match_dup 4) (match_dup 5)]
          UNSPEC_ROUND))]
  "TARGET_ROUND && !flag_trapping_math"
{
  enum machine_mode elt_mode = GET_MODE_INNER (<MODE>mode);
  const struct real_format *format = REAL_MODE_FORMAT (elt_mode);
  REAL_VALUE_TYPE below_half, gap;
  rtx scalar_c, vector_c;

  /* Load nextafter (0.5, 0.0): subtract 2**(-p - 1) from 0.5, where p
     is taken from the element mode's real_format.  */
  real_2expN (&gap, -(format->p) - 1, elt_mode);
  REAL_ARITHMETIC (below_half, MINUS_EXPR, dconsthalf, gap);
  scalar_c = const_double_from_real_value (below_half, elt_mode);

  /* Build a vector from that constant and force it into a register.  */
  vector_c = ix86_build_const_vector (<MODE>mode, true, scalar_c);
  vector_c = force_reg (<MODE>mode, vector_c);

  /* operands[3]: the constant with the sign copied from the input.  */
  operands[3] = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_copysign<mode>3 (operands[3], vector_c, operands[1]));

  /* operands[4]: the sum; operands[5]: the rounding immediate.  */
  operands[4] = gen_reg_rtx (<MODE>mode);
  operands[5] = GEN_INT (ROUND_TRUNC);
})
10005
;; Round operand 1 via round<mode>2, then convert the result into the
;; integer vector operand 0 with fix_trunc.
(define_expand "round<mode>2_sfix"
  [(match_operand:<sseintvecmode> 0 "register_operand" "")
   (match_operand:VF1 1 "register_operand" "")]
  "TARGET_ROUND && !flag_trapping_math"
{
  /* Fresh pseudo carrying the rounded value between the two steps.  */
  rtx rounded = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_round<mode>2 (rounded, operands[1]));
  emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2
	     (operands[0], rounded));
  DONE;
})
10019
;; Round operands 1 and 2 via round<mode>2 and pack the truncated integer
;; results into operand 0.
(define_expand "round<mode>2_vec_pack_sfix"
  [(match_operand:<ssepackfltmode> 0 "register_operand" "")
   (match_operand:VF2 1 "register_operand" "")
   (match_operand:VF2 2 "register_operand" "")]
  "TARGET_ROUND && !flag_trapping_math"
{
  if (<MODE>mode == V2DFmode
      && TARGET_AVX && !TARGET_PREFER_AVX128)
    {
      /* Concatenate both V2DF inputs into one V4DF, round that once,
	 and convert it with a single V4DF->V4SI truncation.  */
      rtx rounded = gen_reg_rtx (V4DFmode);
      rtx joined = gen_reg_rtx (V4DFmode);
      rtx first = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (joined, first, operands[2]));
      emit_insn (gen_roundv4df2 (rounded, joined));
      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], rounded));
      DONE;
    }

  /* Otherwise round each input separately, then pack and truncate.  */
  {
    rtx rounded_a = gen_reg_rtx (<MODE>mode);
    rtx rounded_b = gen_reg_rtx (<MODE>mode);

    emit_insn (gen_round<mode>2 (rounded_a, operands[1]));
    emit_insn (gen_round<mode>2 (rounded_b, operands[2]));
    emit_insn (gen_vec_pack_sfix_trunc_<mode>
	       (operands[0], rounded_a, rounded_b));
  }
  DONE;
})
10053
10054;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10055;;
10056;; Intel SSE4.2 string/text processing instructions
10057;;
10058;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10059
;; Combined pcmpestr pattern with three results: operand 0 (constraint
;; "c"), operand 1 (constraint "Yz") and FLAGS_REG.  It is always split.
;; The splitter emits only the insns whose results lack a REG_UNUSED note
;; on curr_insn.
(define_insn_and_split "sse4_2_pcmpestr"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
           (match_operand:SI 3 "register_operand" "a,a")
           (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
           (match_operand:SI 5 "register_operand" "d,d")
           (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPESTR))
   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
        (unspec:V16QI
          [(match_dup 2) (match_dup 3) (match_dup 4)
           (match_dup 5) (match_dup 6)]
          UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 2) (match_dup 3) (match_dup 4)
           (match_dup 5) (match_dup 6)]
          UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* A result counts as live when curr_insn has no REG_UNUSED note
     for its register.  */
  int index_live
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
  int mask_live
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
  int flags_live = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);

  if (index_live)
    emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
				     operands[3], operands[4],
				     operands[5], operands[6]));
  if (mask_live)
    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
				     operands[3], operands[4],
				     operands[5], operands[6]));
  if (!index_live && !mask_live)
    {
      /* Neither register result is used: keep a flags-only insn if the
	 flags are live, otherwise leave just a deleted-insn note.  */
      if (flags_live)
	emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
					       operands[2], operands[3],
					       operands[4], operands[5],
					       operands[6]));
      else
	emit_note (NOTE_INSN_DELETED);
    }

  DONE;
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "memory" "none,load")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
10119
;; pcmpestri: SImode result in operand 0 (constraint "c") plus FLAGS_REG.
;; Operands 2 and 4 are fixed to the "a" and "d" constraints.
(define_insn "sse4_2_pcmpestri"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:SI 2 "register_operand" "a,a")
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
           (match_operand:SI 4 "register_operand" "d,d")
           (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1) (match_dup 2) (match_dup 3)
           (match_dup 4) (match_dup 5)]
          UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2"
  "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "memory" "none,load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
10146
;; pcmpestrm: V16QI result in operand 0 (constraint "Yz") plus FLAGS_REG.
;; Operands 2 and 4 are fixed to the "a" and "d" constraints.
(define_insn "sse4_2_pcmpestrm"
  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:SI 2 "register_operand" "a,a")
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
           (match_operand:SI 4 "register_operand" "d,d")
           (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1) (match_dup 2) (match_dup 3)
           (match_dup 4) (match_dup 5)]
          UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2"
  "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "memory" "none,load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
10173
;; Flags-only pcmpestr: only FLAGS_REG is set; the other two outputs are
;; scratch clobbers.  Alternatives 0-1 emit pcmpestrm (clobbering the
;; "Yz" scratch), alternatives 2-3 emit pcmpestri (clobbering "c").
(define_insn "sse4_2_pcmpestr_cconly"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
           (match_operand:SI 3 "register_operand" "a,a,a,a")
           (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
           (match_operand:SI 5 "register_operand" "d,d,d,d")
           (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
          UNSPEC_PCMPESTR))
   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
   (clobber (match_scratch:SI    1 "= X, X,c,c"))]
  "TARGET_SSE4_2"
  "@
   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "memory" "none,load,none,load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
10198
;; Combined pcmpistr pattern with three results: operand 0 (constraint
;; "c"), operand 1 (constraint "Yz") and FLAGS_REG.  It is always split.
;; The splitter emits only the insns whose results lack a REG_UNUSED note
;; on curr_insn.
(define_insn_and_split "sse4_2_pcmpistr"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
           (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
           (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
        (unspec:V16QI
          [(match_dup 2) (match_dup 3) (match_dup 4)]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 2) (match_dup 3) (match_dup 4)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* A result counts as live when curr_insn has no REG_UNUSED note
     for its register.  */
  int index_live
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
  int mask_live
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
  int flags_live = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);

  if (index_live)
    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
				     operands[3], operands[4]));
  if (mask_live)
    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
				     operands[3], operands[4]));
  if (!index_live && !mask_live)
    {
      /* Neither register result is used: keep a flags-only insn if the
	 flags are live, otherwise leave just a deleted-insn note.  */
      if (flags_live)
	emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
					       operands[2], operands[3],
					       operands[4]));
      else
	emit_note (NOTE_INSN_DELETED);
    }

  DONE;
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "memory" "none,load")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
10249
;; pcmpistri: SImode result in operand 0 (constraint "c") plus FLAGS_REG.
(define_insn "sse4_2_pcmpistri"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1) (match_dup 2) (match_dup 3)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "memory" "none,load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
10272
;; pcmpistrm: V16QI result in operand 0 (constraint "Yz") plus FLAGS_REG.
(define_insn "sse4_2_pcmpistrm"
  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1) (match_dup 2) (match_dup 3)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "memory" "none,load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
10295
;; Flags-only pcmpistr: only FLAGS_REG is set; the other two outputs are
;; scratch clobbers.  Alternatives 0-1 emit pcmpistrm (clobbering the
;; "Yz" scratch), alternatives 2-3 emit pcmpistri (clobbering "c").
(define_insn "sse4_2_pcmpistr_cconly"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
           (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
          UNSPEC_PCMPISTR))
   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
   (clobber (match_scratch:SI    1 "= X, X,c,c"))]
  "TARGET_SSE4_2"
  "@
   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "memory" "none,load,none,load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
10318
10319;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10320;;
10321;; XOP instructions
10322;;
10323;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10324
;; XOP parallel integer multiply/add instructions.
;; Note that the XOP multiply/add instructions
;;     a[i] = b[i] * c[i] + d[i];
;; do not allow the value being added (d) to be a memory operand.
;; V8HI multiply-add: operand 0 = operand 1 * operand 2 + operand 3.
;; The addend (operand 3) is register-only (constraint "x").
(define_insn "xop_pmacsww"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
          (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                     (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10340
;; V8HI multiply-add with a signed-saturating add (ss_plus):
;; operand 0 = ss_plus (operand 1 * operand 2, operand 3).
(define_insn "xop_pmacssww"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (ss_plus:V8HI
          (mult:V8HI
            (match_operand:V8HI 1 "nonimmediate_operand" "%x")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10351
;; V4SI multiply-add: operand 0 = operand 1 * operand 2 + operand 3.
;; The addend (operand 3) is register-only (constraint "x").
(define_insn "xop_pmacsdd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                     (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10363
;; V4SI multiply-add with a signed-saturating add (ss_plus):
;; operand 0 = ss_plus (operand 1 * operand 2, operand 3).
(define_insn "xop_pmacssdd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (ss_plus:V4SI
          (mult:V4SI
            (match_operand:V4SI 1 "nonimmediate_operand" "%x")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10374
;; Elements 0 and 2 of operands 1 and 2 are each sign-extended to DImode,
;; multiplied, and added to operand 3 with a signed-saturating add.
;; Both multiplicands must be wrapped in sign_extend:V2DI so that the
;; mult:V2DI sees two V2DI inputs, as in xop_pmacssdqh.
(define_insn "xop_pmacssdql"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(ss_plus:V2DI
	 (mult:V2DI
	  (sign_extend:V2DI
	   (vec_select:V2SI
	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
	    (parallel [(const_int 0)
		       (const_int 2)])))
	  (sign_extend:V2DI
	   (vec_select:V2SI
	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
	    (parallel [(const_int 0)
		       (const_int 2)]))))
	 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10393
;; Elements 1 and 3 of operands 1 and 2 are each sign-extended to DImode,
;; multiplied, and added to operand 3 with a signed-saturating add.
(define_insn "xop_pmacssdqh"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (ss_plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10413
;; Elements 0 and 2 of operands 1 and 2 are each sign-extended to DImode,
;; multiplied, and added (plain plus) to operand 3.
(define_insn "xop_pmacsdql"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 0) (const_int 2)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10433
;; Elements 1 and 3 of operands 1 and 2 are each sign-extended to DImode,
;; multiplied, and added (plain plus) to operand 3.
(define_insn "xop_pmacsdqh"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10453
;; XOP parallel integer multiply/add instructions for the intrinsics
;; Elements 1, 3, 5 and 7 of operands 1 and 2 are each sign-extended to
;; SImode, multiplied, and added to operand 3 with a saturating add.
(define_insn "xop_pmacsswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (ss_plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10478
;; Elements 1, 3, 5 and 7 of operands 1 and 2 are each sign-extended to
;; SImode, multiplied, and added (plain plus) to operand 3.
(define_insn "xop_pmacswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10502
;; Two sign-extended HImode products, from elements (0,2,4,6) and from
;; elements (1,3,5,7) of operands 1 and 2, are summed with a plain plus.
;; That sum is then added to operand 3 with a signed-saturating add.
(define_insn "xop_pmadcsswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (ss_plus:V4SI
          (plus:V4SI
            ;; Product of elements 0, 2, 4, 6.
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)]))))
            ;; Product of elements 1, 3, 5, 7.
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 2)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10542
;; Two sign-extended HImode products, from elements (0,2,4,6) and from
;; elements (1,3,5,7) of operands 1 and 2, are summed and then added
;; (plain plus) to operand 3.
(define_insn "xop_pmadcswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (plus:V4SI
            ;; Product of elements 0, 2, 4, 6.
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)]))))
            ;; Product of elements 1, 3, 5, 7.
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 2)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10582
10583;; XOP parallel XMM conditional moves
;; Conditional move expressed as if_then_else: operand 3 is the selector,
;; operand 1 the "then" value and operand 2 the "else" value.  At most
;; one of operands 2 and 3 may be in memory (alternatives "xm,x"/"x,m").
(define_insn "xop_pcmov_<mode><avxsizesuffix>"
  [(set (match_operand:V 0 "register_operand" "=x,x")
        (if_then_else:V
          (match_operand:V 3 "nonimmediate_operand" "x,m")
          (match_operand:V 1 "register_operand" "x,x")
          (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
  "TARGET_XOP"
  "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")])
10593
10594;; XOP horizontal add/subtract instructions
;; Horizontal add, bytes to words: the even-indexed and odd-indexed
;; QImode elements of operand 1 are sign-extended to HImode and added.
(define_insn "xop_phaddbw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_operand:V16QI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)
                         (const_int 9) (const_int 11)
                         (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphaddbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10623
;; Horizontal add, bytes to dwords: four strided selections of operand 1
;; (starting at elements 0, 1, 2 and 3, stride 4) are sign-extended to
;; SImode and summed pairwise.
(define_insn "xop_phaddbd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (plus:V4SI
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)
                           (const_int 8) (const_int 12)])))
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)
                           (const_int 9) (const_int 13)]))))
          (plus:V4SI
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)
                           (const_int 10) (const_int 14)])))
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)
                           (const_int 11) (const_int 15)]))))))]
  "TARGET_XOP"
  "vphaddbd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10660
;; Horizontal add, bytes to qwords: eight two-element selections of
;; operand 1 (elements k and k+8 for k = 0..7) are sign-extended to
;; DImode and summed as a balanced tree of plus operations.
(define_insn "xop_phaddbq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          ;; Elements 0..3 paired with 8..11.
          (plus:V2DI
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 8)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 9)]))))
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 2) (const_int 10)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 3) (const_int 11)])))))
          ;; Elements 4..7 paired with 12..15.
          (plus:V2DI
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 4) (const_int 12)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 5) (const_int 13)]))))
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 6) (const_int 14)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 7) (const_int 15)])))))))]
  "TARGET_XOP"
  "vphaddbq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10713
;; vphaddwd: horizontal add of signed words into doublewords.
;; The even elements (0,2,4,6) and odd elements (1,3,5,7) of the V8HI
;; operand 1 are sign-extended to V4SI and added, so result element i is
;; word 2i + word 2i+1.  Enabled only for TARGET_XOP.
10714(define_insn "xop_phaddwd"
10715  [(set (match_operand:V4SI 0 "register_operand" "=x")
10716	(plus:V4SI
10717	 (sign_extend:V4SI
10718	  (vec_select:V4HI
10719	   (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10720	   (parallel [(const_int 0)
10721		      (const_int 2)
10722		      (const_int 4)
10723		      (const_int 6)])))
10724	 (sign_extend:V4SI
10725	  (vec_select:V4HI
10726	   (match_dup 1)
10727	   (parallel [(const_int 1)
10728		      (const_int 3)
10729		      (const_int 5)
10730		      (const_int 7)])))))]
10731  "TARGET_XOP"
10732  "vphaddwd\t{%1, %0|%0, %1}"
10733  [(set_attr "type" "sseiadd1")])
10734
;; vphaddwq: horizontal add of signed words into quadwords.
;; Four V2HI selections of operand 1, (k, k+4) for k = 0..3, are each
;; sign-extended to V2DI and summed; result element 0 is the sum of words
;; 0..3 and element 1 the sum of words 4..7.  Enabled only for TARGET_XOP.
10735(define_insn "xop_phaddwq"
10736  [(set (match_operand:V2DI 0 "register_operand" "=x")
10737	(plus:V2DI
10738	 (plus:V2DI
10739	  (sign_extend:V2DI
10740	   (vec_select:V2HI
10741	    (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10742	    (parallel [(const_int 0)
10743		       (const_int 4)])))
10744	  (sign_extend:V2DI
10745	   (vec_select:V2HI
10746	    (match_dup 1)
10747	    (parallel [(const_int 1)
10748		       (const_int 5)]))))
10749	 (plus:V2DI
10750	  (sign_extend:V2DI
10751	   (vec_select:V2HI
10752	    (match_dup 1)
10753	    (parallel [(const_int 2)
10754		       (const_int 6)])))
10755	  (sign_extend:V2DI
10756	   (vec_select:V2HI
10757	    (match_dup 1)
10758	    (parallel [(const_int 3)
10759		       (const_int 7)]))))))]
10760  "TARGET_XOP"
10761  "vphaddwq\t{%1, %0|%0, %1}"
10762  [(set_attr "type" "sseiadd1")])
10763
;; vphadddq: horizontal add of signed doublewords into quadwords.
;; Elements (0,2) and (1,3) of the V4SI operand 1 are sign-extended to
;; V2DI and added, so result element i is dword 2i + dword 2i+1.
;; Enabled only for TARGET_XOP.
10764(define_insn "xop_phadddq"
10765  [(set (match_operand:V2DI 0 "register_operand" "=x")
10766	(plus:V2DI
10767	 (sign_extend:V2DI
10768	  (vec_select:V2SI
10769	   (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10770	   (parallel [(const_int 0)
10771		      (const_int 2)])))
10772	 (sign_extend:V2DI
10773	  (vec_select:V2SI
10774	   (match_dup 1)
10775	   (parallel [(const_int 1)
10776		      (const_int 3)])))))]
10777  "TARGET_XOP"
10778  "vphadddq\t{%1, %0|%0, %1}"
10779  [(set_attr "type" "sseiadd1")])
10780
;; vphaddubw: horizontal add of unsigned bytes into words.
;; The even-indexed and odd-indexed bytes of the V16QI operand 1 are
;; zero-extended to V8HI and added, so result element i is
;; byte 2i + byte 2i+1.  Enabled only for TARGET_XOP.
10781(define_insn "xop_phaddubw"
10782  [(set (match_operand:V8HI 0 "register_operand" "=x")
10783	(plus:V8HI
10784	 (zero_extend:V8HI
10785	  (vec_select:V8QI
10786	   (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10787	   (parallel [(const_int 0)
10788		      (const_int 2)
10789		      (const_int 4)
10790		      (const_int 6)
10791		      (const_int 8)
10792		      (const_int 10)
10793		      (const_int 12)
10794		      (const_int 14)])))
10795	 (zero_extend:V8HI
10796	  (vec_select:V8QI
10797	   (match_dup 1)
10798	   (parallel [(const_int 1)
10799		      (const_int 3)
10800		      (const_int 5)
10801		      (const_int 7)
10802		      (const_int 9)
10803		      (const_int 11)
10804		      (const_int 13)
10805		      (const_int 15)])))))]
10806  "TARGET_XOP"
10807  "vphaddubw\t{%1, %0|%0, %1}"
10808  [(set_attr "type" "sseiadd1")])
10809
;; vphaddubd: horizontal add of unsigned bytes into doublewords.
;; Four V4QI selections of operand 1 (elements 0/4/8/12, 1/5/9/13,
;; 2/6/10/14 and 3/7/11/15) are each zero-extended to V4SI and summed, so
;; result element i is the sum of source bytes 4i .. 4i+3.
;; Enabled only for TARGET_XOP.
10810(define_insn "xop_phaddubd"
10811  [(set (match_operand:V4SI 0 "register_operand" "=x")
10812	(plus:V4SI
10813	 (plus:V4SI
10814	  (zero_extend:V4SI
10815	   (vec_select:V4QI
10816	    (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10817	    (parallel [(const_int 0)
10818		       (const_int 4)
10819		       (const_int 8)
10820		       (const_int 12)])))
10821	  (zero_extend:V4SI
10822	   (vec_select:V4QI
10823	    (match_dup 1)
10824	    (parallel [(const_int 1)
10825		       (const_int 5)
10826		       (const_int 9)
10827		       (const_int 13)]))))
10828	 (plus:V4SI
10829	  (zero_extend:V4SI
10830	   (vec_select:V4QI
10831	    (match_dup 1)
10832	    (parallel [(const_int 2)
10833		       (const_int 6)
10834		       (const_int 10)
10835		       (const_int 14)])))
10836	  (zero_extend:V4SI
10837	   (vec_select:V4QI
10838	    (match_dup 1)
10839	    (parallel [(const_int 3)
10840		       (const_int 7)
10841		       (const_int 11)
10842		       (const_int 15)]))))))]
10843  "TARGET_XOP"
10844  "vphaddubd\t{%1, %0|%0, %1}"
10845  [(set_attr "type" "sseiadd1")])
10846
;; vphaddubq: horizontal add of unsigned bytes into quadwords.
;; Eight V2QI selections of operand 1, (k, k+8) for k = 0..7, are each
;; zero-extended to V2DI and summed as a balanced tree of PLUS operations.
;; Result element 0 is therefore the sum of bytes 0..7 and element 1 the
;; sum of bytes 8..15.  Every term must use zero_extend: this is the
;; unsigned form of the instruction, and a sign_extend on any term would
;; make the RTL disagree with the hardware for bytes >= 0x80.
;; Enabled only for TARGET_XOP.
10847(define_insn "xop_phaddubq"
10848  [(set (match_operand:V2DI 0 "register_operand" "=x")
10849	(plus:V2DI
10850	 (plus:V2DI
10851	  (plus:V2DI
10852	   (zero_extend:V2DI
10853	    (vec_select:V2QI
10854	     (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10855	     (parallel [(const_int 0)
10856			(const_int 8)])))
10857	   (zero_extend:V2DI
10858	    (vec_select:V2QI
10859	     (match_dup 1)
10860	     (parallel [(const_int 1)
10861			(const_int 9)]))))
10862	  (plus:V2DI
10863	   (zero_extend:V2DI
10864	    (vec_select:V2QI
10865	     (match_dup 1)
10866	     (parallel [(const_int 2)
10867			(const_int 10)])))
10868	   (zero_extend:V2DI
10869	    (vec_select:V2QI
10870	     (match_dup 1)
10871	     (parallel [(const_int 3)
10872			(const_int 11)])))))
10873	 (plus:V2DI
10874	  (plus:V2DI
10875	   (zero_extend:V2DI
10876	    (vec_select:V2QI
10877	     (match_dup 1)
10878	     (parallel [(const_int 4)
10879			(const_int 12)])))
10880	   (zero_extend:V2DI
10881	    (vec_select:V2QI
10882	     (match_dup 1)
10883	     (parallel [(const_int 5)
10884			(const_int 13)]))))
10885	  (plus:V2DI
10886	   (zero_extend:V2DI
10887	    (vec_select:V2QI
10888	     (match_dup 1)
10889	     (parallel [(const_int 6)
10890			(const_int 14)])))
10891	   (zero_extend:V2DI
10892	    (vec_select:V2QI
10893	     (match_dup 1)
10894	     (parallel [(const_int 7)
10895			(const_int 15)])))))))]
10896  "TARGET_XOP"
10897  "vphaddubq\t{%1, %0|%0, %1}"
10898  [(set_attr "type" "sseiadd1")])
10899
;; vphadduwd: horizontal add of unsigned words into doublewords.
;; The even elements (0,2,4,6) and odd elements (1,3,5,7) of the V8HI
;; operand 1 are zero-extended to V4SI and added, so result element i is
;; word 2i + word 2i+1.  Enabled only for TARGET_XOP.
10900(define_insn "xop_phadduwd"
10901  [(set (match_operand:V4SI 0 "register_operand" "=x")
10902	(plus:V4SI
10903	 (zero_extend:V4SI
10904	  (vec_select:V4HI
10905	   (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10906	   (parallel [(const_int 0)
10907		      (const_int 2)
10908		      (const_int 4)
10909		      (const_int 6)])))
10910	 (zero_extend:V4SI
10911	  (vec_select:V4HI
10912	   (match_dup 1)
10913	   (parallel [(const_int 1)
10914		      (const_int 3)
10915		      (const_int 5)
10916		      (const_int 7)])))))]
10917  "TARGET_XOP"
10918  "vphadduwd\t{%1, %0|%0, %1}"
10919  [(set_attr "type" "sseiadd1")])
10920
;; vphadduwq: horizontal add of unsigned words into quadwords.
;; Four V2HI selections of operand 1, (k, k+4) for k = 0..3, are each
;; zero-extended to V2DI and summed; result element 0 is the sum of words
;; 0..3 and element 1 the sum of words 4..7.  Enabled only for TARGET_XOP.
10921(define_insn "xop_phadduwq"
10922  [(set (match_operand:V2DI 0 "register_operand" "=x")
10923	(plus:V2DI
10924	 (plus:V2DI
10925	  (zero_extend:V2DI
10926	   (vec_select:V2HI
10927	    (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10928	    (parallel [(const_int 0)
10929		       (const_int 4)])))
10930	  (zero_extend:V2DI
10931	   (vec_select:V2HI
10932	    (match_dup 1)
10933	    (parallel [(const_int 1)
10934		       (const_int 5)]))))
10935	 (plus:V2DI
10936	  (zero_extend:V2DI
10937	   (vec_select:V2HI
10938	    (match_dup 1)
10939	    (parallel [(const_int 2)
10940		       (const_int 6)])))
10941	  (zero_extend:V2DI
10942	   (vec_select:V2HI
10943	    (match_dup 1)
10944	    (parallel [(const_int 3)
10945		       (const_int 7)]))))))]
10946  "TARGET_XOP"
10947  "vphadduwq\t{%1, %0|%0, %1}"
10948  [(set_attr "type" "sseiadd1")])
10949
;; vphaddudq: horizontal add of unsigned doublewords into quadwords.
;; Elements (0,2) and (1,3) of the V4SI operand 1 are zero-extended to
;; V2DI and added, so result element i is dword 2i + dword 2i+1.
;; Enabled only for TARGET_XOP.
10950(define_insn "xop_phaddudq"
10951  [(set (match_operand:V2DI 0 "register_operand" "=x")
10952	(plus:V2DI
10953	 (zero_extend:V2DI
10954	  (vec_select:V2SI
10955	   (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10956	   (parallel [(const_int 0)
10957		      (const_int 2)])))
10958	 (zero_extend:V2DI
10959	  (vec_select:V2SI
10960	   (match_dup 1)
10961	   (parallel [(const_int 1)
10962		      (const_int 3)])))))]
10963  "TARGET_XOP"
10964  "vphaddudq\t{%1, %0|%0, %1}"
10965  [(set_attr "type" "sseiadd1")])
10966
;; vphsubbw: horizontal subtract of signed bytes into words.
;; The even-indexed bytes of operand 1 (minuend) and the odd-indexed bytes
;; (subtrahend) are sign-extended to V8HI, so result element i is
;; byte 2i - byte 2i+1.  Enabled only for TARGET_XOP.
10967(define_insn "xop_phsubbw"
10968  [(set (match_operand:V8HI 0 "register_operand" "=x")
10969	(minus:V8HI
10970	 (sign_extend:V8HI
10971	  (vec_select:V8QI
10972	   (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10973	   (parallel [(const_int 0)
10974		      (const_int 2)
10975		      (const_int 4)
10976		      (const_int 6)
10977		      (const_int 8)
10978		      (const_int 10)
10979		      (const_int 12)
10980		      (const_int 14)])))
10981	 (sign_extend:V8HI
10982	  (vec_select:V8QI
10983	   (match_dup 1)
10984	   (parallel [(const_int 1)
10985		      (const_int 3)
10986		      (const_int 5)
10987		      (const_int 7)
10988		      (const_int 9)
10989		      (const_int 11)
10990		      (const_int 13)
10991		      (const_int 15)])))))]
10992  "TARGET_XOP"
10993  "vphsubbw\t{%1, %0|%0, %1}"
10994  [(set_attr "type" "sseiadd1")])
10995
;; vphsubwd: horizontal subtract of signed words into doublewords.
;; Even elements (0,2,4,6) of operand 1 are the minuend and odd elements
;; (1,3,5,7) the subtrahend, both sign-extended to V4SI, so result element
;; i is word 2i - word 2i+1.  Enabled only for TARGET_XOP.
10996(define_insn "xop_phsubwd"
10997  [(set (match_operand:V4SI 0 "register_operand" "=x")
10998	(minus:V4SI
10999	 (sign_extend:V4SI
11000	  (vec_select:V4HI
11001	   (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11002	   (parallel [(const_int 0)
11003		      (const_int 2)
11004		      (const_int 4)
11005		      (const_int 6)])))
11006	 (sign_extend:V4SI
11007	  (vec_select:V4HI
11008	   (match_dup 1)
11009	   (parallel [(const_int 1)
11010		      (const_int 3)
11011		      (const_int 5)
11012		      (const_int 7)])))))]
11013  "TARGET_XOP"
11014  "vphsubwd\t{%1, %0|%0, %1}"
11015  [(set_attr "type" "sseiadd1")])
11016
;; vphsubdq: horizontal subtract of signed doublewords into quadwords.
;; Elements (0,2) of operand 1 are the minuend and elements (1,3) the
;; subtrahend, both sign-extended to V2DI, so result element i is
;; dword 2i - dword 2i+1.  Enabled only for TARGET_XOP.
11017(define_insn "xop_phsubdq"
11018  [(set (match_operand:V2DI 0 "register_operand" "=x")
11019	(minus:V2DI
11020	 (sign_extend:V2DI
11021	  (vec_select:V2SI
11022	   (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11023	   (parallel [(const_int 0)
11024		      (const_int 2)])))
11025	 (sign_extend:V2DI
11026	  (vec_select:V2SI
11027	   (match_dup 1)
11028	   (parallel [(const_int 1)
11029		      (const_int 3)])))))]
11030  "TARGET_XOP"
11031  "vphsubdq\t{%1, %0|%0, %1}"
11032  [(set_attr "type" "sseiadd1")])
11033
11034;; XOP permute instructions
;; vpperm, modelled as an opaque UNSPEC_XOP_PERMUTE of three V16QI inputs.
;; Operand 1 must be a register.  The two alternatives allow either
;; operand 2 or operand 3 to be memory, and the insn condition rejects
;; the case where both are memory.
11035(define_insn "xop_pperm"
11036  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11037	(unspec:V16QI
11038	  [(match_operand:V16QI 1 "register_operand" "x,x")
11039	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
11040	   (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
11041	  UNSPEC_XOP_PERMUTE))]
11042  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11043  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11044  [(set_attr "type" "sse4arg")
11045   (set_attr "mode" "TI")])
11046
11047;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI vectors into one V4SI: operands 1 and 2 are each
;; truncated to V2SI and concatenated.  The result is produced with
;; vpperm; operand 3 (V16QI) appears only as a USE and is passed to the
;; instruction as its selector operand.
;; NOTE(review): the selector contents needed to implement the truncation
;; are chosen by the caller and are not visible in this pattern.
;; At most one of operands 2 and 3 may be memory.
11048(define_insn "xop_pperm_pack_v2di_v4si"
11049  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11050	(vec_concat:V4SI
11051	 (truncate:V2SI
11052	  (match_operand:V2DI 1 "register_operand" "x,x"))
11053	 (truncate:V2SI
11054	  (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
11055   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11056  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11057  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11058  [(set_attr "type" "sse4arg")
11059   (set_attr "mode" "TI")])
11060
;; Pack two V4SI vectors into one V8HI: operands 1 and 2 are each
;; truncated to V4HI and concatenated.  Emitted as vpperm with operand 3
;; (V16QI, present only as a USE) as the selector operand.
;; At most one of operands 2 and 3 may be memory.
11061(define_insn "xop_pperm_pack_v4si_v8hi"
11062  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11063	(vec_concat:V8HI
11064	 (truncate:V4HI
11065	  (match_operand:V4SI 1 "register_operand" "x,x"))
11066	 (truncate:V4HI
11067	  (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
11068   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11069  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11070  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11071  [(set_attr "type" "sse4arg")
11072   (set_attr "mode" "TI")])
11073
;; Pack two V8HI vectors into one V16QI: operands 1 and 2 are each
;; truncated to V8QI and concatenated.  Emitted as vpperm with operand 3
;; (V16QI, present only as a USE) as the selector operand.
;; At most one of operands 2 and 3 may be memory.
11074(define_insn "xop_pperm_pack_v8hi_v16qi"
11075  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11076	(vec_concat:V16QI
11077	 (truncate:V8QI
11078	  (match_operand:V8HI 1 "register_operand" "x,x"))
11079	 (truncate:V8QI
11080	  (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
11081   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11082  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11083  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11084  [(set_attr "type" "sse4arg")
11085   (set_attr "mode" "TI")])
11086
11087;; XOP packed rotate instructions
;; Expander for a left rotate of a 128-bit integer vector by one SImode
;; count applied to every element.
;; If operand 2 satisfies const_0_to_<sserotatemax>_operand, the
;; preparation code does nothing and the rotate template above is emitted
;; unchanged.  Otherwise the count is converted to <ssescalarmode> when its
;; mode differs, replicated into all <ssescalarnum> elements of a fresh
;; vector register through vec_init<mode>, and the per-element variable
;; rotate xop_vrotl<mode>3 is emitted instead.
11088(define_expand "rotl<mode>3"
11089  [(set (match_operand:VI_128 0 "register_operand" "")
11090	(rotate:VI_128
11091	 (match_operand:VI_128 1 "nonimmediate_operand" "")
11092	 (match_operand:SI 2 "general_operand")))]
11093  "TARGET_XOP"
11094{
11095  /* If we were given a scalar, convert it to parallel */
11096  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11097    {
11098      rtvec vs = rtvec_alloc (<ssescalarnum>);
11099      rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11100      rtx reg = gen_reg_rtx (<MODE>mode);
11101      rtx op2 = operands[2];
11102      int i;
11103
11104      if (GET_MODE (op2) != <ssescalarmode>mode)
11105	{
11106	  op2 = gen_reg_rtx (<ssescalarmode>mode);
11107	  convert_move (op2, operands[2], false);
11108	}
11109
11110      for (i = 0; i < <ssescalarnum>; i++)
11111	RTVEC_ELT (vs, i) = op2;
11112
11113      emit_insn (gen_vec_init<mode> (reg, par));
11114      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
11115      DONE;
11116    }
11117})
11118
;; Expander for a right rotate of a 128-bit integer vector by one SImode
;; count applied to every element.
;; If operand 2 satisfies const_0_to_<sserotatemax>_operand, the rotatert
;; template above is emitted unchanged.  Otherwise the count is converted
;; to <ssescalarmode> when its mode differs, broadcast into a vector
;; register with vec_init<mode>, negated with neg<mode>2, and passed to
;; xop_vrotl<mode>3, whose pattern rotates right for negative counts.
11119(define_expand "rotr<mode>3"
11120  [(set (match_operand:VI_128 0 "register_operand" "")
11121	(rotatert:VI_128
11122	 (match_operand:VI_128 1 "nonimmediate_operand" "")
11123	 (match_operand:SI 2 "general_operand")))]
11124  "TARGET_XOP"
11125{
11126  /* If we were given a scalar, convert it to parallel */
11127  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11128    {
11129      rtvec vs = rtvec_alloc (<ssescalarnum>);
11130      rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11131      rtx neg = gen_reg_rtx (<MODE>mode);
11132      rtx reg = gen_reg_rtx (<MODE>mode);
11133      rtx op2 = operands[2];
11134      int i;
11135
11136      if (GET_MODE (op2) != <ssescalarmode>mode)
11137	{
11138	  op2 = gen_reg_rtx (<ssescalarmode>mode);
11139	  convert_move (op2, operands[2], false);
11140	}
11141
11142      for (i = 0; i < <ssescalarnum>; i++)
11143	RTVEC_ELT (vs, i) = op2;
11144
11145      emit_insn (gen_vec_init<mode> (reg, par));
11146      emit_insn (gen_neg<mode>2 (neg, reg));
11147      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
11148      DONE;
11149    }
11150})
11151
;; Immediate-count left rotate of every element of a 128-bit integer
;; vector.  Operand 2 must satisfy const_0_to_<sserotatemax>_operand and
;; is printed directly as the vprot<ssemodesuffix> immediate.
11152(define_insn "xop_rotl<mode>3"
11153  [(set (match_operand:VI_128 0 "register_operand" "=x")
11154	(rotate:VI_128
11155	 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11156	 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11157  "TARGET_XOP"
11158  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11159  [(set_attr "type" "sseishft")
11160   (set_attr "length_immediate" "1")
11161   (set_attr "mode" "TI")])
11162
;; Immediate-count right rotate of every element of a 128-bit integer
;; vector.  The output code stores (element bit size - operand 2) in
;; operands[3] and prints vprot<ssemodesuffix> with that immediate, i.e.
;; the right rotate is emitted as the complementary left rotate.
11163(define_insn "xop_rotr<mode>3"
11164  [(set (match_operand:VI_128 0 "register_operand" "=x")
11165	(rotatert:VI_128
11166	 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11167	 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11168  "TARGET_XOP"
11169{
11170  operands[3]
11171    = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11172  return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
11173}
11174  [(set_attr "type" "sseishft")
11175   (set_attr "length_immediate" "1")
11176   (set_attr "mode" "TI")])
11177
;; Expander for a right rotate with a per-element vector count
;; (operand 2).  The counts are negated into a fresh register with
;; neg<mode>2 and handed to xop_vrotl<mode>3, whose pattern rotates right
;; for negative counts.  Always ends with DONE, so the operand list above
;; only declares the operands.
11178(define_expand "vrotr<mode>3"
11179  [(match_operand:VI_128 0 "register_operand" "")
11180   (match_operand:VI_128 1 "register_operand" "")
11181   (match_operand:VI_128 2 "register_operand" "")]
11182  "TARGET_XOP"
11183{
11184  rtx reg = gen_reg_rtx (<MODE>mode);
11185  emit_insn (gen_neg<mode>2 (reg, operands[2]));
11186  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
11187  DONE;
11188})
11189
;; Expander for a left rotate with a per-element vector count
;; (operand 2).  Forwards all three operands unchanged to
;; xop_vrotl<mode>3 and ends with DONE.
11190(define_expand "vrotl<mode>3"
11191  [(match_operand:VI_128 0 "register_operand" "")
11192   (match_operand:VI_128 1 "register_operand" "")
11193   (match_operand:VI_128 2 "register_operand" "")]
11194  "TARGET_XOP"
11195{
11196  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
11197  DONE;
11198})
11199
;; Variable rotate (vprot<ssemodesuffix> with a vector count).
;; The RTL selects on the sign of the count in operand 2: a count >= 0
;; rotates operand 1 left by that count, otherwise it rotates right by
;; the negated count.  Either operand 1 or operand 2 may be memory, but
;; the insn condition rejects both being memory at once.
11200(define_insn "xop_vrotl<mode>3"
11201  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11202	(if_then_else:VI_128
11203	 (ge:VI_128
11204	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11205	  (const_int 0))
11206	 (rotate:VI_128
11207	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11208	  (match_dup 2))
11209	 (rotatert:VI_128
11210	  (match_dup 1)
11211	  (neg:VI_128 (match_dup 2)))))]
11212  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11213  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11214  [(set_attr "type" "sseishft")
11215   (set_attr "prefix_data16" "0")
11216   (set_attr "prefix_extra" "2")
11217   (set_attr "mode" "TI")])
11218
11219;; XOP packed shift instructions.
;; Per-element logical right shift for the VI12_128 modes (XOP only).
;; The shift counts in operand 2 are negated into a fresh register and
;; passed to xop_shl<mode>3, whose pattern performs a logical right shift
;; for negative counts.  Always ends with DONE.
11220(define_expand "vlshr<mode>3"
11221  [(set (match_operand:VI12_128 0 "register_operand" "")
11222	(lshiftrt:VI12_128
11223	  (match_operand:VI12_128 1 "register_operand" "")
11224	  (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
11225  "TARGET_XOP"
11226{
11227  rtx neg = gen_reg_rtx (<MODE>mode);
11228  emit_insn (gen_neg<mode>2 (neg, operands[2]));
11229  emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
11230  DONE;
11231})
11232
;; Per-element logical right shift for the VI48_128 modes, available with
;; AVX2 or XOP.  With TARGET_AVX2 the preparation code does nothing and
;; the lshiftrt template above is emitted.  Without AVX2 (XOP only) the
;; counts are negated and xop_shl<mode>3 is emitted instead.
11233(define_expand "vlshr<mode>3"
11234  [(set (match_operand:VI48_128 0 "register_operand" "")
11235	(lshiftrt:VI48_128
11236	  (match_operand:VI48_128 1 "register_operand" "")
11237	  (match_operand:VI48_128 2 "nonimmediate_operand" "")))]
11238  "TARGET_AVX2 || TARGET_XOP"
11239{
11240  if (!TARGET_AVX2)
11241    {
11242      rtx neg = gen_reg_rtx (<MODE>mode);
11243      emit_insn (gen_neg<mode>2 (neg, operands[2]));
11244      emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
11245      DONE;
11246    }
11247})
11248
;; Per-element logical right shift for the VI48_256 modes.  There is no
;; preparation code: the lshiftrt template is emitted as is, and the
;; expander exists only under TARGET_AVX2.
11249(define_expand "vlshr<mode>3"
11250  [(set (match_operand:VI48_256 0 "register_operand" "")
11251	(lshiftrt:VI48_256
11252	  (match_operand:VI48_256 1 "register_operand" "")
11253	  (match_operand:VI48_256 2 "nonimmediate_operand" "")))]
11254  "TARGET_AVX2")
11255
;; Per-element arithmetic right shift for the VI128_128 modes (XOP only).
;; The counts in operand 2 are negated into a fresh register and passed
;; to xop_sha<mode>3, whose pattern performs an arithmetic right shift
;; for negative counts.  Always ends with DONE.
11256(define_expand "vashr<mode>3"
11257  [(set (match_operand:VI128_128 0 "register_operand" "")
11258	(ashiftrt:VI128_128
11259	  (match_operand:VI128_128 1 "register_operand" "")
11260	  (match_operand:VI128_128 2 "nonimmediate_operand" "")))]
11261  "TARGET_XOP"
11262{
11263  rtx neg = gen_reg_rtx (<MODE>mode);
11264  emit_insn (gen_neg<mode>2 (neg, operands[2]));
11265  emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
11266  DONE;
11267})
11268
;; Per-element arithmetic right shift for V4SI, available with AVX2 or
;; XOP.  With TARGET_AVX2 the ashiftrt template above is emitted
;; unchanged.  Without AVX2 the counts are negated with negv4si2 and
;; xop_shav4si3 is emitted instead.
11269(define_expand "vashrv4si3"
11270  [(set (match_operand:V4SI 0 "register_operand" "")
11271	(ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "")
11272		       (match_operand:V4SI 2 "nonimmediate_operand" "")))]
11273  "TARGET_AVX2 || TARGET_XOP"
11274{
11275  if (!TARGET_AVX2)
11276    {
11277      rtx neg = gen_reg_rtx (V4SImode);
11278      emit_insn (gen_negv4si2 (neg, operands[2]));
11279      emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
11280      DONE;
11281    }
11282})
11283
;; Per-element arithmetic right shift for V8SI.  There is no preparation
;; code: the ashiftrt template is emitted as is, and the expander exists
;; only under TARGET_AVX2.
11284(define_expand "vashrv8si3"
11285  [(set (match_operand:V8SI 0 "register_operand" "")
11286	(ashiftrt:V8SI (match_operand:V8SI 1 "register_operand" "")
11287		       (match_operand:V8SI 2 "nonimmediate_operand" "")))]
11288  "TARGET_AVX2")
11289
;; Per-element left shift for the VI12_128 modes (XOP only).  The
;; operands are forwarded unchanged to xop_sha<mode>3, whose pattern
;; performs a left shift for non-negative counts.  Always ends with DONE.
11290(define_expand "vashl<mode>3"
11291  [(set (match_operand:VI12_128 0 "register_operand" "")
11292	(ashift:VI12_128
11293	  (match_operand:VI12_128 1 "register_operand" "")
11294	  (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
11295  "TARGET_XOP"
11296{
11297  emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
11298  DONE;
11299})
11300
;; Per-element left shift for the VI48_128 modes, available with AVX2 or
;; XOP.  With TARGET_AVX2 the ashift template above is emitted unchanged.
;; Without AVX2 the count vector is first forced into a register and
;; xop_sha<mode>3 is emitted instead.
11301(define_expand "vashl<mode>3"
11302  [(set (match_operand:VI48_128 0 "register_operand" "")
11303	(ashift:VI48_128
11304	  (match_operand:VI48_128 1 "register_operand" "")
11305	  (match_operand:VI48_128 2 "nonimmediate_operand" "")))]
11306  "TARGET_AVX2 || TARGET_XOP"
11307{
11308  if (!TARGET_AVX2)
11309    {
11310      operands[2] = force_reg (<MODE>mode, operands[2]);
11311      emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
11312      DONE;
11313    }
11314})
11315
;; Per-element left shift for the VI48_256 modes.  There is no
;; preparation code: the ashift template is emitted as is, and the
;; expander exists only under TARGET_AVX2.
11316(define_expand "vashl<mode>3"
11317  [(set (match_operand:VI48_256 0 "register_operand" "")
11318	(ashift:VI48_256
11319	  (match_operand:VI48_256 1 "register_operand" "")
11320	  (match_operand:VI48_256 2 "nonimmediate_operand" "")))]
11321  "TARGET_AVX2")
11322
;; Variable arithmetic shift (vpsha<ssemodesuffix>).
;; The RTL selects on the sign of the count in operand 2: a count >= 0
;; shifts operand 1 left by that count, otherwise it shifts arithmetically
;; right by the negated count.  Either operand 1 or operand 2 may be
;; memory, but the insn condition rejects both being memory at once.
11323(define_insn "xop_sha<mode>3"
11324  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11325	(if_then_else:VI_128
11326	 (ge:VI_128
11327	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11328	  (const_int 0))
11329	 (ashift:VI_128
11330	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11331	  (match_dup 2))
11332	 (ashiftrt:VI_128
11333	  (match_dup 1)
11334	  (neg:VI_128 (match_dup 2)))))]
11335  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11336  "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11337  [(set_attr "type" "sseishft")
11338   (set_attr "prefix_data16" "0")
11339   (set_attr "prefix_extra" "2")
11340   (set_attr "mode" "TI")])
11341
;; Variable logical shift (vpshl<ssemodesuffix>).
;; The RTL selects on the sign of the count in operand 2: a count >= 0
;; shifts operand 1 left by that count, otherwise it shifts logically
;; right by the negated count.  Either operand 1 or operand 2 may be
;; memory, but the insn condition rejects both being memory at once.
11342(define_insn "xop_shl<mode>3"
11343  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11344	(if_then_else:VI_128
11345	 (ge:VI_128
11346	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11347	  (const_int 0))
11348	 (ashift:VI_128
11349	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11350	  (match_dup 2))
11351	 (lshiftrt:VI_128
11352	  (match_dup 1)
11353	  (neg:VI_128 (match_dup 2)))))]
11354  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11355  "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11356  [(set_attr "type" "sseishft")
11357   (set_attr "prefix_data16" "0")
11358   (set_attr "prefix_extra" "2")
11359   (set_attr "mode" "TI")])
11360
11361;; SSE2 doesn't have some shift variants, so define versions for XOP
;; Left shift of every byte of a V16QI vector by one scalar count
;; (operand 2, SImode, register or constant).  The count is placed in all
;; 16 slots of a PARALLEL, a V16QI register is initialised from it with
;; vec_initv16qi, and xop_shav16qi3 is emitted with that register as the
;; per-element count.  Always ends with DONE.
11362(define_expand "ashlv16qi3"
11363  [(set (match_operand:V16QI 0 "register_operand" "")
11364	(ashift:V16QI
11365	  (match_operand:V16QI 1 "register_operand" "")
11366	  (match_operand:SI 2 "nonmemory_operand" "")))]
11367  "TARGET_XOP"
11368{
11369  rtx reg = gen_reg_rtx (V16QImode);
11370  rtx par;
11371  int i;
11372
11373  par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
11374  for (i = 0; i < 16; i++)
11375    XVECEXP (par, 0, i) = operands[2];
11376
11377  emit_insn (gen_vec_initv16qi (reg, par));
11378  emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], reg));
11379  DONE;
11380})
11381
;; Right shifts (one expander per code in the any_shiftrt iterator) of
;; every byte of a V16QI vector by one scalar count in operand 2.
;; The XOP shift insns shift right only for negative counts, so the count
;; must be negated.  A CONST_INT count is negated up front; any other
;; count is broadcast first and the resulting vector is negated with
;; negv16qi2.  LSHIFTRT is then emitted through xop_shlv16qi3 and the
;; other code through xop_shav16qi3.  Always ends with DONE.
11382(define_expand "<shift_insn>v16qi3"
11383  [(set (match_operand:V16QI 0 "register_operand" "")
11384	(any_shiftrt:V16QI
11385	  (match_operand:V16QI 1 "register_operand" "")
11386	  (match_operand:SI 2 "nonmemory_operand" "")))]
11387  "TARGET_XOP"
11388{
11389  rtx reg = gen_reg_rtx (V16QImode);
11390  rtx par;
11391  bool negate = false;
11392  rtx (*shift_insn)(rtx, rtx, rtx);
11393  int i;
11394
11395  if (CONST_INT_P (operands[2]))
11396    operands[2] = GEN_INT (-INTVAL (operands[2]));
11397  else
11398    negate = true;
11399
11400  par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
11401  for (i = 0; i < 16; i++)
11402    XVECEXP (par, 0, i) = operands[2];
11403
11404  emit_insn (gen_vec_initv16qi (reg, par));
11405
11406  if (negate)
11407    emit_insn (gen_negv16qi2 (reg, reg));
11408
11409  if (<CODE> == LSHIFTRT)
11410    shift_insn = gen_xop_shlv16qi3;
11411  else
11412    shift_insn = gen_xop_shav16qi3;
11413
11414  emit_insn (shift_insn (operands[0], operands[1], reg));
11415  DONE;
11416})
11417
;; Arithmetic right shift of both quadwords of a V2DI vector by one
;; scalar DImode count (operand 2).  The count is negated because
;; xop_shav2di3 shifts right only for negative counts: a CONST_INT is
;; negated up front, while any other count is broadcast with vec_initv2di
;; and the resulting vector is negated with negv2di2.
;; Always ends with DONE.
11418(define_expand "ashrv2di3"
11419  [(set (match_operand:V2DI 0 "register_operand" "")
11420	(ashiftrt:V2DI
11421	  (match_operand:V2DI 1 "register_operand" "")
11422	  (match_operand:DI 2 "nonmemory_operand" "")))]
11423  "TARGET_XOP"
11424{
11425  rtx reg = gen_reg_rtx (V2DImode);
11426  rtx par;
11427  bool negate = false;
11428  int i;
11429
11430  if (CONST_INT_P (operands[2]))
11431    operands[2] = GEN_INT (-INTVAL (operands[2]));
11432  else
11433    negate = true;
11434
11435  par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
11436  for (i = 0; i < 2; i++)
11437    XVECEXP (par, 0, i) = operands[2];
11438
11439  emit_insn (gen_vec_initv2di (reg, par));
11440
11441  if (negate)
11442    emit_insn (gen_negv2di2 (reg, reg));
11443
11444  emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
11445  DONE;
11446})
11447
11448;; XOP FRCZ support
;; vfrcz<ssemodesuffix> for every mode in the FMAMODE iterator, modelled
;; as an opaque UNSPEC_FRCZ of operand 1 (register or memory).
;; Enabled only for TARGET_XOP.
11449(define_insn "xop_frcz<mode>2"
11450  [(set (match_operand:FMAMODE 0 "register_operand" "=x")
11451	(unspec:FMAMODE
11452	 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
11453	 UNSPEC_FRCZ))]
11454  "TARGET_XOP"
11455  "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11456  [(set_attr "type" "ssecvt1")
11457   (set_attr "mode" "<MODE>")])
11458
;; Expander for the scalar form of vfrcz on a 128-bit float vector.
;; The preparation statement sets operands[2] to the all-zero vector of
;; the mode, so the emitted vec_merge takes element 0 (mask 1) from the
;; UNSPEC_FRCZ result and the remaining elements from zero.
11459(define_expand "xop_vmfrcz<mode>2"
11460  [(set (match_operand:VF_128 0 "register_operand")
11461	(vec_merge:VF_128
11462	  (unspec:VF_128
11463	   [(match_operand:VF_128 1 "nonimmediate_operand")]
11464	   UNSPEC_FRCZ)
11465	  (match_dup 2)
11466	  (const_int 1)))]
11467  "TARGET_XOP"
11468  "operands[2] = CONST0_RTX (<MODE>mode);")
11469
;; Insn matching a scalar vfrcz: element 0 of the result comes from
;; UNSPEC_FRCZ of operand 1 and the other elements from operand 2, which
;; must satisfy const0_operand.  Emits vfrcz<ssescalarmodesuffix>.
11470(define_insn "*xop_vmfrcz<mode>2"
11471  [(set (match_operand:VF_128 0 "register_operand" "=x")
11472	(vec_merge:VF_128
11473	  (unspec:VF_128
11474	   [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
11475	   UNSPEC_FRCZ)
11476	  (match_operand:VF_128 2 "const0_operand")
11477	  (const_int 1)))]
11478  "TARGET_XOP"
11479  "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11480  [(set_attr "type" "ssecvt1")
11481   (set_attr "mode" "<MODE>")])
11482
;; Integer vector compare producing a per-element result (vpcom*).
;; Operand 1 is any operator accepted by ix86_comparison_int_operator,
;; applied to operand 2 (register) and operand 3 (register or memory).
;; NOTE(review): %Y1 presumably prints the condition-code part of the
;; mnemonic from operand 1 -- confirm against the operand printer.
11483(define_insn "xop_maskcmp<mode>3"
11484  [(set (match_operand:VI_128 0 "register_operand" "=x")
11485	(match_operator:VI_128 1 "ix86_comparison_int_operator"
11486	 [(match_operand:VI_128 2 "register_operand" "x")
11487	  (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11488  "TARGET_XOP"
11489  "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11490  [(set_attr "type" "sse4arg")
11491   (set_attr "prefix_data16" "0")
11492   (set_attr "prefix_rep" "0")
11493   (set_attr "prefix_extra" "2")
11494   (set_attr "length_immediate" "1")
11495   (set_attr "mode" "TI")])
11496
;; Unsigned integer vector compare (vpcom*u*).  Operand 1 is any operator
;; accepted by ix86_comparison_uns_operator, applied to operand 2
;; (register) and operand 3 (register or memory).  The mnemonic carries
;; a literal "u" between the condition and the element-size suffix.
11497(define_insn "xop_maskcmp_uns<mode>3"
11498  [(set (match_operand:VI_128 0 "register_operand" "=x")
11499	(match_operator:VI_128 1 "ix86_comparison_uns_operator"
11500	 [(match_operand:VI_128 2 "register_operand" "x")
11501	  (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11502  "TARGET_XOP"
11503  "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11504  [(set_attr "type" "ssecmp")
11505   (set_attr "prefix_data16" "0")
11506   (set_attr "prefix_rep" "0")
11507   (set_attr "prefix_extra" "2")
11508   (set_attr "length_immediate" "1")
11509   (set_attr "mode" "TI")])
11510
11511;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11512;; and pcomneu* not to be converted to the signed ones in case somebody needs
11513;; the exact instruction generated for the intrinsic.
;; Same output template as xop_maskcmp_uns<mode>3, but the comparison is
;; wrapped in UNSPEC_XOP_UNSIGNED_CMP so it is opaque to the RTL
;; optimizers and the unsigned mnemonic is kept as written.
11514(define_insn "xop_maskcmp_uns2<mode>3"
11515  [(set (match_operand:VI_128 0 "register_operand" "=x")
11516	(unspec:VI_128
11517	 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
11518	  [(match_operand:VI_128 2 "register_operand" "x")
11519	   (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
11520	 UNSPEC_XOP_UNSIGNED_CMP))]
11521  "TARGET_XOP"
11522  "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11523  [(set_attr "type" "ssecmp")
11524   (set_attr "prefix_data16" "0")
11525   (set_attr "prefix_extra" "2")
11526   (set_attr "length_immediate" "1")
11527   (set_attr "mode" "TI")])
11528
11529;; Pcomtrue and pcomfalse support.  These are useless instructions, but are
11530;; being added here to be complete.
;; vpcomtrue / vpcomfalse, modelled as UNSPEC_XOP_TRUEFALSE.
;; Operand 3 is a compile-time integer selecting the mnemonic: non-zero
;; emits vpcomtrue<ssemodesuffix>, zero emits vpcomfalse<ssemodesuffix>.
;; Operands 1 and 2 are passed through as the instruction's sources.
11531(define_insn "xop_pcom_tf<mode>3"
11532  [(set (match_operand:VI_128 0 "register_operand" "=x")
11533	(unspec:VI_128
11534	  [(match_operand:VI_128 1 "register_operand" "x")
11535	   (match_operand:VI_128 2 "nonimmediate_operand" "xm")
11536	   (match_operand:SI 3 "const_int_operand" "n")]
11537	  UNSPEC_XOP_TRUEFALSE))]
11538  "TARGET_XOP"
11539{
11540  return ((INTVAL (operands[3]) != 0)
11541	  ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11542	  : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
11543}
11544  [(set_attr "type" "ssecmp")
11545   (set_attr "prefix_data16" "0")
11546   (set_attr "prefix_extra" "2")
11547   (set_attr "length_immediate" "1")
11548   (set_attr "mode" "TI")])
11549
;; vpermil2<ssemodesuffix>, modelled as an opaque UNSPEC_VPERMIL2 with
;; four inputs: two float vectors (operands 1 and 2), an integer vector
;; of the matching <sseintvecmode> (operand 3, register or memory) and an
;; immediate in the range 0..3 (operand 4).
;; NOTE(review): operand 2 uses a nonimmediate_operand predicate but only
;; the register constraint "%x", so a memory operand 2 has to be reloaded
;; into a register -- confirm this is intended.
11550(define_insn "xop_vpermil2<mode>3"
11551  [(set (match_operand:VF 0 "register_operand" "=x")
11552	(unspec:VF
11553	  [(match_operand:VF 1 "register_operand" "x")
11554	   (match_operand:VF 2 "nonimmediate_operand" "%x")
11555	   (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
11556	   (match_operand:SI 4 "const_0_to_3_operand" "n")]
11557	  UNSPEC_VPERMIL2))]
11558  "TARGET_XOP"
11559  "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11560  [(set_attr "type" "sse4arg")
11561   (set_attr "length_immediate" "1")
11562   (set_attr "mode" "<MODE>")])
11563
11564;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11565
;; AES encryption round (UNSPEC_AESENC) on V2DI operands.
;; Alternative 0 (isa noavx) is the two-operand form: operand 1 is tied
;; to the destination by the "0" constraint and aesenc is emitted.
;; Alternative 1 (isa avx) is the three-operand VEX form vaesenc.
;; Operand 2 may be a register or memory.  Enabled only for TARGET_AES.
11566(define_insn "aesenc"
11567  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11568	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11569		       (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11570		      UNSPEC_AESENC))]
11571  "TARGET_AES"
11572  "@
11573   aesenc\t{%2, %0|%0, %2}
11574   vaesenc\t{%2, %1, %0|%0, %1, %2}"
11575  [(set_attr "isa" "noavx,avx")
11576   (set_attr "type" "sselog1")
11577   (set_attr "prefix_extra" "1")
11578   (set_attr "prefix" "orig,vex")
11579   (set_attr "mode" "TI")])
11580
;; Final AES encryption round (UNSPEC_AESENCLAST) on V2DI operands.
;; Alternative 0 (isa noavx) ties operand 1 to the destination and emits
;; aesenclast; alternative 1 (isa avx) emits the three-operand
;; vaesenclast.  Enabled only for TARGET_AES.
11581(define_insn "aesenclast"
11582  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11583	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11584		       (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11585		      UNSPEC_AESENCLAST))]
11586  "TARGET_AES"
11587  "@
11588   aesenclast\t{%2, %0|%0, %2}
11589   vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11590  [(set_attr "isa" "noavx,avx")
11591   (set_attr "type" "sselog1")
11592   (set_attr "prefix_extra" "1")
11593   (set_attr "prefix" "orig,vex")
11594   (set_attr "mode" "TI")])
11595
;; AES decryption round (UNSPEC_AESDEC) on V2DI operands.
;; Alternative 0 (isa noavx) ties operand 1 to the destination and emits
;; aesdec; alternative 1 (isa avx) emits the three-operand vaesdec.
;; Enabled only for TARGET_AES.
11596(define_insn "aesdec"
11597  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11598	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11599		       (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11600		      UNSPEC_AESDEC))]
11601  "TARGET_AES"
11602  "@
11603   aesdec\t{%2, %0|%0, %2}
11604   vaesdec\t{%2, %1, %0|%0, %1, %2}"
11605  [(set_attr "isa" "noavx,avx")
11606   (set_attr "type" "sselog1")
11607   (set_attr "prefix_extra" "1")
11608   (set_attr "prefix" "orig,vex")
11609   (set_attr "mode" "TI")])
11610
;; Final AES decryption round (UNSPEC_AESDECLAST) on V2DI operands.
;; Alternative 0 (isa noavx) ties operand 1 to the destination and emits
;; aesdeclast; alternative 1 (isa avx) emits the three-operand
;; vaesdeclast.  Enabled only for TARGET_AES.
11611(define_insn "aesdeclast"
11612  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11613	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11614		       (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11615		      UNSPEC_AESDECLAST))]
11616  "TARGET_AES"
11617  "@
11618   aesdeclast\t{%2, %0|%0, %2}
11619   vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11620  [(set_attr "isa" "noavx,avx")
11621   (set_attr "type" "sselog1")
11622   (set_attr "prefix_extra" "1")
11623   (set_attr "prefix" "orig,vex")
11624   (set_attr "mode" "TI")])
11625
;; Wrap the aesimc instruction as a one-input V2DI unspec (UNSPEC_AESIMC).
;; Enabled by TARGET_AES.
;;   operand 0: destination register
;;   operand 1: source, register or memory
;; A single alternative serves both encodings: the template starts with
;; "%v" and the prefix attribute is "maybe_vex" (presumably the "v" is
;; printed only when AVX is enabled -- the modifier is defined outside
;; this file section).
11626(define_insn "aesimc"
11627  [(set (match_operand:V2DI 0 "register_operand" "=x")
11628	(unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11629		      UNSPEC_AESIMC))]
11630  "TARGET_AES"
11631  "%vaesimc\t{%1, %0|%0, %1}"
11632  [(set_attr "type" "sselog1")
11633   (set_attr "prefix_extra" "1")
11634   (set_attr "prefix" "maybe_vex")
11635   (set_attr "mode" "TI")])
11636
;; Wrap the aeskeygenassist instruction as a V2DI unspec
;; (UNSPEC_AESKEYGENASSIST).  Enabled by TARGET_AES.
;;   operand 0: destination register
;;   operand 1: source, register or memory
;;   operand 2: immediate accepted by const_0_to_255_operand; accounted
;;              for by length_immediate "1"
;; Like aesimc, one alternative covers both the legacy and the VEX
;; encoding via the "%v" template prefix and prefix "maybe_vex".
11637(define_insn "aeskeygenassist"
11638  [(set (match_operand:V2DI 0 "register_operand" "=x")
11639	(unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11640		      (match_operand:SI 2 "const_0_to_255_operand" "n")]
11641		     UNSPEC_AESKEYGENASSIST))]
11642  "TARGET_AES"
11643  "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11644  [(set_attr "type" "sselog1")
11645   (set_attr "prefix_extra" "1")
11646   (set_attr "length_immediate" "1")
11647   (set_attr "prefix" "maybe_vex")
11648   (set_attr "mode" "TI")])
11649
;; Wrap the pclmulqdq instruction as a V2DI unspec (UNSPEC_PCLMUL).
;; Enabled by TARGET_PCLMUL.
;;   operand 0: destination register
;;   operand 1: first source register (tied to operand 0 in alternative 0)
;;   operand 2: second source, register or memory
;;   operand 3: immediate accepted by const_0_to_255_operand
;; Alternative 0 prints the legacy form (operand 1 is implicit because it
;; is the destination); alternative 1 prints the VEX four-operand form.
11650(define_insn "pclmulqdq"
11651  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11652	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11653		      (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
11654		      (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11655		     UNSPEC_PCLMUL))]
11656  "TARGET_PCLMUL"
11657  "@
11658   pclmulqdq\t{%3, %2, %0|%0, %2, %3}
11659   vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11660  [(set_attr "isa" "noavx,avx")
11661   (set_attr "type" "sselog1")
11662   (set_attr "prefix_extra" "1")
11663   (set_attr "length_immediate" "1")
11664   (set_attr "prefix" "orig,vex")
11665   (set_attr "mode" "TI")])
11666
;; Expander for vzeroall.  Instead of using the RTL template it builds
;; operand 0 by hand as a PARALLEL of nregs + 1 elements:
;;   element 0        : unspec_volatile [(const_int 0)] UNSPECV_VZEROALL
;;   elements 1..nregs: (set (reg:V8SI <SSE reg i>) (const_vector 0))
;; so that every SSE register is shown to the optimizers as being zeroed.
;; nregs is 16 when TARGET_64BIT and 8 otherwise.
11667(define_expand "avx_vzeroall"
11668  [(match_par_dup 0 [(const_int 0)])]
11669  "TARGET_AVX"
11670{
11671  int nregs = TARGET_64BIT ? 16 : 8;
11672  int regno;
11673
  /* One slot for the unspec_volatile marker plus one SET per register.  */
11674  operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11675
11676  XVECEXP (operands[0], 0, 0)
11677    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11678			       UNSPECV_VZEROALL);
11679
  /* Slot regno + 1 clears SSE register number regno.  */
11680  for (regno = 0; regno < nregs; regno++)
11681    XVECEXP (operands[0], 0, regno + 1)
11682      = gen_rtx_SET (VOIDmode,
11683		     gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11684		     CONST0_RTX (V8SImode));
11685})
11686
;; Matcher for the PARALLEL built by the avx_vzeroall expander.  The
;; whole PARALLEL must satisfy the "vzeroall_operation" predicate (defined
;; outside this file section) and start with the UNSPECV_VZEROALL
;; unspec_volatile.  Emits a bare "vzeroall"; the attributes record that
;; the instruction has no modrm byte and no memory operand.
11687(define_insn "*avx_vzeroall"
11688  [(match_parallel 0 "vzeroall_operation"
11689    [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11690  "TARGET_AVX"
11691  "vzeroall"
11692  [(set_attr "type" "sse")
11693   (set_attr "modrm" "0")
11694   (set_attr "memory" "none")
11695   (set_attr "prefix" "vex")
11696   (set_attr "mode" "OI")])
11697
11698;; Clear the upper 128bits of AVX registers, equivalent to a NOP
11699;; if the upper 128bits are unused.
;; The pattern is an unspec_volatile (UNSPECV_VZEROUPPER) carrying one
;; const_int operand.  Operand 0 is never printed by the template; its
;; meaning is decided by the code that generates or inspects this insn,
;; which is not visible in this part of the file.
11700(define_insn "avx_vzeroupper"
11701  [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
11702		    UNSPECV_VZEROUPPER)]
11703  "TARGET_AVX"
11704  "vzeroupper"
11705  [(set_attr "type" "sse")
11706   (set_attr "modrm" "0")
11707   (set_attr "memory" "none")
11708   (set_attr "prefix" "vex")
11709   (set_attr "mode" "OI")])
11710
;; Map an integer vector mode to the 128-bit mode with the same element
;; type: each 256-bit mode maps to its half-width counterpart and each
;; 128-bit mode maps to itself.  Used as <AVXTOSSEMODE> by
;; avx2_pbroadcast<mode> for the mode of the source operand.
11711(define_mode_attr AVXTOSSEMODE
11712  [(V4DI "V2DI") (V2DI "V2DI")
11713   (V8SI "V4SI") (V4SI "V4SI")
11714   (V16HI "V8HI") (V8HI "V8HI")
11715   (V32QI "V16QI") (V16QI "V16QI")])
11716
;; AVX2 integer broadcast, instantiated once per mode of the VI iterator
;; (defined outside this file section).
;;   operand 0: destination register in mode VI
;;   operand 1: 128-bit source (<AVXTOSSEMODE>), register or memory
;; The RTL selects element 0 of operand 1 and duplicates it into every
;; element of operand 0; the mnemonic suffix comes from <ssemodesuffix>.
11717(define_insn "avx2_pbroadcast<mode>"
11718  [(set (match_operand:VI 0 "register_operand" "=x")
11719	(vec_duplicate:VI
11720	  (vec_select:<ssescalarmode>
11721	    (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
11722	    (parallel [(const_int 0)]))))]
11723  "TARGET_AVX2"
11724  "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
11725  [(set_attr "type" "ssemov")
11726   (set_attr "prefix_extra" "1")
11727   (set_attr "prefix" "vex")
11728   (set_attr "mode" "<sseinsnmode>")])
11729
;; Variable V8SI permute via vpermd, kept opaque as UNSPEC_VPERMSI.
;;   operand 0: destination register
;;   operand 1: register or memory; printed as the r/m source of vpermd
;;   operand 2: register; printed as the first (register-only) source
;; Note the template prints operand 2 before operand 1 in Intel syntax.
;; NOTE(review): operand 1 is presumably the data vector and operand 2 the
;; index vector -- confirm against the builtin expander.
11730(define_insn "avx2_permvarv8si"
11731  [(set (match_operand:V8SI 0 "register_operand" "=x")
11732	(unspec:V8SI
11733	  [(match_operand:V8SI 1 "nonimmediate_operand" "xm")
11734	   (match_operand:V8SI 2 "register_operand" "x")]
11735	  UNSPEC_VPERMSI))]
11736  "TARGET_AVX2"
11737  "vpermd\t{%1, %2, %0|%0, %2, %1}"
11738  [(set_attr "type" "sselog")
11739   (set_attr "prefix" "vex")
11740   (set_attr "mode" "OI")])
11741
;; Immediate-controlled V4DF permute via vpermpd, kept opaque as
;; UNSPEC_VPERMDF.
;;   operand 0: destination register
;;   operand 1: source, register or memory
;;   operand 2: immediate accepted by const_0_to_255_operand
;; Operand 1 uses nonimmediate_operand so that the predicate agrees with
;; its "xm" constraint (and with the sibling patterns avx2_permvarv8sf and
;; avx2_permv4di_1).  With register_operand the memory alternative could
;; only be reached through reload, never by folding a load into the insn.
11742(define_insn "avx2_permv4df"
11743  [(set (match_operand:V4DF 0 "register_operand" "=x")
11744	(unspec:V4DF
11745	  [(match_operand:V4DF 1 "nonimmediate_operand" "xm")
11746	   (match_operand:SI 2 "const_0_to_255_operand" "n")]
11747	  UNSPEC_VPERMDF))]
11748  "TARGET_AVX2"
11749  "vpermpd\t{%2, %1, %0|%0, %1, %2}"
11750  [(set_attr "type" "sselog")
11751   (set_attr "prefix_extra" "1")
11752   (set_attr "prefix" "vex")
11753   (set_attr "mode" "OI")])
11754
;; Variable V8SF permute via vpermps, kept opaque as UNSPEC_VPERMSF.
;;   operand 0: destination register (V8SF)
;;   operand 1: V8SF value, register or memory; printed as the r/m source
;;   operand 2: V8SI register; printed as the first (register-only) source
;; Note the template prints operand 2 before operand 1 in Intel syntax.
;; NOTE(review): operand 2 is presumably the index vector -- confirm
;; against the builtin expander.
11755(define_insn "avx2_permvarv8sf"
11756  [(set (match_operand:V8SF 0 "register_operand" "=x")
11757	(unspec:V8SF
11758	  [(match_operand:V8SF 1 "nonimmediate_operand" "xm")
11759	   (match_operand:V8SI 2 "register_operand" "x")]
11760	  UNSPEC_VPERMSF))]
11761  "TARGET_AVX2"
11762  "vpermps\t{%1, %2, %0|%0, %2, %1}"
11763  [(set_attr "type" "sselog")
11764   (set_attr "prefix" "vex")
11765   (set_attr "mode" "OI")])
11766
;; Expander for an immediate-controlled V4DI permute.
;;   operand 0: destination register
;;   operand 1: source, register or memory
;;   operand 2: immediate accepted by const_0_to_255_operand
;; The immediate is split into four 2-bit fields (bits 1:0, 3:2, 5:4 and
;; 7:6), each passed as a separate selector to avx2_permv4di_1, which
;; represents the permute as a real vec_select.
11767(define_expand "avx2_permv4di"
11768  [(match_operand:V4DI 0 "register_operand" "")
11769   (match_operand:V4DI 1 "nonimmediate_operand" "")
11770   (match_operand:SI 2 "const_0_to_255_operand" "")]
11771  "TARGET_AVX2"
11772{
11773  int mask = INTVAL (operands[2]);
11774  emit_insn (gen_avx2_permv4di_1 (operands[0], operands[1],
11775				  GEN_INT ((mask >> 0) & 3),
11776				  GEN_INT ((mask >> 2) & 3),
11777				  GEN_INT ((mask >> 4) & 3),
11778				  GEN_INT ((mask >> 6) & 3)));
11779  DONE;
11780})
11781
;; V4DI permute expressed as a vec_select with four constant selectors.
;;   operand 0   : destination register
;;   operand 1   : source, register or memory
;;   operands 2-5: element indices, each accepted by const_0_to_3_operand
;; At output time the four indices are packed back into one 8-bit
;; immediate (operand 2 in bits 1:0 up to operand 5 in bits 7:6), which
;; overwrites operands[2] before the vpermq template is printed.
11782(define_insn "avx2_permv4di_1"
11783  [(set (match_operand:V4DI 0 "register_operand" "=x")
11784	(vec_select:V4DI
11785	  (match_operand:V4DI 1 "nonimmediate_operand" "xm")
11786	  (parallel [(match_operand 2 "const_0_to_3_operand" "")
11787		     (match_operand 3 "const_0_to_3_operand" "")
11788		     (match_operand 4 "const_0_to_3_operand" "")
11789		     (match_operand 5 "const_0_to_3_operand" "")])))]
11790  "TARGET_AVX2"
11791{
11792  int mask = 0;
11793  mask |= INTVAL (operands[2]) << 0;
11794  mask |= INTVAL (operands[3]) << 2;
11795  mask |= INTVAL (operands[4]) << 4;
11796  mask |= INTVAL (operands[5]) << 6;
  /* Reuse operand slot 2 to hold the packed immediate for printing.  */
11797  operands[2] = GEN_INT (mask);
11798  return "vpermq\t{%2, %1, %0|%0, %1, %2}";
11799}
11800  [(set_attr "type" "sselog")
11801   (set_attr "prefix" "vex")
11802   (set_attr "mode" "OI")])
11803
;; Two-source 128-bit-lane permute via vperm2i128, kept opaque as
;; UNSPEC_VPERMTI.
;;   operand 0: destination register
;;   operand 1: first source register
;;   operand 2: second source, register or memory
;;   operand 3: immediate accepted by const_0_to_255_operand
11804(define_insn "avx2_permv2ti"
11805  [(set (match_operand:V4DI 0 "register_operand" "=x")
11806	(unspec:V4DI
11807	  [(match_operand:V4DI 1 "register_operand" "x")
11808	   (match_operand:V4DI 2 "nonimmediate_operand" "xm")
11809	   (match_operand:SI 3 "const_0_to_255_operand" "n")]
11810	  UNSPEC_VPERMTI))]
11811  "TARGET_AVX2"
11812  "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11813  [(set_attr "type" "sselog")
11814   (set_attr "prefix" "vex")
11815   (set_attr "mode" "OI")])
11816
;; Broadcast element 0 of a V2DF register into all four elements of a
;; V4DF register using vbroadcastsd.  Guarded by TARGET_AVX2 and limited
;; to a register source by the register_operand predicate.
;;   operand 0: V4DF destination register
;;   operand 1: V2DF source register
11817(define_insn "avx2_vec_dupv4df"
11818  [(set (match_operand:V4DF 0 "register_operand" "=x")
11819	(vec_duplicate:V4DF
11820	  (vec_select:DF
11821	    (match_operand:V2DF 1 "register_operand" "x")
11822	    (parallel [(const_int 0)]))))]
11823  "TARGET_AVX2"
11824  "vbroadcastsd\t{%1, %0|%0, %1}"
11825  [(set_attr "type" "sselog1")
11826   (set_attr "prefix" "vex")
11827   (set_attr "mode" "V4DF")])
11828
11829;; Modes handled by AVX vec_dup patterns.
;; These are the 256-bit vector modes with 32-bit and 64-bit elements,
;; both integer and floating point.
11830(define_mode_iterator AVX_VEC_DUP_MODE
11831  [V8SI V8SF V4DI V4DF])
11832
;; Broadcast a scalar into every element of a 256-bit vector, for each
;; mode in AVX_VEC_DUP_MODE.
;;   operand 0: destination register
;;   operand 1: scalar source in <ssescalarmode>
;; Alternative 0 (memory source) is emitted directly as a vbroadcast.
;; Alternative 1 (register source, discouraged by "?") has template "#",
;; i.e. it must be split; the define_split on
;; (vec_duplicate:AVX_VEC_DUP_MODE (reg)) in this file handles it after
;; reload.
11833(define_insn "vec_dup<mode>"
11834  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
11835	(vec_duplicate:AVX_VEC_DUP_MODE
11836	  (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
11837  "TARGET_AVX"
11838  "@
11839   vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11840   #"
11841  [(set_attr "type" "ssemov")
11842   (set_attr "prefix_extra" "1")
11843   (set_attr "prefix" "vex")
11844   (set_attr "mode" "V8SF")])
11845
;; Load a 128-bit value from memory and place it in both halves of a
;; 256-bit integer vector (vbroadcasti128), for each mode in VI_256
;; (iterator defined outside this file section).
;;   operand 0: destination register
;;   operand 1: half-width memory source; used for both halves of the
;;              vec_concat via match_dup
11846(define_insn "avx2_vbroadcasti128_<mode>"
11847  [(set (match_operand:VI_256 0 "register_operand" "=x")
11848	(vec_concat:VI_256
11849	  (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
11850	  (match_dup 1)))]
11851  "TARGET_AVX2"
11852  "vbroadcasti128\t{%1, %0|%0, %1}"
11853  [(set_attr "type" "ssemov")
11854   (set_attr "prefix_extra" "1")
11855   (set_attr "prefix" "vex")
11856   (set_attr "mode" "OI")])
11857
;; Post-reload split for a 256-bit vec_duplicate whose scalar source is
;; in a register.  It becomes two steps:
;;   1. duplicate the scalar into the half-width view of the destination
;;      (operand 2, the same hard register as operand 0 but in
;;      <ssehalfvecmode>);
;;   2. concatenate that half with itself to fill the full register.
11858(define_split
11859  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "")
11860	(vec_duplicate:AVX_VEC_DUP_MODE
11861	  (match_operand:<ssescalarmode> 1 "register_operand" "")))]
11862  "TARGET_AVX && reload_completed"
11863  [(set (match_dup 2)
11864	(vec_duplicate:<ssehalfvecmode> (match_dup 1)))
11865   (set (match_dup 0)
11866	(vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
11867  "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
11868
;; Replicate a 128-bit value into both halves of a 256-bit vector, for
;; each mode in V_256 (iterator defined outside this file section).
;;   operand 0: destination register
;;   operand 1: half-width source, used for both halves via match_dup
;; Three alternatives, chosen by where operand 1 lives:
;;   0. memory                    -> vbroadcast<i128>
;;   1. same register as operand 0 ("0") -> vinsert<i128> with immediate 1
;;   2. some other register (discouraged by "?") -> vperm2<i128> with
;;      immediate 0, printing operand 1 through the %t modifier
;; length_immediate is therefore 0 for the first alternative and 1 for
;; the other two.
11869(define_insn "avx_vbroadcastf128_<mode>"
11870  [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
11871	(vec_concat:V_256
11872	  (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11873	  (match_dup 1)))]
11874  "TARGET_AVX"
11875  "@
11876   vbroadcast<i128>\t{%1, %0|%0, %1}
11877   vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
11878   vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11879  [(set_attr "type" "ssemov,sselog1,sselog1")
11880   (set_attr "prefix_extra" "1")
11881   (set_attr "length_immediate" "0,1,1")
11882   (set_attr "prefix" "vex")
11883   (set_attr "mode" "<sseinsnmode>")])
11884
11885;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11886;; If it so happens that the input is in memory, use vbroadcast.
11887;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF instance.  The selector PARALLEL must satisfy
;; "avx_vbroadcast_operand" (defined outside this file section); its
;; first element, operand 3, is the index of the element to broadcast.
;;   alternatives 0 and 1 (memory source): the address is advanced by
;;     elt * 4 bytes and re-typed as SFmode, then vbroadcastss loads it;
;;   alternative 2 (register source): vpermilps with immediate
;;     elt * 0x55, i.e. the same 2-bit index repeated in all four fields.
11888(define_insn "*avx_vperm_broadcast_v4sf"
11889  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11890	(vec_select:V4SF
11891	  (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11892	  (match_parallel 2 "avx_vbroadcast_operand"
11893	    [(match_operand 3 "const_int_operand" "C,n,n")])))]
11894  "TARGET_AVX"
11895{
11896  int elt = INTVAL (operands[3]);
11897  switch (which_alternative)
11898    {
11899    case 0:
11900    case 1:
11901      operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11902      return "vbroadcastss\t{%1, %0|%0, %1}";
11903    case 2:
11904      operands[2] = GEN_INT (elt * 0x55);
11905      return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11906    default:
11907      gcc_unreachable ();
11908    }
11909}
11910  [(set_attr "type" "ssemov,ssemov,sselog1")
11911   (set_attr "prefix_extra" "1")
11912   (set_attr "length_immediate" "0,0,1")
11913   (set_attr "prefix" "vex")
11914   (set_attr "mode" "SF,SF,V4SF")])
11915
;; 256-bit counterpart of the V4SF broadcast-as-vec_select pattern, for
;; each mode in VF_256 (iterator defined outside this file section).
;; The insn itself only prints "#"; after reload it is always split.
;;   operand 0: destination register
;;   operand 1: source vector, memory or register
;;   operand 2: selector PARALLEL accepted by "avx_vbroadcast_operand"
;;   operand 3: first selector element = index of the broadcast element
;; Split strategy:
;;   * register source: emit avx_vpermil<mode> to replicate the element
;;     within its 128-bit lane, then avx_vperm2f128<mode>3 to copy that
;;     lane into both lanes of the destination, and finish with DONE;
;;   * memory source: narrow operand 1 to the scalar at byte offset
;;     elt * element size and fall through to the vec_duplicate template.
11916(define_insn_and_split "*avx_vperm_broadcast_<mode>"
11917  [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
11918	(vec_select:VF_256
11919	  (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
11920	  (match_parallel 2 "avx_vbroadcast_operand"
11921	    [(match_operand 3 "const_int_operand" "C,n,n")])))]
11922  "TARGET_AVX"
11923  "#"
11924  "&& reload_completed"
11925  [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
11926{
11927  rtx op0 = operands[0], op1 = operands[1];
11928  int elt = INTVAL (operands[3]);
11929
11930  if (REG_P (op1))
11931    {
11932      int mask;
11933
11934      /* Shuffle element we care about into all elements of the 128-bit lane.
11935	 The other lane gets shuffled too, but we don't care.  */
11936      if (<MODE>mode == V4DFmode)
11937	mask = (elt & 1 ? 15 : 0);
11938      else
11939	mask = (elt & 3) * 0x55;
11940      emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11941
11942      /* Shuffle the lane we care about into both lanes of the dest.  */
11943      mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11944      emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11945      DONE;
11946    }
11947
  /* Memory source: address just the selected scalar element.  */
11948  operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
11949				   elt * GET_MODE_SIZE (<ssescalarmode>mode));
11950})
11951
;; Expander for immediate-controlled vpermil on the modes of the VF2
;; iterator (defined outside this file section; the body special-cases
;; V4DFmode).
;;   operand 0: destination register
;;   operand 1: source, register or memory
;;   operand 2: on entry an immediate accepted by const_0_to_255_operand;
;;              on exit replaced by the equivalent selector PARALLEL
;; One immediate bit is consumed per element.  For V4DF, bits 2 and 3
;; select within the upper pair, hence the "+ 2" bias on those indices.
11952(define_expand "avx_vpermil<mode>"
11953  [(set (match_operand:VF2 0 "register_operand" "")
11954	(vec_select:VF2
11955	  (match_operand:VF2 1 "nonimmediate_operand" "")
11956	  (match_operand:SI 2 "const_0_to_255_operand" "")))]
11957  "TARGET_AVX"
11958{
11959  int mask = INTVAL (operands[2]);
11960  rtx perm[<ssescalarnum>];
11961
11962  perm[0] = GEN_INT (mask & 1);
11963  perm[1] = GEN_INT ((mask >> 1) & 1);
11964  if (<MODE>mode == V4DFmode)
11965    {
11966      perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11967      perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11968    }
11969
11970  operands[2]
11971    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
11972})
11973
;; Expander for immediate-controlled vpermil on the modes of the VF1
;; iterator (defined outside this file section; the body special-cases
;; V8SFmode).
;;   operand 0: destination register
;;   operand 1: source, register or memory
;;   operand 2: on entry an immediate accepted by const_0_to_255_operand;
;;              on exit replaced by the equivalent selector PARALLEL
;; The immediate holds four 2-bit indices.  For V8SF the same four
;; indices are reused for elements 4-7, biased by 4 so that they select
;; from the upper half of the source.
11974(define_expand "avx_vpermil<mode>"
11975  [(set (match_operand:VF1 0 "register_operand" "")
11976	(vec_select:VF1
11977	  (match_operand:VF1 1 "nonimmediate_operand" "")
11978	  (match_operand:SI 2 "const_0_to_255_operand" "")))]
11979  "TARGET_AVX"
11980{
11981  int mask = INTVAL (operands[2]);
11982  rtx perm[<ssescalarnum>];
11983
11984  perm[0] = GEN_INT (mask & 3);
11985  perm[1] = GEN_INT ((mask >> 2) & 3);
11986  perm[2] = GEN_INT ((mask >> 4) & 3);
11987  perm[3] = GEN_INT ((mask >> 6) & 3);
11988  if (<MODE>mode == V8SFmode)
11989    {
11990      perm[4] = GEN_INT ((mask & 3) + 4);
11991      perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11992      perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11993      perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11994    }
11995
11996  operands[2]
11997    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
11998})
11999
;; Matcher for a vec_select with a constant selector PARALLEL that can be
;; implemented by immediate-form vpermil, for each mode in VF (iterator
;; defined outside this file section).
;;   operand 0: destination register
;;   operand 1: source, register or memory
;;   operand 2: selector PARALLEL (operand 3 is its first element)
;; avx_vpermilp_parallel (defined outside this file) both validates the
;; selector in the insn condition and, at output time, yields the
;; immediate: the code uses its return value minus one as the imm8.
12000(define_insn "*avx_vpermilp<mode>"
12001  [(set (match_operand:VF 0 "register_operand" "=x")
12002	(vec_select:VF
12003	  (match_operand:VF 1 "nonimmediate_operand" "xm")
12004	  (match_parallel 2 ""
12005	    [(match_operand 3 "const_int_operand" "")])))]
12006  "TARGET_AVX
12007   && avx_vpermilp_parallel (operands[2], <MODE>mode)"
12008{
12009  int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
12010  operands[2] = GEN_INT (mask);
12011  return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
12012}
12013  [(set_attr "type" "sselog")
12014   (set_attr "prefix_extra" "1")
12015   (set_attr "length_immediate" "1")
12016   (set_attr "prefix" "vex")
12017   (set_attr "mode" "<MODE>")])
12018
;; Variable-control vpermil, kept opaque as UNSPEC_VPERMIL, for each mode
;; in VF (iterator defined outside this file section).
;;   operand 0: destination register
;;   operand 1: source register
;;   operand 2: control vector in the matching integer vector mode
;;              (<sseintvecmode>), register or memory
;; No immediate is involved, so there is no length_immediate attribute.
12019(define_insn "avx_vpermilvar<mode>3"
12020  [(set (match_operand:VF 0 "register_operand" "=x")
12021	(unspec:VF
12022	  [(match_operand:VF 1 "register_operand" "x")
12023	   (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
12024	  UNSPEC_VPERMIL))]
12025  "TARGET_AVX"
12026  "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12027  [(set_attr "type" "sselog")
12028   (set_attr "prefix_extra" "1")
12029   (set_attr "prefix" "vex")
12030   (set_attr "mode" "<MODE>")])
12031
;; Expander for vperm2f128-style lane permutes, for each mode in
;; AVX256MODE2P (iterator defined outside this file section).
;;   operand 0: destination register
;;   operand 1: first source register
;;   operand 2: second source, register or memory
;;   operand 3: immediate accepted by const_0_to_255_operand
;; When neither bit 3 nor bit 7 of the immediate is set
;; ((mask & 0x88) == 0) the operation is emitted as a vec_select out of
;; the vec_concat of both sources, with a selector built from immediate
;; bits 1:0 (low half of the result) and 5:4 (high half).  Otherwise the
;; body falls through and the UNSPEC_VPERMIL2F128 template is emitted.
12032(define_expand "avx_vperm2f128<mode>3"
12033  [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
12034	(unspec:AVX256MODE2P
12035	  [(match_operand:AVX256MODE2P 1 "register_operand" "")
12036	   (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
12037	   (match_operand:SI 3 "const_0_to_255_operand" "")]
12038	  UNSPEC_VPERMIL2F128))]
12039  "TARGET_AVX"
12040{
12041  int mask = INTVAL (operands[3]);
12042  if ((mask & 0x88) == 0)
12043    {
12044      rtx perm[<ssescalarnum>], t1, t2;
12045      int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
12046
      /* Low half of the result: the lane chosen by immediate bits 1:0.  */
12047      base = (mask & 3) * nelt2;
12048      for (i = 0; i < nelt2; ++i)
12049	perm[i] = GEN_INT (base + i);
12050
      /* High half of the result: the lane chosen by immediate bits 5:4.  */
12051      base = ((mask >> 4) & 3) * nelt2;
12052      for (i = 0; i < nelt2; ++i)
12053	perm[i + nelt2] = GEN_INT (base + i);
12054
12055      t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
12056			       operands[1], operands[2]);
12057      t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
12058      t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
12059      t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
12060      emit_insn (t2);
12061      DONE;
12062    }
12063})
12064
12065;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
12066;; means that in order to represent this properly in rtl we'd have to
12067;; nest *another* vec_concat with a zero operand and do the select from
12068;; a 4x wide vector.  That doesn't seem very nice.
;; This is therefore the opaque (unspec) form of the lane permute; it
;; accepts every 8-bit immediate and prints it unchanged.
;;   operand 0: destination register
;;   operand 1: first source register
;;   operand 2: second source, register or memory
;;   operand 3: immediate accepted by const_0_to_255_operand
12069(define_insn "*avx_vperm2f128<mode>_full"
12070  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12071	(unspec:AVX256MODE2P
12072	  [(match_operand:AVX256MODE2P 1 "register_operand" "x")
12073	   (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
12074	   (match_operand:SI 3 "const_0_to_255_operand" "n")]
12075	  UNSPEC_VPERMIL2F128))]
12076  "TARGET_AVX"
12077  "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12078  [(set_attr "type" "sselog")
12079   (set_attr "prefix_extra" "1")
12080   (set_attr "length_immediate" "1")
12081   (set_attr "prefix" "vex")
12082   (set_attr "mode" "<sseinsnmode>")])
12083
;; Lane permute expressed as a vec_select from the vec_concat of two
;; 256-bit sources (the form with no zeroed lanes).
;;   operand 0: destination register
;;   operand 1: first source register
;;   operand 2: second source, register or memory
;;   operand 3: selector PARALLEL (operand 4 is its first element)
;; avx_vperm2f128_parallel (defined outside this file) validates the
;; selector in the insn condition; its return value minus one is the
;; immediate.  Two immediates are special-cased to a 128-bit insert of
;; operand 2 (printed with %x) into operand 1:
;;   0x12 -> vinsert<i128> with immediate 0
;;   0x20 -> vinsert<i128> with immediate 1
;; Every other immediate is printed as a vperm2<i128>.
12084(define_insn "*avx_vperm2f128<mode>_nozero"
12085  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12086	(vec_select:AVX256MODE2P
12087	  (vec_concat:<ssedoublevecmode>
12088	    (match_operand:AVX256MODE2P 1 "register_operand" "x")
12089	    (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
12090	  (match_parallel 3 ""
12091	    [(match_operand 4 "const_int_operand" "")])))]
12092  "TARGET_AVX
12093   && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
12094{
12095  int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
12096  if (mask == 0x12)
12097    return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
12098  if (mask == 0x20)
12099    return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
12100  operands[3] = GEN_INT (mask);
12101  return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12102}
12103  [(set_attr "type" "sselog")
12104   (set_attr "prefix_extra" "1")
12105   (set_attr "length_immediate" "1")
12106   (set_attr "prefix" "vex")
12107   (set_attr "mode" "<sseinsnmode>")])
12108
;; Expander that inserts a 128-bit value into one half of a 256-bit
;; vector, for each mode in V_256 (iterator defined outside this file
;; section).
;;   operand 0: destination register
;;   operand 1: 256-bit source register
;;   operand 2: half-width value to insert, register or memory
;;   operand 3: half selector accepted by const_0_to_1_operand
;; Selector 0 dispatches to vec_set_lo_<mode>, selector 1 to
;; vec_set_hi_<mode>; any other value hits gcc_unreachable.
12109(define_expand "avx_vinsertf128<mode>"
12110  [(match_operand:V_256 0 "register_operand" "")
12111   (match_operand:V_256 1 "register_operand" "")
12112   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
12113   (match_operand:SI 3 "const_0_to_1_operand" "")]
12114  "TARGET_AVX"
12115{
12116  rtx (*insn)(rtx, rtx, rtx);
12117
12118  switch (INTVAL (operands[3]))
12119    {
12120    case 0:
12121      insn = gen_vec_set_lo_<mode>;
12122      break;
12123    case 1:
12124      insn = gen_vec_set_hi_<mode>;
12125      break;
12126    default:
12127      gcc_unreachable ();
12128    }
12129
12130  emit_insn (insn (operands[0], operands[1], operands[2]));
12131  DONE;
12132})
12133
;; Replace the low 128 bits of a V4DI value (AVX2, vinserti128 imm 0).
;; The result is the concatenation of operand 2 (new low half) with
;; elements 2 and 3 of operand 1 (the preserved high half).
;;   operand 0: destination register
;;   operand 1: V4DI source register
;;   operand 2: V2DI value to insert, register or memory
12134(define_insn "avx2_vec_set_lo_v4di"
12135  [(set (match_operand:V4DI 0 "register_operand" "=x")
12136	(vec_concat:V4DI
12137	  (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12138	  (vec_select:V2DI
12139	    (match_operand:V4DI 1 "register_operand" "x")
12140	    (parallel [(const_int 2) (const_int 3)]))))]
12141  "TARGET_AVX2"
12142  "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12143  [(set_attr "type" "sselog")
12144   (set_attr "prefix_extra" "1")
12145   (set_attr "length_immediate" "1")
12146   (set_attr "prefix" "vex")
12147   (set_attr "mode" "OI")])
12148
;; Replace the high 128 bits of a V4DI value (AVX2, vinserti128 imm 1).
;; The result is the concatenation of elements 0 and 1 of operand 1 (the
;; preserved low half) with operand 2 (new high half).
;;   operand 0: destination register
;;   operand 1: V4DI source register
;;   operand 2: V2DI value to insert, register or memory
12149(define_insn "avx2_vec_set_hi_v4di"
12150  [(set (match_operand:V4DI 0 "register_operand" "=x")
12151	(vec_concat:V4DI
12152	  (vec_select:V2DI
12153	    (match_operand:V4DI 1 "register_operand" "x")
12154	    (parallel [(const_int 0) (const_int 1)]))
12155	  (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
12156  "TARGET_AVX2"
12157  "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12158  [(set_attr "type" "sselog")
12159   (set_attr "prefix_extra" "1")
12160   (set_attr "length_immediate" "1")
12161   (set_attr "prefix" "vex")
12162   (set_attr "mode" "OI")])
12163
;; Replace the low half of a four-element 256-bit vector, for each mode
;; in VI8F_256 (iterator defined outside this file section).  Operand 2
;; becomes the new low half; elements 2 and 3 of operand 1 are kept as
;; the high half.  Printed as vinsert<i128> with immediate 0.
;;   operand 0: destination register
;;   operand 1: 256-bit source register
;;   operand 2: half-width value to insert, register or memory
12164(define_insn "vec_set_lo_<mode>"
12165  [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12166	(vec_concat:VI8F_256
12167	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12168	  (vec_select:<ssehalfvecmode>
12169	    (match_operand:VI8F_256 1 "register_operand" "x")
12170	    (parallel [(const_int 2) (const_int 3)]))))]
12171  "TARGET_AVX"
12172  "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12173  [(set_attr "type" "sselog")
12174   (set_attr "prefix_extra" "1")
12175   (set_attr "length_immediate" "1")
12176   (set_attr "prefix" "vex")
12177   (set_attr "mode" "<sseinsnmode>")])
12178
;; Replace the high half of a four-element 256-bit vector, for each mode
;; in VI8F_256 (iterator defined outside this file section).  Elements 0
;; and 1 of operand 1 are kept as the low half; operand 2 becomes the new
;; high half.  Printed as vinsert<i128> with immediate 1.
;;   operand 0: destination register
;;   operand 1: 256-bit source register
;;   operand 2: half-width value to insert, register or memory
12179(define_insn "vec_set_hi_<mode>"
12180  [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12181	(vec_concat:VI8F_256
12182	  (vec_select:<ssehalfvecmode>
12183	    (match_operand:VI8F_256 1 "register_operand" "x")
12184	    (parallel [(const_int 0) (const_int 1)]))
12185	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12186  "TARGET_AVX"
12187  "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12188  [(set_attr "type" "sselog")
12189   (set_attr "prefix_extra" "1")
12190   (set_attr "length_immediate" "1")
12191   (set_attr "prefix" "vex")
12192   (set_attr "mode" "<sseinsnmode>")])
12193
;; Replace the low half of an eight-element 256-bit vector, for each mode
;; in VI4F_256 (iterator defined outside this file section).  Operand 2
;; becomes the new low half; elements 4-7 of operand 1 are kept as the
;; high half.  Printed as vinsert<i128> with immediate 0.
;;   operand 0: destination register
;;   operand 1: 256-bit source register
;;   operand 2: half-width value to insert, register or memory
12194(define_insn "vec_set_lo_<mode>"
12195  [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12196	(vec_concat:VI4F_256
12197	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12198	  (vec_select:<ssehalfvecmode>
12199	    (match_operand:VI4F_256 1 "register_operand" "x")
12200	    (parallel [(const_int 4) (const_int 5)
12201		       (const_int 6) (const_int 7)]))))]
12202  "TARGET_AVX"
12203  "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12204  [(set_attr "type" "sselog")
12205   (set_attr "prefix_extra" "1")
12206   (set_attr "length_immediate" "1")
12207   (set_attr "prefix" "vex")
12208   (set_attr "mode" "<sseinsnmode>")])
12209
;; Replace the high half of an eight-element 256-bit vector, for each
;; mode in VI4F_256 (iterator defined outside this file section).
;; Elements 0-3 of operand 1 are kept as the low half; operand 2 becomes
;; the new high half.  Printed as vinsert<i128> with immediate 1.
;;   operand 0: destination register
;;   operand 1: 256-bit source register
;;   operand 2: half-width value to insert, register or memory
12210(define_insn "vec_set_hi_<mode>"
12211  [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12212	(vec_concat:VI4F_256
12213	  (vec_select:<ssehalfvecmode>
12214	    (match_operand:VI4F_256 1 "register_operand" "x")
12215	    (parallel [(const_int 0) (const_int 1)
12216		       (const_int 2) (const_int 3)]))
12217	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12218  "TARGET_AVX"
12219  "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12220  [(set_attr "type" "sselog")
12221   (set_attr "prefix_extra" "1")
12222   (set_attr "length_immediate" "1")
12223   (set_attr "prefix" "vex")
12224   (set_attr "mode" "<sseinsnmode>")])
12225
;; Replace the low half of a V16HI vector.  Operand 2 (V8HI) becomes the
;; new low half; elements 8-15 of operand 1 are kept as the high half.
;; Printed as a 128-bit vinsert with immediate 0.  The "%~" in the
;; mnemonic is an output modifier defined outside this file section
;; (presumably it picks the integer or FP spelling of the instruction --
;; TODO confirm).
;;   operand 0: destination register
;;   operand 1: V16HI source register
;;   operand 2: V8HI value to insert, register or memory
12226(define_insn "vec_set_lo_v16hi"
12227  [(set (match_operand:V16HI 0 "register_operand" "=x")
12228	(vec_concat:V16HI
12229	  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12230	  (vec_select:V8HI
12231	    (match_operand:V16HI 1 "register_operand" "x")
12232	    (parallel [(const_int 8) (const_int 9)
12233		       (const_int 10) (const_int 11)
12234		       (const_int 12) (const_int 13)
12235		       (const_int 14) (const_int 15)]))))]
12236  "TARGET_AVX"
12237  "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12238  [(set_attr "type" "sselog")
12239   (set_attr "prefix_extra" "1")
12240   (set_attr "length_immediate" "1")
12241   (set_attr "prefix" "vex")
12242   (set_attr "mode" "OI")])
12243
;; Replace the high half of a V16HI vector.  Elements 0-7 of operand 1
;; are kept as the low half; operand 2 (V8HI) becomes the new high half.
;; Printed as a 128-bit vinsert with immediate 1.  The "%~" in the
;; mnemonic is an output modifier defined outside this file section
;; (presumably it picks the integer or FP spelling of the instruction --
;; TODO confirm).
;;   operand 0: destination register
;;   operand 1: V16HI source register
;;   operand 2: V8HI value to insert, register or memory
12244(define_insn "vec_set_hi_v16hi"
12245  [(set (match_operand:V16HI 0 "register_operand" "=x")
12246	(vec_concat:V16HI
12247	  (vec_select:V8HI
12248	    (match_operand:V16HI 1 "register_operand" "x")
12249	    (parallel [(const_int 0) (const_int 1)
12250		       (const_int 2) (const_int 3)
12251		       (const_int 4) (const_int 5)
12252		       (const_int 6) (const_int 7)]))
12253	  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12254  "TARGET_AVX"
12255  "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12256  [(set_attr "type" "sselog")
12257   (set_attr "prefix_extra" "1")
12258   (set_attr "length_immediate" "1")
12259   (set_attr "prefix" "vex")
12260   (set_attr "mode" "OI")])
12261
;; Replace the low half of a V32QI vector.  Operand 2 (V16QI) becomes the
;; new low half; elements 16-31 of operand 1 are kept as the high half.
;; Printed as a 128-bit vinsert with immediate 0.  The "%~" in the
;; mnemonic is an output modifier defined outside this file section
;; (presumably it picks the integer or FP spelling of the instruction --
;; TODO confirm).
;;   operand 0: destination register
;;   operand 1: V32QI source register
;;   operand 2: V16QI value to insert, register or memory
12262(define_insn "vec_set_lo_v32qi"
12263  [(set (match_operand:V32QI 0 "register_operand" "=x")
12264	(vec_concat:V32QI
12265	  (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12266	  (vec_select:V16QI
12267	    (match_operand:V32QI 1 "register_operand" "x")
12268	    (parallel [(const_int 16) (const_int 17)
12269		       (const_int 18) (const_int 19)
12270		       (const_int 20) (const_int 21)
12271		       (const_int 22) (const_int 23)
12272		       (const_int 24) (const_int 25)
12273		       (const_int 26) (const_int 27)
12274		       (const_int 28) (const_int 29)
12275		       (const_int 30) (const_int 31)]))))]
12276  "TARGET_AVX"
12277  "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12278  [(set_attr "type" "sselog")
12279   (set_attr "prefix_extra" "1")
12280   (set_attr "length_immediate" "1")
12281   (set_attr "prefix" "vex")
12282   (set_attr "mode" "OI")])
12283
;; Replace the high half of a V32QI vector.  Elements 0-15 of operand 1
;; are kept as the low half; operand 2 (V16QI) becomes the new high half.
;; Printed as a 128-bit vinsert with immediate 1.  The "%~" in the
;; mnemonic is an output modifier defined outside this file section
;; (presumably it picks the integer or FP spelling of the instruction --
;; TODO confirm).
;;   operand 0: destination register
;;   operand 1: V32QI source register
;;   operand 2: V16QI value to insert, register or memory
12284(define_insn "vec_set_hi_v32qi"
12285  [(set (match_operand:V32QI 0 "register_operand" "=x")
12286	(vec_concat:V32QI
12287	  (vec_select:V16QI
12288	    (match_operand:V32QI 1 "register_operand" "x")
12289	    (parallel [(const_int 0) (const_int 1)
12290		       (const_int 2) (const_int 3)
12291		       (const_int 4) (const_int 5)
12292		       (const_int 6) (const_int 7)
12293		       (const_int 8) (const_int 9)
12294		       (const_int 10) (const_int 11)
12295		       (const_int 12) (const_int 13)
12296		       (const_int 14) (const_int 15)]))
12297	  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12298  "TARGET_AVX"
12299  "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12300  [(set_attr "type" "sselog")
12301   (set_attr "prefix_extra" "1")
12302   (set_attr "length_immediate" "1")
12303   (set_attr "prefix" "vex")
12304   (set_attr "mode" "OI")])
12305
;; Masked vector load, kept opaque as UNSPEC_MASKMOV, for each mode in
;; V48_AVX2 (iterator defined outside this file section).
;;   operand 0: destination register
;;   operand 1: memory source
;;   operand 2: mask register in the matching integer vector mode
;; Note the operand numbering: the mask is operand 2 but comes first in
;; the unspec vector, and the template prints it between the memory
;; source and the destination.
12306(define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
12307  [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
12308	(unspec:V48_AVX2
12309	  [(match_operand:<sseintvecmode> 2 "register_operand" "x")
12310	   (match_operand:V48_AVX2 1 "memory_operand" "m")]
12311	  UNSPEC_MASKMOV))]
12312  "TARGET_AVX"
12313  "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
12314  [(set_attr "type" "sselog1")
12315   (set_attr "prefix_extra" "1")
12316   (set_attr "prefix" "vex")
12317   (set_attr "mode" "<sseinsnmode>")])
12318
;; Masked vector store, kept opaque as UNSPEC_MASKMOV, for each mode in
;; V48_AVX2 (iterator defined outside this file section).
;;   operand 0: memory destination, constraint "+m"
;;   operand 1: mask register in the matching integer vector mode
;;   operand 2: value register to store
;; The old memory contents appear as an input (match_dup 0) in the
;; unspec, so the destination is modelled as read-modify-write.
12319(define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
12320  [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
12321	(unspec:V48_AVX2
12322	  [(match_operand:<sseintvecmode> 1 "register_operand" "x")
12323	   (match_operand:V48_AVX2 2 "register_operand" "x")
12324	   (match_dup 0)]
12325	  UNSPEC_MASKMOV))]
12326  "TARGET_AVX"
12327  "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12328  [(set_attr "type" "sselog1")
12329   (set_attr "prefix_extra" "1")
12330   (set_attr "prefix" "vex")
12331   (set_attr "mode" "<sseinsnmode>")])
12332
;; Cast a half-width vector to the full 256-bit mode (UNSPEC_CAST), for
;; each mode in AVX256MODE2P (iterator defined outside this file
;; section).  The insn prints "#" and is always split after reload into
;; a plain move between same-sized views:
;;   * register destination: operand 0 is re-viewed in the half-width
;;     mode and operand 1 is moved into it;
;;   * memory destination: operand 1 (a register in that alternative) is
;;     re-viewed in the full mode and stored.
;;   operand 0: destination, register or memory
;;   operand 1: half-width source
12333(define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
12334  [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12335	(unspec:AVX256MODE2P
12336	  [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
12337	  UNSPEC_CAST))]
12338  "TARGET_AVX"
12339  "#"
12340  "&& reload_completed"
12341  [(const_int 0)]
12342{
12343  rtx op0 = operands[0];
12344  rtx op1 = operands[1];
12345  if (REG_P (op0))
12346    op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
12347  else
12348    op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12349  emit_move_insn (op0, op1);
12350  DONE;
12351})
12352
;; Vector initialisation expander for each mode in V_256 (iterator
;; defined outside this file section).  All work is delegated to
;; ix86_expand_vector_init, called with a first argument of false.
;;   operand 0: destination register
;;   operand 1: unconstrained initialiser operand, passed through as-is
12353(define_expand "vec_init<mode>"
12354  [(match_operand:V_256 0 "register_operand" "")
12355   (match_operand 1 "" "")]
12356  "TARGET_AVX"
12357{
12358  ix86_expand_vector_init (false, operands[0], operands[1]);
12359  DONE;
12360})
12361
;; Expander that extracts one 128-bit half of a V4DI value.
;;   operand 0: V2DI destination, register or memory
;;   operand 1: V4DI source register
;;   operand 2: half selector accepted by const_0_to_1_operand
;; Selector 0 dispatches to vec_extract_lo_v4di, selector 1 to
;; vec_extract_hi_v4di (both defined outside this file section); any
;; other value hits gcc_unreachable.
12362(define_expand "avx2_extracti128"
12363  [(match_operand:V2DI 0 "nonimmediate_operand" "")
12364   (match_operand:V4DI 1 "register_operand" "")
12365   (match_operand:SI 2 "const_0_to_1_operand" "")]
12366  "TARGET_AVX2"
12367{
12368  rtx (*insn)(rtx, rtx);
12369
12370  switch (INTVAL (operands[2]))
12371    {
12372    case 0:
12373      insn = gen_vec_extract_lo_v4di;
12374      break;
12375    case 1:
12376      insn = gen_vec_extract_hi_v4di;
12377      break;
12378    default:
12379      gcc_unreachable ();
12380    }
12381
12382  emit_insn (insn (operands[0], operands[1]));
12383  DONE;
12384})
12385
;; Expander that inserts a V2DI value into one half of a V4DI value.
;;   operand 0: V4DI destination register
;;   operand 1: V4DI source register
;;   operand 2: V2DI value to insert, register or memory
;;   operand 3: half selector accepted by const_0_to_1_operand
;; Selector 0 dispatches to avx2_vec_set_lo_v4di, selector 1 to
;; avx2_vec_set_hi_v4di; any other value hits gcc_unreachable.
12386(define_expand "avx2_inserti128"
12387  [(match_operand:V4DI 0 "register_operand" "")
12388   (match_operand:V4DI 1 "register_operand" "")
12389   (match_operand:V2DI 2 "nonimmediate_operand" "")
12390   (match_operand:SI 3 "const_0_to_1_operand" "")]
12391  "TARGET_AVX2"
12392{
12393  rtx (*insn)(rtx, rtx, rtx);
12394
12395  switch (INTVAL (operands[3]))
12396    {
12397    case 0:
12398      insn = gen_avx2_vec_set_lo_v4di;
12399      break;
12400    case 1:
12401      insn = gen_avx2_vec_set_hi_v4di;
12402      break;
12403    default:
12404      gcc_unreachable ();
12405    }
12406
12407  emit_insn (insn (operands[0], operands[1], operands[2]));
12408  DONE;
12409})
12410
;; Per-element arithmetic right shift with a vector of shift counts
;; (vpsravd), for each mode in VI4_AVX2 (iterator defined outside this
;; file section).
;;   operand 0: destination register
;;   operand 1: value register
;;   operand 2: shift-count vector, register or memory
12411(define_insn "avx2_ashrv<mode>"
12412  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
12413	(ashiftrt:VI4_AVX2
12414	  (match_operand:VI4_AVX2 1 "register_operand" "x")
12415	  (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")))]
12416  "TARGET_AVX2"
12417  "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12418  [(set_attr "type" "sseishft")
12419   (set_attr "prefix" "vex")
12420   (set_attr "mode" "<sseinsnmode>")])
12421
;; Per-element shift with a vector of shift counts, instantiated for
;; every code in the any_lshift code iterator and every mode in
;; VI48_AVX2 (both defined outside this file section).  The mnemonic is
;; assembled from <vshift> and <ssemodesuffix>.
;;   operand 0: destination register
;;   operand 1: value register
;;   operand 2: shift-count vector, register or memory
12422(define_insn "avx2_<shift_insn>v<mode>"
12423  [(set (match_operand:VI48_AVX2 0 "register_operand" "=x")
12424	(any_lshift:VI48_AVX2
12425	  (match_operand:VI48_AVX2 1 "register_operand" "x")
12426	  (match_operand:VI48_AVX2 2 "nonimmediate_operand" "xm")))]
12427  "TARGET_AVX2"
12428  "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12429  [(set_attr "type" "sseishft")
12430   (set_attr "prefix" "vex")
12431   (set_attr "mode" "<sseinsnmode>")])
12432
;; Build a 256-bit vector from two half-width values, for each mode in
;; V_256 (iterator defined outside this file section).
;;   operand 0: destination register
;;   operand 1: low half, register
;;   operand 2: high half; must satisfy vector_move_operand
;; Alternative 0 (operand 2 in a register or memory): insert operand 2 as
;; the upper half of operand 1 with vinsert<i128> immediate 1; operand 1
;; is printed through %t.
;; Alternative 1 (operand 2 matches constraint "C", presumably the zero
;; vector -- TODO confirm): a 128-bit move of operand 1 into the %x view
;; of the destination, using vmovaps, vmovapd or vmovdqa according to the
;; insn's mode attribute.
12433(define_insn "avx_vec_concat<mode>"
12434  [(set (match_operand:V_256 0 "register_operand" "=x,x")
12435	(vec_concat:V_256
12436	  (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
12437	  (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
12438  "TARGET_AVX"
12439{
12440  switch (which_alternative)
12441    {
12442    case 0:
12443      return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12444    case 1:
12445      switch (get_attr_mode (insn))
12446	{
12447	case MODE_V8SF:
12448	  return "vmovaps\t{%1, %x0|%x0, %1}";
12449	case MODE_V4DF:
12450	  return "vmovapd\t{%1, %x0|%x0, %1}";
12451	default:
12452	  return "vmovdqa\t{%1, %x0|%x0, %1}";
12453	}
12454    default:
12455      gcc_unreachable ();
12456    }
12457}
12458  [(set_attr "type" "sselog,ssemov")
12459   (set_attr "prefix_extra" "1,*")
12460   (set_attr "length_immediate" "1,*")
12461   (set_attr "prefix" "vex")
12462   (set_attr "mode" "<sseinsnmode>")])
12463
;; 128-bit vcvtph2ps with a register source (TARGET_F16C).  The RTL
;; models the conversion as a V8SF unspec (UNSPEC_VCVTPH2PS) of the whole
;; V8HI register, from which only elements 0-3 are selected for the V4SF
;; result.
;;   operand 0: V4SF destination register
;;   operand 1: V8HI source register
12464(define_insn "vcvtph2ps"
12465  [(set (match_operand:V4SF 0 "register_operand" "=x")
12466	(vec_select:V4SF
12467	  (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12468		       UNSPEC_VCVTPH2PS)
12469	  (parallel [(const_int 0) (const_int 1)
12470		     (const_int 2) (const_int 3)])))]
12471  "TARGET_F16C"
12472  "vcvtph2ps\t{%1, %0|%0, %1}"
12473  [(set_attr "type" "ssecvt")
12474   (set_attr "prefix" "vex")
12475   (set_attr "mode" "V4SF")])
12476
;; 128-bit vcvtph2ps with a memory source (TARGET_F16C).  Only a V4HI
;; (64-bit) memory operand is read, so the conversion is expressed
;; directly as a V4SF unspec without a vec_select.
;;   operand 0: V4SF destination register
;;   operand 1: V4HI memory source
;; NOTE(review): the "mode" attribute is V8SF although the result is V4SF
;; and the register-source pattern "vcvtph2ps" uses V4SF -- looks
;; inconsistent; confirm whether V4SF was intended.
12477(define_insn "*vcvtph2ps_load"
12478  [(set (match_operand:V4SF 0 "register_operand" "=x")
12479	(unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12480		     UNSPEC_VCVTPH2PS))]
12481  "TARGET_F16C"
12482  "vcvtph2ps\t{%1, %0|%0, %1}"
12483  [(set_attr "type" "ssecvt")
12484   (set_attr "prefix" "vex")
12485   (set_attr "mode" "V8SF")])
12486
;; F16C, 256-bit form: converts a V8HI operand (register or memory, "xm")
;; into a V8SF register via UNSPEC_VCVTPH2PS.  Enabled only under
;; TARGET_F16C.
12487(define_insn "vcvtph2ps256"
12488  [(set (match_operand:V8SF 0 "register_operand" "=x")
12489	(unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12490		     UNSPEC_VCVTPH2PS))]
12491  "TARGET_F16C"
12492  "vcvtph2ps\t{%1, %0|%0, %1}"
12493  [(set_attr "type" "ssecvt")
12494   (set_attr "prefix" "vex")
12495   (set_attr "mode" "V8SF")])
12496
;; Expander for the V4SF -> half-precision conversion into a V8HI register.
;; Callers supply operands 0..2 (destination, V4SF source, and an immediate
;; accepted by const_0_to_255_operand).  Operand 3 is not supplied by the
;; caller: the preparation statement sets it to the V4HI zero constant, so
;; the generated RTL is a vec_concat of the V4HI unspec result with zero.
12497(define_expand "vcvtps2ph"
12498  [(set (match_operand:V8HI 0 "register_operand" "")
12499	(vec_concat:V8HI
12500	  (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12501			(match_operand:SI 2 "const_0_to_255_operand" "")]
12502		       UNSPEC_VCVTPS2PH)
12503	  (match_dup 3)))]
12504  "TARGET_F16C"
12505  "operands[3] = CONST0_RTX (V4HImode);")
12506
;; Register-destination vcvtps2ph.  Matches a V8HI vec_concat whose first
;; half is the V4HI UNSPEC_VCVTPS2PH of operand 1 with immediate operand 2,
;; and whose second half (operand 3) must satisfy const0_operand.
;; Operand 3 has an empty constraint and does not appear in the output
;; template.
12507(define_insn "*vcvtps2ph"
12508  [(set (match_operand:V8HI 0 "register_operand" "=x")
12509	(vec_concat:V8HI
12510	  (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12511			(match_operand:SI 2 "const_0_to_255_operand" "N")]
12512		       UNSPEC_VCVTPS2PH)
12513	  (match_operand:V4HI 3 "const0_operand" "")))]
12514  "TARGET_F16C"
12515  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12516  [(set_attr "type" "ssecvt")
12517   (set_attr "prefix" "vex")
12518   (set_attr "mode" "V4SF")])
12519
;; Memory-destination vcvtps2ph: stores the V4HI UNSPEC_VCVTPS2PH result of
;; V4SF register operand 1 (with immediate operand 2) straight to a V4HI
;; memory operand.  Unlike "*vcvtps2ph" there is no vec_concat with zero,
;; since only V4HI is written.
12520(define_insn "*vcvtps2ph_store"
12521  [(set (match_operand:V4HI 0 "memory_operand" "=m")
12522	(unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12523		      (match_operand:SI 2 "const_0_to_255_operand" "N")]
12524		     UNSPEC_VCVTPS2PH))]
12525  "TARGET_F16C"
12526  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12527  [(set_attr "type" "ssecvt")
12528   (set_attr "prefix" "vex")
12529   (set_attr "mode" "V4SF")])
12530
;; 256-bit-source vcvtps2ph: converts V8SF register operand 1 (with
;; immediate operand 2) into a V8HI destination, which may be either a
;; register or memory ("=xm").  Enabled only under TARGET_F16C.
12531(define_insn "vcvtps2ph256"
12532  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12533	(unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12534		      (match_operand:SI 2 "const_0_to_255_operand" "N")]
12535		     UNSPEC_VCVTPS2PH))]
12536  "TARGET_F16C"
12537  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12538  [(set_attr "type" "ssecvt")
12539   (set_attr "prefix" "vex")
12540   (set_attr "mode" "V8SF")])
12541
12542;; For gather* insn patterns
;; Result-vector modes the gather patterns are instantiated for: 2- and
;; 4-element DI/DF vectors and 4- and 8-element SI/SF vectors.
12543(define_mode_iterator VEC_GATHER_MODE
12544		      [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Index-vector mode used by the gathersi patterns for each result mode:
;; V8SI for the 8-element result modes, V4SI for every other mode
;; (including the 2-element V2DI/V2DF results).
12545(define_mode_attr VEC_GATHER_IDXSI
12546		      [(V2DI "V4SI") (V2DF "V4SI")
12547		       (V4DI "V4SI") (V4DF "V4SI")
12548		       (V4SI "V4SI") (V4SF "V4SI")
12549		       (V8SI "V8SI") (V8SF "V8SI")])
;; Index-vector mode used by the gatherdi patterns for each result mode:
;; V2DI for V2DI/V2DF and V4SI/V4SF results, V4DI for V4DI/V4DF and
;; V8SI/V8SF results.
12550(define_mode_attr VEC_GATHER_IDXDI
12551		      [(V2DI "V2DI") (V2DF "V2DI")
12552		       (V4DI "V4DI") (V4DF "V4DI")
12553		       (V4SI "V2DI") (V4SF "V2DI")
12554		       (V8SI "V4DI") (V8SF "V4DI")])
;; Mode of the first and last register inputs of the gatherdi unspec
;; (presumably the merge source and the mask -- confirm against the builtin
;; expanders).  Identical to the result mode except for the 8-element
;; modes, which map to their 4-element counterparts (V8SI -> V4SI,
;; V8SF -> V4SF).
12555(define_mode_attr VEC_GATHER_SRCDI
12556		      [(V2DI "V2DI") (V2DF "V2DF")
12557		       (V4DI "V4DI") (V4DF "V4DF")
12558		       (V4SI "V4SI") (V4SF "V4SF")
12559		       (V8SI "V4SI") (V8SF "V4SF")])
12560
;; Expander for gathers whose index vector has mode <VEC_GATHER_IDXSI>.
;; Operands supplied by the caller:
;;   0  result register (VEC_GATHER_MODE)
;;   1  first unspec input, same mode as the result
;;   2  base address (vsib_address_operand)
;;   3  index vector register (<VEC_GATHER_IDXSI>)
;;   4  last unspec input, same mode as the result
;;   5  scale, accepted by const1248_operand
;;   6  scratch register clobbered by the insn
;; Operand 7 is not supplied by the caller: the preparation statement
;; builds it as a Pmode UNSPEC_VSIBADDR of operands 2, 3 and 5, and
;; match_par_dup places it as the address of the <ssescalarmode> mem.
;; NOTE(review): (mem:BLK (scratch)) presumably expresses a dependence on
;; arbitrary memory -- confirm.
12561(define_expand "avx2_gathersi<mode>"
12562  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12563		   (unspec:VEC_GATHER_MODE
12564		     [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12565		      (mem:<ssescalarmode>
12566			(match_par_dup 7
12567			  [(match_operand 2 "vsib_address_operand" "")
12568			   (match_operand:<VEC_GATHER_IDXSI>
12569			      3 "register_operand" "")
12570			   (match_operand:SI 5 "const1248_operand" "")]))
12571		      (mem:BLK (scratch))
12572		      (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
12573		     UNSPEC_GATHER))
12574	      (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
12575  "TARGET_AVX2"
12576{
12577  operands[7]
12578    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12579					operands[5]), UNSPEC_VSIBADDR);
12580})
12581
;; Insn matched for gathersi when the first unspec input is a register.
;; Register tying, as expressed by the constraints:
;;   operand 2 (first input) uses "0", so it shares the register of result
;;     operand 0;
;;   operand 5 (last input) uses "1", so it shares the register of the
;;     clobbered scratch operand 1.
;; Both outputs are earlyclobber ("=&x").  Operand 7 is the mem operator
;; wrapping the UNSPEC_VSIBADDR address (base 3, index 4, scale 6).
;; The template prints scratch/last-input register %1, memory %7 and
;; result %0.
12582(define_insn "*avx2_gathersi<mode>"
12583  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12584	(unspec:VEC_GATHER_MODE
12585	  [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
12586	   (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12587	     [(unspec:P
12588		[(match_operand:P 3 "vsib_address_operand" "p")
12589		 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
12590		 (match_operand:SI 6 "const1248_operand" "n")]
12591		UNSPEC_VSIBADDR)])
12592	   (mem:BLK (scratch))
12593	   (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
12594	  UNSPEC_GATHER))
12595   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12596  "TARGET_AVX2"
12597  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
12598  [(set_attr "type" "ssemov")
12599   (set_attr "prefix" "vex")
12600   (set_attr "mode" "<sseinsnmode>")])
12601
;; Variant of "*avx2_gathersi<mode>" in which the first unspec element is
;; (pc) instead of a register operand, so result operand 0 is not tied to
;; any input.  Operand numbering shifts down by one accordingly: address
;; parts are 2 (base), 3 (index), 5 (scale); the mem operator is 6; the
;; last input is 4, tied via "1" to the clobbered scratch operand 1.
;; NOTE(review): (pc) presumably stands for a don't-care merge source --
;; confirm where this form is generated.
12602(define_insn "*avx2_gathersi<mode>_2"
12603  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12604	(unspec:VEC_GATHER_MODE
12605	  [(pc)
12606	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
12607	     [(unspec:P
12608		[(match_operand:P 2 "vsib_address_operand" "p")
12609		 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
12610		 (match_operand:SI 5 "const1248_operand" "n")]
12611		UNSPEC_VSIBADDR)])
12612	   (mem:BLK (scratch))
12613	   (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
12614	  UNSPEC_GATHER))
12615   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12616  "TARGET_AVX2"
12617  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
12618  [(set_attr "type" "ssemov")
12619   (set_attr "prefix" "vex")
12620   (set_attr "mode" "<sseinsnmode>")])
12621
;; Expander for gathers whose index vector has mode <VEC_GATHER_IDXDI>.
;; Operands supplied by the caller:
;;   0  result register (VEC_GATHER_MODE)
;;   1  first unspec input, mode <VEC_GATHER_SRCDI>
;;   2  base address (vsib_address_operand)
;;   3  index vector register (<VEC_GATHER_IDXDI>)
;;   4  last unspec input, mode <VEC_GATHER_SRCDI>
;;   5  scale, accepted by const1248_operand
;;   6  scratch register clobbered by the insn
;; Operand 7 is not supplied by the caller: the preparation statement
;; builds it as a Pmode UNSPEC_VSIBADDR of operands 2, 3 and 5, and
;; match_par_dup places it as the address of the <ssescalarmode> mem.
;; Unlike the gathersi expander, operands 1 and 4 use <VEC_GATHER_SRCDI>,
;; which differs from the result mode for V8SI/V8SF.
12622(define_expand "avx2_gatherdi<mode>"
12623  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12624		   (unspec:VEC_GATHER_MODE
12625		     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "")
12626		      (mem:<ssescalarmode>
12627			(match_par_dup 7
12628			  [(match_operand 2 "vsib_address_operand" "")
12629			   (match_operand:<VEC_GATHER_IDXDI>
12630			      3 "register_operand" "")
12631			   (match_operand:SI 5 "const1248_operand" "")]))
12632		      (mem:BLK (scratch))
12633		      (match_operand:<VEC_GATHER_SRCDI>
12634			4 "register_operand" "")]
12635		     UNSPEC_GATHER))
12636	      (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
12637  "TARGET_AVX2"
12638{
12639  operands[7]
12640    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12641					operands[5]), UNSPEC_VSIBADDR);
12642})
12643
;; Insn matched for gatherdi when the first unspec input is a register.
;; Operand 2 (first input) is tied to result operand 0 via "0" and operand 5
;; (last input) to the clobbered scratch operand 1 via "1".
;; The template prints %5 and %2 rather than %1 and %0.  They name the same
;; registers because of the ties, but carry mode <VEC_GATHER_SRCDI>, which
;; differs from the result mode for V8SI/V8SF.
;; NOTE(review): presumably this makes the printed register name follow the
;; narrower mode -- confirm against the operand printer.
12644(define_insn "*avx2_gatherdi<mode>"
12645  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12646	(unspec:VEC_GATHER_MODE
12647	  [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
12648	   (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12649	     [(unspec:P
12650		[(match_operand:P 3 "vsib_address_operand" "p")
12651		 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
12652		 (match_operand:SI 6 "const1248_operand" "n")]
12653		UNSPEC_VSIBADDR)])
12654	   (mem:BLK (scratch))
12655	   (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
12656	  UNSPEC_GATHER))
12657   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12658  "TARGET_AVX2"
12659  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
12660  [(set_attr "type" "ssemov")
12661   (set_attr "prefix" "vex")
12662   (set_attr "mode" "<sseinsnmode>")])
12663
;; Variant of "*avx2_gatherdi<mode>" in which the first unspec element is
;; (pc) instead of a register operand, so result operand 0 is not tied to
;; any input.  Operand 4 (last input, <VEC_GATHER_SRCDI>) is tied via "1"
;; to the clobbered scratch operand 1.
;; The C output fragment picks between two templates: when the result mode
;; differs from <VEC_GATHER_SRCDI> (the V8SI/V8SF cases) the destination is
;; printed through the %x modifier, otherwise as plain %0.
12664(define_insn "*avx2_gatherdi<mode>_2"
12665  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12666	(unspec:VEC_GATHER_MODE
12667	  [(pc)
12668	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
12669	     [(unspec:P
12670		[(match_operand:P 2 "vsib_address_operand" "p")
12671		 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
12672		 (match_operand:SI 5 "const1248_operand" "n")]
12673		UNSPEC_VSIBADDR)])
12674	   (mem:BLK (scratch))
12675	   (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
12676	  UNSPEC_GATHER))
12677   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12678  "TARGET_AVX2"
12679{
12680  if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
12681    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
12682  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
12683}
12684  [(set_attr "type" "ssemov")
12685   (set_attr "prefix" "vex")
12686   (set_attr "mode" "<sseinsnmode>")])
12687
;; gatherdi form for the VI4F_256 modes in which only elements 0..3 of the
;; gather result are kept: the unspec has the full VI4F_256 mode, and a
;; vec_select narrows it to the <VEC_GATHER_SRCDI> destination.
;; Operand 2 (first input) is tied to result operand 0 via "0"; operand 5
;; (last input) is tied via "1" to scratch operand 1, which is clobbered in
;; the full VI4F_256 mode.
12688(define_insn "*avx2_gatherdi<mode>_3"
12689  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
12690	(vec_select:<VEC_GATHER_SRCDI>
12691	  (unspec:VI4F_256
12692	    [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
12693	     (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12694	       [(unspec:P
12695		  [(match_operand:P 3 "vsib_address_operand" "p")
12696		   (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
12697		   (match_operand:SI 6 "const1248_operand" "n")]
12698		  UNSPEC_VSIBADDR)])
12699	     (mem:BLK (scratch))
12700	     (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
12701	     UNSPEC_GATHER)
12702	  (parallel [(const_int 0) (const_int 1)
12703		     (const_int 2) (const_int 3)])))
12704   (clobber (match_scratch:VI4F_256 1 "=&x"))]
12705  "TARGET_AVX2"
12706  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
12707  [(set_attr "type" "ssemov")
12708   (set_attr "prefix" "vex")
12709   (set_attr "mode" "<sseinsnmode>")])
12710
;; Variant of "*avx2_gatherdi<mode>_3" in which the first unspec element is
;; (pc) instead of a register operand, so result operand 0 is not tied to
;; any input.  As in _3, a vec_select keeps elements 0..3 of the VI4F_256
;; unspec and the destination has mode <VEC_GATHER_SRCDI>.  Operand 4 (last
;; input) is tied via "1" to the VI4F_256 scratch operand 1.
12711(define_insn "*avx2_gatherdi<mode>_4"
12712  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
12713	(vec_select:<VEC_GATHER_SRCDI>
12714	  (unspec:VI4F_256
12715	    [(pc)
12716	     (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
12717	       [(unspec:P
12718		  [(match_operand:P 2 "vsib_address_operand" "p")
12719		   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
12720		   (match_operand:SI 5 "const1248_operand" "n")]
12721		  UNSPEC_VSIBADDR)])
12722	     (mem:BLK (scratch))
12723	     (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
12724	    UNSPEC_GATHER)
12725	  (parallel [(const_int 0) (const_int 1)
12726		     (const_int 2) (const_int 3)])))
12727   (clobber (match_scratch:VI4F_256 1 "=&x"))]
12728  "TARGET_AVX2"
12729  "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
12730  [(set_attr "type" "ssemov")
12731   (set_attr "prefix" "vex")
12732   (set_attr "mode" "<sseinsnmode>")])
12733