xref: /dragonfly/contrib/gcc-8.0/gcc/config/i386/sse.md (revision ed183f8c)
1;; GCC machine description for SSE instructions
2;; Copyright (C) 2005-2018 Free Software Foundation, Inc.
3;;
4;; This file is part of GCC.
5;;
6;; GCC is free software; you can redistribute it and/or modify
7;; it under the terms of the GNU General Public License as published by
8;; the Free Software Foundation; either version 3, or (at your option)
9;; any later version.
10;;
11;; GCC is distributed in the hope that it will be useful,
12;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14;; GNU General Public License for more details.
15;;
16;; You should have received a copy of the GNU General Public License
17;; along with GCC; see the file COPYING3.  If not see
18;; <http://www.gnu.org/licenses/>.
19
20(define_c_enum "unspec" [
21  ;; SSE
22  UNSPEC_MOVNT
23
24  ;; SSE2
25  UNSPEC_MOVDI_TO_SSE
26
27  ;; SSE3
28  UNSPEC_LDDQU
29
30  ;; SSSE3
31  UNSPEC_PSHUFB
32  UNSPEC_PSIGN
33  UNSPEC_PALIGNR
34
35  ;; For SSE4A support
36  UNSPEC_EXTRQI
37  UNSPEC_EXTRQ
38  UNSPEC_INSERTQI
39  UNSPEC_INSERTQ
40
41  ;; For SSE4.1 support
42  UNSPEC_BLENDV
43  UNSPEC_INSERTPS
44  UNSPEC_DP
45  UNSPEC_MOVNTDQA
46  UNSPEC_MPSADBW
47  UNSPEC_PHMINPOSUW
48  UNSPEC_PTEST
49
50  ;; For SSE4.2 support
51  UNSPEC_PCMPESTR
52  UNSPEC_PCMPISTR
53
54  ;; For FMA4 and XOP support
55  UNSPEC_FMADDSUB
56  UNSPEC_XOP_UNSIGNED_CMP
57  UNSPEC_XOP_TRUEFALSE
58  UNSPEC_XOP_PERMUTE
59  UNSPEC_FRCZ
60
61  ;; For AES support
62  UNSPEC_AESENC
63  UNSPEC_AESENCLAST
64  UNSPEC_AESDEC
65  UNSPEC_AESDECLAST
66  UNSPEC_AESIMC
67  UNSPEC_AESKEYGENASSIST
68
69  ;; For PCLMUL support
70  UNSPEC_PCLMUL
71
72  ;; For AVX support
73  UNSPEC_PCMP
74  UNSPEC_VPERMIL
75  UNSPEC_VPERMIL2
76  UNSPEC_VPERMIL2F128
77  UNSPEC_CAST
78  UNSPEC_VTESTP
79  UNSPEC_VCVTPH2PS
80  UNSPEC_VCVTPS2PH
81
82  ;; For AVX2 support
83  UNSPEC_VPERMVAR
84  UNSPEC_VPERMTI
85  UNSPEC_GATHER
86  UNSPEC_VSIBADDR
87
88  ;; For AVX512F support
89  UNSPEC_VPERMT2
90  UNSPEC_UNSIGNED_FIX_NOTRUNC
91  UNSPEC_UNSIGNED_PCMP
92  UNSPEC_TESTM
93  UNSPEC_TESTNM
94  UNSPEC_SCATTER
95  UNSPEC_RCP14
96  UNSPEC_RSQRT14
97  UNSPEC_FIXUPIMM
98  UNSPEC_SCALEF
99  UNSPEC_VTERNLOG
100  UNSPEC_GETEXP
101  UNSPEC_GETMANT
102  UNSPEC_ALIGN
103  UNSPEC_CONFLICT
104  UNSPEC_COMPRESS
105  UNSPEC_COMPRESS_STORE
106  UNSPEC_EXPAND
107  UNSPEC_MASKED_EQ
108  UNSPEC_MASKED_GT
109
110  ;; Mask operations
111  UNSPEC_MASKOP
112  UNSPEC_KORTEST
113  UNSPEC_KTEST
114
115  ;; For embedded rounding feature
116  UNSPEC_EMBEDDED_ROUNDING
117
118  ;; For AVX512PF support
119  UNSPEC_GATHER_PREFETCH
120  UNSPEC_SCATTER_PREFETCH
121
122  ;; For AVX512ER support
123  UNSPEC_EXP2
124  UNSPEC_RCP28
125  UNSPEC_RSQRT28
126
127  ;; For SHA support
128  UNSPEC_SHA1MSG1
129  UNSPEC_SHA1MSG2
130  UNSPEC_SHA1NEXTE
131  UNSPEC_SHA1RNDS4
132  UNSPEC_SHA256MSG1
133  UNSPEC_SHA256MSG2
134  UNSPEC_SHA256RNDS2
135
136  ;; For AVX512BW support
137  UNSPEC_DBPSADBW
138  UNSPEC_PMADDUBSW512
139  UNSPEC_PMADDWD512
140  UNSPEC_PSHUFHW
141  UNSPEC_PSHUFLW
142  UNSPEC_CVTINT2MASK
143
144  ;; For AVX512DQ support
145  UNSPEC_REDUCE
146  UNSPEC_FPCLASS
147  UNSPEC_RANGE
148
149  ;; For AVX512IFMA support
150  UNSPEC_VPMADD52LUQ
151  UNSPEC_VPMADD52HUQ
152
153  ;; For AVX512VBMI support
154  UNSPEC_VPMULTISHIFT
155
156  ;; For AVX5124FMAPS/AVX5124VNNIW support
157  UNSPEC_VP4FMADD
158  UNSPEC_VP4FNMADD
159  UNSPEC_VP4DPWSSD
160  UNSPEC_VP4DPWSSDS
161
162  ;; For GFNI support
163  UNSPEC_GF2P8AFFINEINV
164  UNSPEC_GF2P8AFFINE
165  UNSPEC_GF2P8MUL
166
167  ;; For AVX512VBMI2 support
168  UNSPEC_VPSHLD
169  UNSPEC_VPSHRD
170  UNSPEC_VPSHRDV
171  UNSPEC_VPSHLDV
172
173  ;; For AVX512VNNI support
174  UNSPEC_VPMADDUBSWACCD
175  UNSPEC_VPMADDUBSWACCSSD
176  UNSPEC_VPMADDWDACCD
177  UNSPEC_VPMADDWDACCSSD
178
179  ;; For VAES support
180  UNSPEC_VAESDEC
181  UNSPEC_VAESDECLAST
182  UNSPEC_VAESENC
183  UNSPEC_VAESENCLAST
184
185  ;; For VPCLMULQDQ support
186  UNSPEC_VPCLMULQDQ
187
188  ;; For AVX512BITALG support
189  UNSPEC_VPSHUFBIT
190])
191
;; Constants of the "unspecv" C enum.  NOTE(review): presumably the codes
;; used by unspec_volatile patterns later in this file -- their users are
;; not visible in this part of the file; confirm.
192(define_c_enum "unspecv" [
193  UNSPECV_LDMXCSR
194  UNSPECV_STMXCSR
195  UNSPECV_CLFLUSH
196  UNSPECV_MONITOR
197  UNSPECV_MWAIT
198  UNSPECV_VZEROALL
199  UNSPECV_VZEROUPPER
200])
201
202;; All vector modes including V?TImode, used in move patterns.
203(define_mode_iterator VMOVE
204  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
205   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
206   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
207   (V8DI "TARGET_AVX512F")  (V4DI "TARGET_AVX") V2DI
208   (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
209   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
210   (V8DF "TARGET_AVX512F")  (V4DF "TARGET_AVX") V2DF])
211
212;; All AVX-512{F,VL} vector modes.  TARGET_AVX512F is assumed as the baseline.
213(define_mode_iterator V48_AVX512VL
214  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
215   V8DI  (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
216   V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
217   V8DF  (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
218
219;; 1- and 2-byte element AVX-512{BW,VL} vector modes.  TARGET_AVX512BW is assumed as the baseline.
220(define_mode_iterator VI12_AVX512VL
221  [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
222   V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
223
224;; Same modes, but without assuming TARGET_AVX512BW as the baseline
225(define_mode_iterator VI12_AVX512VLBW
226  [(V64QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512VL")
227   (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")
228   (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
229
;; QImode vector modes: V64QI unconditionally, V16QI and V32QI only
;; with TARGET_AVX512VL.
230(define_mode_iterator VI1_AVX512VL
231  [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
232
233;; All vector modes
234(define_mode_iterator V
235  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
236   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
237   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
238   (V8DI "TARGET_AVX512F")  (V4DI "TARGET_AVX") V2DI
239   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
240   (V8DF "TARGET_AVX512F")  (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
241
242;; All 128bit vector modes
243(define_mode_iterator V_128
244  [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
245
246;; All 256bit vector modes
247(define_mode_iterator V_256
248  [V32QI V16HI V8SI V4DI V8SF V4DF])
249
250;; All 128bit and 256bit vector modes
251(define_mode_iterator V_128_256
252  [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
253
254;; All 512bit vector modes
255(define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
256
257;; All 256bit and 512bit vector modes
258(define_mode_iterator V_256_512
259  [V32QI V16HI V8SI V4DI V8SF V4DF
260   (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
261   (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
262
263;; All vector float modes
264(define_mode_iterator VF
265  [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
266   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
267
268;; 128- and 256-bit float vector modes
269(define_mode_iterator VF_128_256
270  [(V8SF "TARGET_AVX") V4SF
271   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
272
273;; All SFmode vector float modes
274(define_mode_iterator VF1
275  [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
276
277;; 128- and 256-bit SF vector modes
278(define_mode_iterator VF1_128_256
279  [(V8SF "TARGET_AVX") V4SF])
280
;; 128- and 256-bit SF vector modes; V4SF only with TARGET_AVX512VL.
281(define_mode_iterator VF1_128_256VL
282  [V8SF (V4SF "TARGET_AVX512VL")])
283
284;; All DFmode vector float modes
285(define_mode_iterator VF2
286  [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
287
288;; 128- and 256-bit DF vector modes
289(define_mode_iterator VF2_128_256
290  [(V4DF "TARGET_AVX") V2DF])
291
;; 512- and 256-bit DF vector modes; V8DF only with TARGET_AVX512F.
292(define_mode_iterator VF2_512_256
293  [(V8DF "TARGET_AVX512F") V4DF])
294
;; 512- and 256-bit DF vector modes; V4DF only with TARGET_AVX512VL.
295(define_mode_iterator VF2_512_256VL
296  [V8DF (V4DF "TARGET_AVX512VL")])
297
298;; All 128bit vector float modes
299(define_mode_iterator VF_128
300  [V4SF (V2DF "TARGET_SSE2")])
301
302;; All 256bit vector float modes
303(define_mode_iterator VF_256
304  [V8SF V4DF])
305
306;; All 512bit vector float modes
307(define_mode_iterator VF_512
308  [V16SF V8DF])
309
;; SImode and DImode vector integer modes; the 128bit and 256bit ones
;; only with TARGET_AVX512VL.
310(define_mode_iterator VI48_AVX512VL
311  [V16SI (V8SI  "TARGET_AVX512VL") (V4SI  "TARGET_AVX512VL")
312   V8DI  (V4DI  "TARGET_AVX512VL") (V2DI  "TARGET_AVX512VL")])
313
;; All vector float modes; the 128bit and 256bit ones only with
;; TARGET_AVX512VL.
314(define_mode_iterator VF_AVX512VL
315  [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
316   V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
317
;; All DFmode vector float modes; the 128bit and 256bit ones only with
;; TARGET_AVX512VL.
318(define_mode_iterator VF2_AVX512VL
319  [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
320
;; All SFmode vector float modes; the 128bit and 256bit ones only with
;; TARGET_AVX512VL.
321(define_mode_iterator VF1_AVX512VL
322  [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
323
324;; All vector integer modes
325(define_mode_iterator VI
326  [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
327   (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
328   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
329   (V8SI "TARGET_AVX") V4SI
330   (V4DI "TARGET_AVX") V2DI])
331
;; All vector integer modes: 128bit modes unconditionally, 256bit modes
;; with TARGET_AVX2, 512bit QI/HI modes with TARGET_AVX512BW and 512bit
;; SI/DI modes with TARGET_AVX512F.
332(define_mode_iterator VI_AVX2
333  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
334   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
335   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
336   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
337
338;; All QImode vector integer modes
339(define_mode_iterator VI1
340  [(V32QI "TARGET_AVX") V16QI])
341
342;; All 128bit vector modes, plus all 256bit vector modes when TARGET_AVX
343(define_mode_iterator V_AVX
344  [V16QI V8HI V4SI V2DI V4SF V2DF
345   (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
346   (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
347   (V8SF "TARGET_AVX") (V4DF"TARGET_AVX")])
348
349(define_mode_iterator VI48_AVX
350 [V4SI V2DI
351  (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
352
353(define_mode_iterator VI8
354  [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
355
356(define_mode_iterator VI8_FVL
357  [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")])
358
359(define_mode_iterator VI8_AVX512VL
360  [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
361
362(define_mode_iterator VI8_256_512
363  [V8DI (V4DI "TARGET_AVX512VL")])
364
365(define_mode_iterator VI1_AVX2
366  [(V32QI "TARGET_AVX2") V16QI])
367
368(define_mode_iterator VI1_AVX512
369  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
370
371(define_mode_iterator VI1_AVX512F
372  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI])
373
374(define_mode_iterator VI2_AVX2
375  [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
376
377(define_mode_iterator VI2_AVX512F
378  [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
379
380(define_mode_iterator VI4_AVX
381  [(V8SI "TARGET_AVX") V4SI])
382
383(define_mode_iterator VI4_AVX2
384  [(V8SI "TARGET_AVX2") V4SI])
385
386(define_mode_iterator VI4_AVX512F
387  [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
388
389(define_mode_iterator VI4_AVX512VL
390  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
391
392(define_mode_iterator VI48_AVX512F_AVX512VL
393  [V4SI V8SI (V16SI "TARGET_AVX512F")
394   (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
395
396(define_mode_iterator VI2_AVX512VL
397  [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
398
399(define_mode_iterator VI1_AVX512VL_F
400  [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")])
401
402(define_mode_iterator VI8_AVX2_AVX512BW
403  [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
404
405(define_mode_iterator VI8_AVX2
406  [(V4DI "TARGET_AVX2") V2DI])
407
408(define_mode_iterator VI8_AVX2_AVX512F
409  [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
410
411(define_mode_iterator VI8_AVX_AVX512F
412  [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")])
413
414(define_mode_iterator VI4_128_8_256
415  [V4SI V4DI])
416
417;; All V8D* modes
418(define_mode_iterator V8FI
419  [V8DF V8DI])
420
421;; All V16S* modes
422(define_mode_iterator V16FI
423  [V16SF V16SI])
424
425;; ??? We should probably use TImode instead.
426(define_mode_iterator VIMAX_AVX2_AVX512BW
427  [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
428
429;; TARGET_AVX512BW is assumed as the baseline
430(define_mode_iterator VIMAX_AVX512VL
431  [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
432
433(define_mode_iterator VIMAX_AVX2
434  [(V2TI "TARGET_AVX2") V1TI])
435
436;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW.
437(define_mode_iterator SSESCALARMODE
438  [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
439
440(define_mode_iterator VI12_AVX2
441  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
442   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
443
444(define_mode_iterator VI24_AVX2
445  [(V16HI "TARGET_AVX2") V8HI
446   (V8SI "TARGET_AVX2") V4SI])
447
448(define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
449  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
450   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
451   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
452
453(define_mode_iterator VI124_AVX2
454  [(V32QI "TARGET_AVX2") V16QI
455   (V16HI "TARGET_AVX2") V8HI
456   (V8SI "TARGET_AVX2") V4SI])
457
458(define_mode_iterator VI2_AVX2_AVX512BW
459  [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
460
461(define_mode_iterator VI248_AVX512VL
462  [V32HI V16SI V8DI
463   (V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
464   (V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
465   (V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
466
467(define_mode_iterator VI48_AVX2
468  [(V8SI "TARGET_AVX2") V4SI
469   (V4DI "TARGET_AVX2") V2DI])
470
471(define_mode_iterator VI248_AVX2
472  [(V16HI "TARGET_AVX2") V8HI
473   (V8SI "TARGET_AVX2") V4SI
474   (V4DI "TARGET_AVX2") V2DI])
475
476(define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
477  [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
478   (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
479   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
480
481(define_mode_iterator VI248_AVX512BW
482  [(V32HI "TARGET_AVX512BW") V16SI V8DI])
483
484(define_mode_iterator VI248_AVX512BW_AVX512VL
485  [(V32HI "TARGET_AVX512BW")
486   (V4DI "TARGET_AVX512VL") V16SI V8DI])
487
488;; TARGET_AVX512VL is assumed as the baseline
489(define_mode_iterator VI248_AVX512BW_1
490 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
491  V8SI V4SI
492  V2DI])
493
494(define_mode_iterator VI248_AVX512BW_2
495 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
496  V8SI V4SI
497  V4DI V2DI])
498
499(define_mode_iterator VI48_AVX512F
500  [(V16SI "TARGET_AVX512F") V8SI V4SI
501   (V8DI "TARGET_AVX512F") V4DI V2DI])
502
503(define_mode_iterator VI48_AVX_AVX512F
504  [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
505   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
506
507(define_mode_iterator VI12_AVX_AVX512F
508  [ (V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
509    (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
510
511(define_mode_iterator V48_AVX2
512  [V4SF V2DF
513   V8SF V4DF
514   (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
515   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
516
517(define_mode_iterator VI1_AVX512VLBW
518  [(V64QI "TARGET_AVX512BW") (V32QI  "TARGET_AVX512VL")
519	(V16QI  "TARGET_AVX512VL")])
520
;; Mapping of vector modes to an AVX-512 feature name: "avx512vl" for
;; 128bit and 256bit modes, "avx512bw" for 512bit QI/HI modes and
;; "avx512f" for the remaining 512bit modes.
521(define_mode_attr avx512
522  [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
523   (V8HI  "avx512vl") (V16HI  "avx512vl") (V32HI "avx512bw")
524   (V4SI  "avx512vl") (V8SI  "avx512vl") (V16SI "avx512f")
525   (V2DI  "avx512vl") (V4DI  "avx512vl") (V8DI "avx512f")
526   (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
527   (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
528
529(define_mode_attr sse2_avx_avx512f
530  [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
531   (V8HI  "avx512vl") (V16HI  "avx512vl") (V32HI "avx512bw")
532   (V4SI  "sse2") (V8SI  "avx") (V16SI "avx512f")
533   (V2DI  "avx512vl") (V4DI  "avx512vl") (V8DI "avx512f")
534   (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
535   (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
536
537(define_mode_attr sse2_avx2
538  [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
539   (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
540   (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
541   (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
542   (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
543
544(define_mode_attr ssse3_avx2
545   [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
546    (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
547    (V4SI "ssse3") (V8SI "avx2")
548    (V2DI "ssse3") (V4DI "avx2")
549    (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
550
551(define_mode_attr sse4_1_avx2
552   [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
553    (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
554    (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
555    (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
556
557(define_mode_attr avx_avx2
558  [(V4SF "avx") (V2DF "avx")
559   (V8SF "avx") (V4DF "avx")
560   (V4SI "avx2") (V2DI "avx2")
561   (V8SI "avx2") (V4DI "avx2")])
562
563(define_mode_attr vec_avx2
564  [(V16QI "vec") (V32QI "avx2")
565   (V8HI "vec") (V16HI "avx2")
566   (V4SI "vec") (V8SI "avx2")
567   (V2DI "vec") (V4DI "avx2")])
568
569(define_mode_attr avx2_avx512
570  [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
571   (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
572   (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
573   (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
574   (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
575
;; Mapping of vector modes to "f" for float element modes and "i" for
;; integer element modes (including V?TI).  Spliced into mnemonics such as
;; vextract<shuffletype>64x4 and vbroadcast<shuffletype>32x4.
576(define_mode_attr shuffletype
577  [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
578  (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
579  (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
580  (V32HI "i") (V16HI "i") (V8HI "i")
581  (V64QI "i") (V32QI "i") (V16QI "i")
582  (V4TI "i") (V2TI "i") (V1TI "i")])
583
;; Mapping of 512bit SF/DF/SI/DI vector modes to the 128bit vector mode
;; with the same element mode (a quarter of the size).
584(define_mode_attr ssequartermode
585  [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
586
587(define_mode_attr ssedoublemodelower
588  [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
589   (V8HI "v8si")   (V16HI "v16si") (V32HI "v32si")
590   (V4SI "v4di")   (V8SI "v8di")   (V16SI "v16di")])
591
592(define_mode_attr ssedoublemode
593  [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
594   (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
595   (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
596   (V4HI "V4SI") (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
597   (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
598   (V4DI "V8DI") (V8DI "V16DI")])
599
;; Mapping of DImode vector modes to the QImode vector mode of the same size
600(define_mode_attr ssebytemode
601  [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
602
603;; All 128bit vector integer modes
604(define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
605
606;; All 256bit vector integer modes
607(define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
608
609;; Various 128bit vector integer mode combinations
610(define_mode_iterator VI12_128 [V16QI V8HI])
611(define_mode_iterator VI14_128 [V16QI V4SI])
612(define_mode_iterator VI124_128 [V16QI V8HI V4SI])
613(define_mode_iterator VI24_128 [V8HI V4SI])
614(define_mode_iterator VI248_128 [V8HI V4SI V2DI])
615(define_mode_iterator VI48_128 [V4SI V2DI])
616
617;; Various 256bit and 512bit vector integer mode combinations
618(define_mode_iterator VI124_256 [V32QI V16HI V8SI])
619(define_mode_iterator VI124_256_AVX512F_AVX512BW
620  [V32QI V16HI V8SI
621   (V64QI "TARGET_AVX512BW")
622   (V32HI "TARGET_AVX512BW")
623   (V16SI "TARGET_AVX512F")])
624(define_mode_iterator VI48_256 [V8SI V4DI])
625(define_mode_iterator VI48_512 [V16SI V8DI])
626(define_mode_iterator VI4_256_8_512 [V8SI V8DI])
627(define_mode_iterator VI_AVX512BW
628  [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
629
630;; Int-float size matches
631(define_mode_iterator VI4F_128 [V4SI V4SF])
632(define_mode_iterator VI8F_128 [V2DI V2DF])
633(define_mode_iterator VI4F_256 [V8SI V8SF])
634(define_mode_iterator VI8F_256 [V4DI V4DF])
635(define_mode_iterator VI4F_256_512
636  [V8SI V8SF
637   (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
638(define_mode_iterator VI48F_256_512
639  [V8SI V8SF
640  (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
641  (V8DI  "TARGET_AVX512F") (V8DF  "TARGET_AVX512F")
642  (V4DI  "TARGET_AVX512VL") (V4DF  "TARGET_AVX512VL")])
643(define_mode_iterator VF48_I1248
644  [V16SI V16SF V8DI V8DF V32HI V64QI])
645(define_mode_iterator VI48F
646  [V16SI V16SF V8DI V8DF
647   (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
648   (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
649   (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
650   (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
651(define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
652
653;; Mapping from float mode to required SSE level
654(define_mode_attr sse
655  [(SF "sse") (DF "sse2")
656   (V4SF "sse") (V2DF "sse2")
657   (V16SF "avx512f") (V8SF "avx")
658   (V8DF "avx512f") (V4DF "avx")])
659
660(define_mode_attr sse2
661  [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
662   (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
663
664(define_mode_attr sse3
665  [(V16QI "sse3") (V32QI "avx")])
666
667(define_mode_attr sse4_1
668  [(V4SF "sse4_1") (V2DF "sse4_1")
669   (V8SF "avx") (V4DF "avx")
670   (V8DF "avx512f")
671   (V4DI "avx") (V2DI "sse4_1")
672   (V8SI "avx") (V4SI "sse4_1")
673   (V16QI "sse4_1") (V32QI "avx")
674   (V8HI "sse4_1") (V16HI "avx")])
675
;; Mapping of vector modes to a size suffix string: "512" for 512bit
;; modes, "256" for 256bit modes and the empty string for 128bit modes.
676(define_mode_attr avxsizesuffix
677  [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
678   (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
679   (V16QI "") (V8HI "") (V4SI "") (V2DI "")
680   (V16SF "512") (V8DF "512")
681   (V8SF "256") (V4DF "256")
682   (V4SF "") (V2DF "")])
683
684;; SSE instruction mode
685(define_mode_attr sseinsnmode
686  [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
687   (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
688   (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
689   (V16SF "V16SF") (V8DF "V8DF")
690   (V8SF "V8SF") (V4DF "V4DF")
691   (V4SF "V4SF") (V2DF "V2DF")
692   (TI "TI")])
693
694;; Mapping of vector modes to corresponding mask size
695(define_mode_attr avx512fmaskmode
696  [(V64QI "DI") (V32QI "SI") (V16QI "HI")
697   (V32HI "SI") (V16HI "HI") (V8HI  "QI") (V4HI "QI")
698   (V16SI "HI") (V8SI  "QI") (V4SI  "QI")
699   (V8DI  "QI") (V4DI  "QI") (V2DI  "QI")
700   (V16SF "HI") (V8SF  "QI") (V4SF  "QI")
701   (V8DF  "QI") (V4DF  "QI") (V2DF  "QI")])
702
703;; Mapping of vector modes to corresponding mask size, in lower case
704(define_mode_attr avx512fmaskmodelower
705  [(V64QI "di") (V32QI "si") (V16QI "hi")
706   (V32HI "si") (V16HI "hi") (V8HI  "qi") (V4HI "qi")
707   (V16SI "hi") (V8SI  "qi") (V4SI  "qi")
708   (V8DI  "qi") (V4DI  "qi") (V2DI  "qi")
709   (V16SF "hi") (V8SF  "qi") (V4SF  "qi")
710   (V8DF  "qi") (V4DF  "qi") (V2DF  "qi")])
711
712;; Mapping of vector float modes to an integer mode of the same size
713(define_mode_attr sseintvecmode
714  [(V16SF "V16SI") (V8DF  "V8DI")
715   (V8SF  "V8SI")  (V4DF  "V4DI")
716   (V4SF  "V4SI")  (V2DF  "V2DI")
717   (V16SI "V16SI") (V8DI  "V8DI")
718   (V8SI  "V8SI")  (V4DI  "V4DI")
719   (V4SI  "V4SI")  (V2DI  "V2DI")
720   (V16HI "V16HI") (V8HI  "V8HI")
721   (V32HI "V32HI") (V64QI "V64QI")
722   (V32QI "V32QI") (V16QI "V16QI")])
723
;; Mapping of vector float modes to the single (non-vector) integer mode
;; of the same size: TI for 128bit, OI for 256bit, XI for 512bit.
;; Note that there is no V16SF entry.
724(define_mode_attr sseintvecmode2
725  [(V8DF "XI") (V4DF "OI") (V2DF "TI")
726   (V8SF "OI") (V4SF "TI")])
727
728(define_mode_attr sseintvecmodelower
729  [(V16SF "v16si") (V8DF "v8di")
730   (V8SF "v8si") (V4DF "v4di")
731   (V4SF "v4si") (V2DF "v2di")
732   (V8SI "v8si") (V4DI "v4di")
733   (V4SI "v4si") (V2DI "v2di")
734   (V16HI "v16hi") (V8HI "v8hi")
735   (V32QI "v32qi") (V16QI "v16qi")])
736
737;; Mapping of vector modes to a vector mode of double size
738(define_mode_attr ssedoublevecmode
739  [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
740   (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
741   (V8SF "V16SF") (V4DF "V8DF")
742   (V4SF "V8SF") (V2DF "V4DF")])
743
744;; Mapping of vector modes to a vector mode of half size
745(define_mode_attr ssehalfvecmode
746  [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
747   (V32QI "V16QI") (V16HI  "V8HI") (V8SI  "V4SI") (V4DI "V2DI")
748   (V16QI  "V8QI") (V8HI   "V4HI") (V4SI  "V2SI")
749   (V16SF "V8SF") (V8DF "V4DF")
750   (V8SF  "V4SF") (V4DF "V2DF")
751   (V4SF  "V2SF")])
752
753(define_mode_attr ssehalfvecmodelower
754  [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
755   (V32QI "v16qi") (V16HI  "v8hi") (V8SI  "v4si") (V4DI "v2di")
756   (V16QI  "v8qi") (V8HI   "v4hi") (V4SI  "v2si")
757   (V16SF "v8sf") (V8DF "v4df")
758   (V8SF  "v4sf") (V4DF "v2df")
759   (V4SF  "v2sf")])
760
761;; Mapping of vector modes to packed single mode of the same size
762(define_mode_attr ssePSmode
763  [(V16SI "V16SF") (V8DF "V16SF")
764   (V16SF "V16SF") (V8DI "V16SF")
765   (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
766   (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
767   (V8SI "V8SF") (V4SI "V4SF")
768   (V4DI "V8SF") (V2DI "V4SF")
769   (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
770   (V8SF "V8SF") (V4SF "V4SF")
771   (V4DF "V8SF") (V2DF "V4SF")])
772
;; Mapping of DImode vector modes to the SFmode vector mode with the same
;; number of elements (hence half the size).
773(define_mode_attr ssePSmode2
774  [(V8DI "V8SF") (V4DI "V4SF")])
775
776;; Mapping of vector modes back to the scalar modes
777(define_mode_attr ssescalarmode
778  [(V64QI "QI") (V32QI "QI") (V16QI "QI")
779   (V32HI "HI") (V16HI "HI") (V8HI "HI")
780   (V16SI "SI") (V8SI "SI")  (V4SI "SI")
781   (V8DI "DI")  (V4DI "DI")  (V2DI "DI")
782   (V16SF "SF") (V8SF "SF")  (V4SF "SF")
783   (V8DF "DF")  (V4DF "DF")  (V2DF "DF")
784   (V4TI "TI")  (V2TI "TI")])
785
786;; Mapping of vector modes back to the scalar modes, in lower case
787(define_mode_attr ssescalarmodelower
788  [(V64QI "qi") (V32QI "qi") (V16QI "qi")
789   (V32HI "hi") (V16HI "hi") (V8HI "hi")
790   (V16SI "si") (V8SI "si")  (V4SI "si")
791   (V8DI "di")  (V4DI "di")  (V2DI "di")
792   (V16SF "sf") (V8SF "sf")  (V4SF "sf")
793   (V8DF "df")  (V4DF "df")  (V2DF "df")
794   (V4TI "ti")  (V2TI "ti")])
795
796;; Mapping of vector modes to the 128bit modes
797(define_mode_attr ssexmmmode
798  [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
799   (V32HI "V8HI")  (V16HI "V8HI") (V8HI "V8HI")
800   (V16SI "V4SI")  (V8SI "V4SI")  (V4SI "V4SI")
801   (V8DI "V2DI")   (V4DI "V2DI")  (V2DI "V2DI")
802   (V16SF "V4SF")  (V8SF "V4SF")  (V4SF "V4SF")
803   (V8DF "V2DF")   (V4DF "V2DF")  (V2DF "V2DF")])
804
805;; Pointer size override for scalar modes (Intel asm dialect)
806(define_mode_attr iptr
807  [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
808   (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
809   (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
810   (V16SF "k") (V8DF "q")
811   (V8SF "k") (V4DF "q")
812   (V4SF "k") (V2DF "q")
813   (SF "k") (DF "q")])
814
815;; Number of scalar elements in each vector type
816(define_mode_attr ssescalarnum
817  [(V64QI "64") (V16SI "16") (V8DI "8")
818   (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
819   (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
820   (V16SF "16") (V8DF "8")
821   (V8SF "8") (V4DF "4")
822   (V4SF "4") (V2DF "2")])
823
824;; Mask of scalar elements in each vector type
825(define_mode_attr ssescalarnummask
826  [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
827   (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
828   (V8SF "7") (V4DF "3")
829   (V4SF "3") (V2DF "1")])
830
;; Size in bits of the scalar element of each vector mode.
;; NOTE(review): V4TI, V2TI and V1TI map to "64" here, not to the 128 bits
;; of a TImode element -- presumably intentional; confirm against the
;; patterns that use this attribute.
831(define_mode_attr ssescalarsize
832  [(V4TI  "64") (V2TI  "64") (V1TI  "64")
833   (V8DI  "64") (V4DI  "64") (V2DI  "64")
834   (V64QI "8") (V32QI "8") (V16QI "8")
835   (V32HI "16") (V16HI "16") (V8HI "16")
836   (V16SI "32") (V8SI "32") (V4SI "32")
837   (V16SF "32") (V8SF "32") (V4SF "32")
838   (V8DF "64") (V4DF "64") (V2DF "64")])
839
840;; SSE prefix for integer vector modes
841(define_mode_attr sseintprefix
842  [(V2DI  "p") (V2DF  "")
843   (V4DI  "p") (V4DF  "")
844   (V8DI  "p") (V8DF  "")
845   (V4SI  "p") (V4SF  "")
846   (V8SI  "p") (V8SF  "")
847   (V16SI "p") (V16SF "")
848   (V16QI "p") (V8HI "p")
849   (V32QI "p") (V16HI "p")
850   (V64QI "p") (V32HI "p")])
851
852;; SSE scalar suffix for vector modes
853(define_mode_attr ssescalarmodesuffix
854  [(SF "ss") (DF "sd")
855   (V16SF "ss") (V8DF "sd")
856   (V8SF "ss") (V4DF "sd")
857   (V4SF "ss") (V2DF "sd")
858   (V16SI "d") (V8DI "q")
859   (V8SI "d") (V4DI "q")
860   (V4SI "d") (V2DI "q")])
861
862;; Pack/unpack vector modes
863(define_mode_attr sseunpackmode
864  [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
865   (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
866   (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
867
868(define_mode_attr ssepackmode
869  [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
870   (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
871   (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
872
873;; Mapping of the max integer size for xop rotate immediate constraint
874(define_mode_attr sserotatemax
875  [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
876
877;; Mapping of mode to cast intrinsic name
878(define_mode_attr castmode
879 [(V8SI "si") (V8SF "ps") (V4DF "pd")
880  (V16SI "si") (V16SF "ps") (V8DF "pd")])
881
882;; Instruction suffix for sign and zero extensions.
883(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
884
885;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
886;; i64x4 or f64x4 for 512bit modes.
887(define_mode_attr i128
888  [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
889   (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
890   (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
891
892;; For 256-bit modes for TARGET_AVX512VL && TARGET_AVX512DQ
893;; i32x4, f32x4, i64x2 or f64x2 suffixes.
894(define_mode_attr i128vldq
895  [(V8SF "f32x4") (V4DF "f64x2")
896   (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")])
897
898;; Mix-n-match
899(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
900(define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
901
902;; Mapping for dbpsadbw modes
903(define_mode_attr dbpsadbwmode
904  [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
905
906;; Mapping suffixes for broadcast
907(define_mode_attr bcstscalarsuff
908  [(V64QI "b")  (V32QI "b") (V16QI "b")
909   (V32HI "w")  (V16HI "w") (V8HI "w")
910   (V16SI "d")  (V8SI "d")  (V4SI "d")
911   (V8DI "q")   (V4DI "q")  (V2DI "q")
912   (V16SF "ss") (V8SF "ss") (V4SF "ss")
913   (V8DF "sd")  (V4DF "sd") (V2DF "sd")])
914
915;; Tie mode of assembler operand to mode iterator
916(define_mode_attr concat_tg_mode
917  [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
918   (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
919
920;; Tie mode of assembler operand to mode iterator
921(define_mode_attr xtg_mode
922  [(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x") (V4SF "x") (V2DF "x")
923   (V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
924   (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
925
926;; Half mask mode for unpacks
927(define_mode_attr HALFMASKMODE
928  [(DI "SI") (SI "HI")])
929
930;; Double mask mode for packs
931(define_mode_attr DOUBLEMASKMODE
932  [(HI "SI") (SI "DI")])
933
934
935;; Include define_subst patterns for instructions with mask
936(include "subst.md")
937
938;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
939
940;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
941;;
942;; Move patterns
943;;
944;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
945
946;; All of these patterns are enabled for SSE1 as well as SSE2.
947;; This is essential for maintaining stable calling conventions.
948
;; Expander for vector moves in every VMOVE mode, enabled by TARGET_SSE.
;; The preparation code hands both operands to ix86_expand_vector_move and
;; then finishes with DONE, so the expander's own RTL template is not
;; emitted.
949(define_expand "mov<mode>"
950  [(set (match_operand:VMOVE 0 "nonimmediate_operand")
951	(match_operand:VMOVE 1 "nonimmediate_operand"))]
952  "TARGET_SSE"
953{
954  ix86_expand_vector_move (<MODE>mode, operands);
955  DONE;
956})
957
;; Move insn for every mode in VMOVE.  Alternatives (destination <- source
;; constraints) and their "type" attribute:
;;   0: "v" <- "C"    sselog1
;;   1: "v" <- "BC"   sselog1
;;   2: "v" <- "vm"   ssemov
;;   3: "m" <- "v"    ssemov
;; The insn condition requires at least one of the two operands to be a
;; register.  The output code dispatches on the "type" attribute first and,
;; for ssemov, on the "mode" attribute computed at the bottom of the pattern.
958(define_insn "mov<mode>_internal"
959  [(set (match_operand:VMOVE 0 "nonimmediate_operand"
960	 "=v,v ,v ,m")
961	(match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
962	 " C,BC,vm,v"))]
963  "TARGET_SSE
964   && (register_operand (operands[0], <MODE>mode)
965       || register_operand (operands[1], <MODE>mode))"
966{
967  switch (get_attr_type (insn))
968    {
969    case TYPE_SSELOG1:
      /* Alternatives 0 and 1: the opcode is chosen entirely by
	 standard_sse_constant_opcode.  */
970      return standard_sse_constant_opcode (insn, operands);
971
972    case TYPE_SSEMOV:
973      /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
974	 in avx512f, so we need to use workarounds, to access sse registers
975	 16-31, which are evex-only. In avx512vl we don't need workarounds.  */
976      if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
977	  && (EXT_REX_SSE_REG_P (operands[0])
978	      || EXT_REX_SSE_REG_P (operands[1])))
979	{
	  /* Store: emit a vextract of element 0 with the register operand
	     printed through the %g modifier.  */
980	  if (memory_operand (operands[0], <MODE>mode))
981	    {
982	      if (<MODE_SIZE> == 32)
983		return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
984	      else if (<MODE_SIZE> == 16)
985		return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
986	      else
987		gcc_unreachable ();
988	    }
	  /* Load: emit a vbroadcast from memory into the %g-printed
	     destination register.  */
989	  else if (memory_operand (operands[1], <MODE>mode))
990	    {
991	      if (<MODE_SIZE> == 32)
992		return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
993	      else if (<MODE_SIZE> == 16)
994		return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
995	      else
996		gcc_unreachable ();
997	    }
998	  else
999	    /* Reg -> reg move is always aligned.  Just use wider move.  */
1000	    switch (get_attr_mode (insn))
1001	      {
1002	      case MODE_V8SF:
1003	      case MODE_V4SF:
1004		return "vmovaps\t{%g1, %g0|%g0, %g1}";
1005	      case MODE_V4DF:
1006	      case MODE_V2DF:
1007		return "vmovapd\t{%g1, %g0|%g0, %g1}";
1008	      case MODE_OI:
1009	      case MODE_TI:
1010		return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
1011	      default:
1012		gcc_unreachable ();
1013	      }
1014	}
1015
      /* Ordinary case: the mnemonic family comes from the "mode" attribute
	 and the unaligned form is used when either operand is misaligned.  */
1016      switch (get_attr_mode (insn))
1017	{
1018	case MODE_V16SF:
1019	case MODE_V8SF:
1020	case MODE_V4SF:
1021	  if (misaligned_operand (operands[0], <MODE>mode)
1022	      || misaligned_operand (operands[1], <MODE>mode))
1023	    return "%vmovups\t{%1, %0|%0, %1}";
1024	  else
1025	    return "%vmovaps\t{%1, %0|%0, %1}";
1026
1027	case MODE_V8DF:
1028	case MODE_V4DF:
1029	case MODE_V2DF:
1030	  if (misaligned_operand (operands[0], <MODE>mode)
1031	      || misaligned_operand (operands[1], <MODE>mode))
1032	    return "%vmovupd\t{%1, %0|%0, %1}";
1033	  else
1034	    return "%vmovapd\t{%1, %0|%0, %1}";
1035
1036	case MODE_OI:
1037	case MODE_TI:
	  /* With AVX512VL the element-size-suffixed vmovdqu is used for the
	     SI/DI-element modes, or for any mode when AVX512BW is also on;
	     otherwise plain (v)movdqu.  */
1038	  if (misaligned_operand (operands[0], <MODE>mode)
1039	      || misaligned_operand (operands[1], <MODE>mode))
1040	    return TARGET_AVX512VL
1041		   && (<MODE>mode == V4SImode
1042		       || <MODE>mode == V2DImode
1043		       || <MODE>mode == V8SImode
1044		       || <MODE>mode == V4DImode
1045		       || TARGET_AVX512BW)
1046		   ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1047		   : "%vmovdqu\t{%1, %0|%0, %1}";
1048	  else
1049	    return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
1050				   : "%vmovdqa\t{%1, %0|%0, %1}";
1051	case MODE_XI:
1052	  if (misaligned_operand (operands[0], <MODE>mode)
1053	      || misaligned_operand (operands[1], <MODE>mode))
1054	    return (<MODE>mode == V16SImode
1055		    || <MODE>mode == V8DImode
1056		    || TARGET_AVX512BW)
1057		   ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1058		   : "vmovdqu64\t{%1, %0|%0, %1}";
1059	  else
1060	    return "vmovdqa64\t{%1, %0|%0, %1}";
1061
1062	default:
1063	  gcc_unreachable ();
1064	}
1065
1066    default:
1067      gcc_unreachable ();
1068    }
1069}
1070  [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
1071   (set_attr "prefix" "maybe_vex")
   ;; The "mode" attribute is the first matching arm of the cond below; the
   ;; order of the arms therefore matters.
1072   (set (attr "mode")
1073	(cond [(and (eq_attr "alternative" "1")
1074		    (match_test "TARGET_AVX512VL"))
1075		 (const_string "<sseinsnmode>")
1076	       (and (match_test "<MODE_SIZE> == 16")
1077		    (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1078			 (and (eq_attr "alternative" "3")
1079			      (match_test "TARGET_SSE_TYPELESS_STORES"))))
1080		 (const_string "<ssePSmode>")
1081	       (match_test "TARGET_AVX")
1082		 (const_string "<sseinsnmode>")
1083	       (ior (not (match_test "TARGET_SSE2"))
1084		    (match_test "optimize_function_for_size_p (cfun)"))
1085		 (const_string "V4SF")
1086	       (and (eq_attr "alternative" "0")
1087		    (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
1088		 (const_string "TI")
1089	      ]
1090	      (const_string "<sseinsnmode>")))
   ;; Alternative 1 is enabled only under TARGET_SSE2 for 16-byte modes and
   ;; only under TARGET_AVX2 for 32-byte modes; everything else is always
   ;; enabled.
1091   (set (attr "enabled")
1092        (cond [(and (match_test "<MODE_SIZE> == 16")
1093		    (eq_attr "alternative" "1"))
1094		 (symbol_ref "TARGET_SSE2")
1095	       (and (match_test "<MODE_SIZE> == 32")
1096		    (eq_attr "alternative" "1"))
1097		 (symbol_ref "TARGET_AVX2")
1098	      ]
1099	      (symbol_ref "true")))])
1100
;; Masked move/load for V48_AVX512VL modes: operand 0 is the vec_merge of
;; operand 1 (register or memory) and operand 2 under mask operand 3.
;; Float-element modes use vmovu/vmova<ssemodesuffix>, other modes use
;; vmovdqu/vmovdqa<ssescalarsize>; the unaligned mnemonic is selected when
;; operand 1 is misaligned.
;; NOTE(review): %N2 presumably prints the zero-masking marker when operand 2
;; is the zero constant -- confirm against the operand printer.
1101(define_insn "<avx512>_load<mode>_mask"
1102  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
1103	(vec_merge:V48_AVX512VL
1104	  (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
1105	  (match_operand:V48_AVX512VL 2 "vector_move_operand" "0C,0C")
1106	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1107  "TARGET_AVX512F"
1108{
1109  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1110    {
1111      if (misaligned_operand (operands[1], <MODE>mode))
1112	return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1113      else
1114	return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1115    }
1116  else
1117    {
1118      if (misaligned_operand (operands[1], <MODE>mode))
1119	return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1120      else
1121	return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1122    }
1123}
1124  [(set_attr "type" "ssemov")
1125   (set_attr "prefix" "evex")
1126   (set_attr "memory" "none,load")
1127   (set_attr "mode" "<sseinsnmode>")])
1128
;; Masked move/load for VI12_AVX512VL modes, enabled under TARGET_AVX512BW.
;; Same vec_merge shape as the V48_AVX512VL variant, but always emitted as
;; vmovdqu<ssescalarsize> regardless of alignment.
1129(define_insn "<avx512>_load<mode>_mask"
1130  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
1131	(vec_merge:VI12_AVX512VL
1132	  (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
1133	  (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C,0C")
1134	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1135  "TARGET_AVX512BW"
1136  "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1137  [(set_attr "type" "ssemov")
1138   (set_attr "prefix" "evex")
1139   (set_attr "memory" "none,load")
1140   (set_attr "mode" "<sseinsnmode>")])
1141
;; Mask-controlled blend for V48_AVX512VL modes: vec_merge of operand 2
;; (register or memory) and operand 1 (register) under mask operand 3.
;; Note that operand 2 is the first vec_merge arm and operand 1 the second.
1142(define_insn "<avx512>_blendm<mode>"
1143  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1144	(vec_merge:V48_AVX512VL
1145	  (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
1146	  (match_operand:V48_AVX512VL 1 "register_operand" "v")
1147	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1148  "TARGET_AVX512F"
1149  "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1150  [(set_attr "type" "ssemov")
1151   (set_attr "prefix" "evex")
1152   (set_attr "mode" "<sseinsnmode>")])
1153
;; Mask-controlled blend for VI12_AVX512VL modes, enabled under
;; TARGET_AVX512BW; emitted as vpblendm<ssemodesuffix>.  Operand roles match
;; the V48_AVX512VL variant.
1154(define_insn "<avx512>_blendm<mode>"
1155  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1156	(vec_merge:VI12_AVX512VL
1157	  (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
1158	  (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1159	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1160  "TARGET_AVX512BW"
1161  "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1162  [(set_attr "type" "ssemov")
1163   (set_attr "prefix" "evex")
1164   (set_attr "mode" "<sseinsnmode>")])
1165
;; Masked store for V48_AVX512VL modes: memory operand 0 becomes the
;; vec_merge of register operand 1 and its own previous contents
;; (match_dup 0) under mask operand 2.  Mnemonic selection mirrors the masked
;; load: vmovu/vmova<ssemodesuffix> for float elements, otherwise
;; vmovdqu/vmovdqa<ssescalarsize>, with the unaligned form when operand 0 is
;; misaligned.
1166(define_insn "<avx512>_store<mode>_mask"
1167  [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1168	(vec_merge:V48_AVX512VL
1169	  (match_operand:V48_AVX512VL 1 "register_operand" "v")
1170	  (match_dup 0)
1171	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1172  "TARGET_AVX512F"
1173{
1174  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1175    {
1176      if (misaligned_operand (operands[0], <MODE>mode))
1177	return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1178      else
1179	return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1180    }
1181  else
1182    {
1183      if (misaligned_operand (operands[0], <MODE>mode))
1184	return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1185      else
1186	return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1187    }
1188}
1189  [(set_attr "type" "ssemov")
1190   (set_attr "prefix" "evex")
1191   (set_attr "memory" "store")
1192   (set_attr "mode" "<sseinsnmode>")])
1193
;; Masked store for VI12_AVX512VL modes, enabled under TARGET_AVX512BW;
;; always emitted as vmovdqu<ssescalarsize>.
1194(define_insn "<avx512>_store<mode>_mask"
1195  [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1196	(vec_merge:VI12_AVX512VL
1197	  (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1198	  (match_dup 0)
1199	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1200  "TARGET_AVX512BW"
1201  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1202  [(set_attr "type" "ssemov")
1203   (set_attr "prefix" "evex")
1204   (set_attr "memory" "store")
1205   (set_attr "mode" "<sseinsnmode>")])
1206
;; Builds a V2DI whose element 0 is element 0 of operand 1 and whose
;; element 1 is zero; emitted as (v)movq.
1207(define_insn "sse2_movq128"
1208  [(set (match_operand:V2DI 0 "register_operand" "=v")
1209	(vec_concat:V2DI
1210	  (vec_select:DI
1211	    (match_operand:V2DI 1 "nonimmediate_operand" "vm")
1212	    (parallel [(const_int 0)]))
1213	  (const_int 0)))]
1214  "TARGET_SSE2"
1215  "%vmovq\t{%1, %0|%0, %q1}"
1216  [(set_attr "type" "ssemov")
1217   (set_attr "prefix" "maybe_vex")
1218   (set_attr "mode" "TI")])
1219
1220;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1221;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1222;; from memory, we'd prefer to load the memory directly into the %xmm
1223;; register.  To facilitate this happy circumstance, this pattern won't
1224;; split until after register allocation.  If the 64-bit value didn't
1225;; come from memory, this is the best we can do.  This is much better
1226;; than storing %edx:%eax into a stack temporary and loading an %xmm
1227;; from there.
;;
;; Alternative 0 (source constraint "r") needs the V4SI scratch operand 2;
;; alternative 1 (source constraint "m") does not ("X").  The insn always
;; outputs "#" and is split once reload has completed.
1228
1229(define_insn_and_split "movdi_to_sse"
1230  [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
1231	(unspec:V4SI [(match_operand:DI 1 "nonimmediate_operand" "r,m")]
1232		     UNSPEC_MOVDI_TO_SSE))
1233     (clobber (match_scratch:V4SI 2 "=&x,X"))]
1234  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1235  "#"
1236  "&& reload_completed"
1237  [(const_int 0)]
1238{
1239 if (register_operand (operands[1], DImode))
1240   {
1241      /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1242	 Assemble the 64-bit DImode value in an xmm register.  */
      /* Low SImode half goes into operand 0, high half into the scratch,
	 then the two are interleaved into operand 0.  */
1243      emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1244				  gen_lowpart (SImode, operands[1])));
1245      emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1246				  gen_highpart (SImode, operands[1])));
1247      emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1248					     operands[2]));
1249   }
 /* Memory source: concatenate the DImode value with zero, writing operand 0
    viewed as V2DImode.  */
1250 else if (memory_operand (operands[1], DImode))
1251   emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
1252				  operands[1], const0_rtx));
1253 else
1254   gcc_unreachable ();
1255 DONE;
1256})
1257
;; After reload, rewrite a V4SF load whose source matches
;; zero_extended_scalar_load_operand as a vec_merge that places the SFmode
;; lowpart of the source in element 0 (merge mask 1) over an all-zero V4SF.
1258(define_split
1259  [(set (match_operand:V4SF 0 "register_operand")
1260	(match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1261  "TARGET_SSE && reload_completed"
1262  [(set (match_dup 0)
1263	(vec_merge:V4SF
1264	  (vec_duplicate:V4SF (match_dup 1))
1265	  (match_dup 2)
1266	  (const_int 1)))]
1267{
1268  operands[1] = gen_lowpart (SFmode, operands[1]);
1269  operands[2] = CONST0_RTX (V4SFmode);
1270})
1271
;; After reload, rewrite a V2DF load whose source matches
;; zero_extended_scalar_load_operand as a vec_concat of the DFmode lowpart of
;; the source with a DFmode zero.
1272(define_split
1273  [(set (match_operand:V2DF 0 "register_operand")
1274	(match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1275  "TARGET_SSE2 && reload_completed"
1276  [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1277{
1278  operands[1] = gen_lowpart (DFmode, operands[1]);
1279  operands[2] = CONST0_RTX (DFmode);
1280})
1281
;; Named misaligned-move expander for every mode in VMOVE.  The RTL template
;; is never emitted: the C body delegates to ix86_expand_vector_move_misalign
;; and finishes with DONE.
1282(define_expand "movmisalign<mode>"
1283  [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1284	(match_operand:VMOVE 1 "nonimmediate_operand"))]
1285  "TARGET_SSE"
1286{
1287  ix86_expand_vector_move_misalign (<MODE>mode, operands);
1288  DONE;
1289})
1290
1291;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
;; Matches a V2DF built from memory operand 1 and a zero, followed by an insn
;; that keeps element 0 of operand 2 and loads memory operand 3 into the
;; upper element.  When ix86_operands_ok_for_move_multiple accepts the
;; operands, both insns are replaced by one V2DF load from the address of
;; operand 1.
1292(define_peephole2
1293  [(set (match_operand:V2DF 0 "sse_reg_operand")
1294	(vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1295			 (match_operand:DF 4 "const0_operand")))
1296   (set (match_operand:V2DF 2 "sse_reg_operand")
1297	(vec_concat:V2DF (vec_select:DF (match_dup 2)
1298					(parallel [(const_int 0)]))
1299			 (match_operand:DF 3 "memory_operand")))]
1300  "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1301   && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1302  [(set (match_dup 2) (match_dup 5))]
1303  "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1304
;; Variant of the load-merging peephole in which the first insn is a plain
;; DF load from memory operand 1 and the second concatenates register
;; operand 4 with memory operand 3.  It additionally requires operands 4 and
;; 2 to be the same hard register, and produces one V2DF load from the
;; address of operand 1.
1305(define_peephole2
1306  [(set (match_operand:DF 0 "sse_reg_operand")
1307	(match_operand:DF 1 "memory_operand"))
1308   (set (match_operand:V2DF 2 "sse_reg_operand")
1309	(vec_concat:V2DF (match_operand:DF 4 "sse_reg_operand")
1310			 (match_operand:DF 3 "memory_operand")))]
1311  "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1312   && REGNO (operands[4]) == REGNO (operands[2])
1313   && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1314  [(set (match_dup 2) (match_dup 5))]
1315  "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1316
1317;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
;; Matches a store of element 0 of operand 1 to memory operand 0 followed by
;; a store of element 1 of operand 3 to memory operand 2.  When
;; ix86_operands_ok_for_move_multiple accepts the operands, both are replaced
;; by one V2DF store of operand 1 to the address of operand 0.
1318(define_peephole2
1319  [(set (match_operand:DF 0 "memory_operand")
1320	(vec_select:DF (match_operand:V2DF 1 "sse_reg_operand")
1321		       (parallel [(const_int 0)])))
1322   (set (match_operand:DF 2 "memory_operand")
1323	(vec_select:DF (match_operand:V2DF 3 "sse_reg_operand")
1324		       (parallel [(const_int 1)])))]
1325  "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1326   && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1327  [(set (match_dup 4) (match_dup 1))]
1328  "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
1329
;; (v)lddqu load for VI1 modes, modelled as UNSPEC_LDDQU of a memory operand.
;; The prefix_data16 and prefix_rep attributes are given explicit values
;; (0 and 1) only when TARGET_AVX is off; with TARGET_AVX they are "*".
1330(define_insn "<sse3>_lddqu<avxsizesuffix>"
1331  [(set (match_operand:VI1 0 "register_operand" "=x")
1332	(unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1333		    UNSPEC_LDDQU))]
1334  "TARGET_SSE3"
1335  "%vlddqu\t{%1, %0|%0, %1}"
1336  [(set_attr "type" "ssemov")
1337   (set_attr "movu" "1")
1338   (set (attr "prefix_data16")
1339     (if_then_else
1340       (match_test "TARGET_AVX")
1341     (const_string "*")
1342     (const_string "0")))
1343   (set (attr "prefix_rep")
1344     (if_then_else
1345       (match_test "TARGET_AVX")
1346     (const_string "*")
1347     (const_string "1")))
1348   (set_attr "prefix" "maybe_vex")
1349   (set_attr "mode" "<sseinsnmode>")])
1350
;; movnti store of a general register (constraint "r") to memory for SWI48
;; modes, modelled as UNSPEC_MOVNT.
1351(define_insn "sse2_movnti<mode>"
1352  [(set (match_operand:SWI48 0 "memory_operand" "=m")
1353	(unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1354		      UNSPEC_MOVNT))]
1355  "TARGET_SSE2"
1356  "movnti\t{%1, %0|%0, %1}"
1357  [(set_attr "type" "ssemov")
1358   (set_attr "prefix_data16" "0")
1359   (set_attr "mode" "<MODE>")])
1360
;; (v)movnt<ssemodesuffix> store of a vector register to memory for VF
;; modes, modelled as UNSPEC_MOVNT.
1361(define_insn "<sse>_movnt<mode>"
1362  [(set (match_operand:VF 0 "memory_operand" "=m")
1363	(unspec:VF
1364	  [(match_operand:VF 1 "register_operand" "v")]
1365	  UNSPEC_MOVNT))]
1366  "TARGET_SSE"
1367  "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1368  [(set_attr "type" "ssemov")
1369   (set_attr "prefix" "maybe_vex")
1370   (set_attr "mode" "<MODE>")])
1371
;; (v)movntdq store of a vector register to memory for VI8 modes, modelled
;; as UNSPEC_MOVNT.
;; NOTE(review): the "type" attribute here is "ssecvt" whereas the other
;; non-temporal store patterns in this file use "ssemov" -- confirm whether
;; that is intentional.
1372(define_insn "<sse2>_movnt<mode>"
1373  [(set (match_operand:VI8 0 "memory_operand" "=m")
1374	(unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1375		    UNSPEC_MOVNT))]
1376  "TARGET_SSE2"
1377  "%vmovntdq\t{%1, %0|%0, %1}"
1378  [(set_attr "type" "ssecvt")
1379   (set (attr "prefix_data16")
1380     (if_then_else
1381       (match_test "TARGET_AVX")
1382     (const_string "*")
1383     (const_string "1")))
1384   (set_attr "prefix" "maybe_vex")
1385   (set_attr "mode" "<sseinsnmode>")])
1386
1387; Expand patterns for non-temporal stores.  At the moment, only those
1388; that directly map to insns are defined; it would be possible to
1389; define patterns for other modes that would expand to several insns.
1390
1391;; Modes handled by storent patterns.
;; Every entry except V4SF carries its own enabling condition; V4SF is
;; governed only by the condition of the pattern that uses the iterator.
1392(define_mode_iterator STORENT_MODE
1393  [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1394   (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1395   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1396   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1397   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
1398
;; Named non-temporal store expander for every mode in STORENT_MODE.  It has
;; no C body, so the UNSPEC_MOVNT store in its template is emitted as is.
1399(define_expand "storent<mode>"
1400  [(set (match_operand:STORENT_MODE 0 "memory_operand")
1401	(unspec:STORENT_MODE
1402	  [(match_operand:STORENT_MODE 1 "register_operand")]
1403	  UNSPEC_MOVNT))]
1404  "TARGET_SSE")
1405
1406;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1407;;
1408;; Mask operations
1409;;
1410;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1411
1412;; All integer modes with AVX512BW/DQ.
;; QI requires TARGET_AVX512DQ, SI and DI require TARGET_AVX512BW, and HI
;; has no extra condition.
1413(define_mode_iterator SWI1248_AVX512BWDQ
1414  [(QI "TARGET_AVX512DQ") HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1415
1416;; All integer modes with AVX512BW, where HImode operation
1417;; can be used instead of QImode.
;; QI and HI have no extra condition here; SI and DI require TARGET_AVX512BW.
1418(define_mode_iterator SWI1248_AVX512BW
1419  [QI HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1420
1421;; All integer modes with AVX512BW/DQ, even HImode requires DQ.
;; QI and HI require TARGET_AVX512DQ; SI and DI require TARGET_AVX512BW.
1422(define_mode_iterator SWI1248_AVX512BWDQ2
1423  [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ")
1424   (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1425
;; Named expander for a plain move in each SWI1248_AVX512BWDQ mode.  It has
;; no C body, and its condition rejects the case where both operands are
;; memory.
1426(define_expand "kmov<mskmodesuffix>"
1427  [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
1428	(match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
1429  "TARGET_AVX512F
1430   && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
1431
;; Logic operation (every code in any_logic) on two "k"-constrained register
;; operands, tagged with UNSPEC_MASKOP.  For QImode without TARGET_AVX512DQ
;; the "mode" attribute becomes HI and the word-sized mnemonic k<logic>w is
;; emitted; otherwise the mnemonic carries <mskmodesuffix>.
1432(define_insn "k<code><mode>"
1433  [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1434	(any_logic:SWI1248_AVX512BW
1435	  (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1436	  (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1437   (unspec [(const_int 0)] UNSPEC_MASKOP)]
1438  "TARGET_AVX512F"
1439{
1440  if (get_attr_mode (insn) == MODE_HI)
1441    return "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
1442  else
1443    return "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1444}
1445  [(set_attr "type" "msklog")
1446   (set_attr "prefix" "vex")
1447   (set (attr "mode")
1448     (cond [(and (match_test "<MODE>mode == QImode")
1449		 (not (match_test "TARGET_AVX512DQ")))
1450	       (const_string "HI")
1451	   ]
1452	   (const_string "<MODE>")))])
1453
;; Computes (~operand 1) & operand 2 on "k"-constrained registers, tagged
;; with UNSPEC_MASKOP.  QImode without TARGET_AVX512DQ is emitted as kandnw
;; (mode attribute HI); otherwise kandn<mskmodesuffix>.
1454(define_insn "kandn<mode>"
1455  [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1456	(and:SWI1248_AVX512BW
1457	  (not:SWI1248_AVX512BW
1458	    (match_operand:SWI1248_AVX512BW 1 "register_operand" "k"))
1459	  (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1460   (unspec [(const_int 0)] UNSPEC_MASKOP)]
1461  "TARGET_AVX512F"
1462{
1463  if (get_attr_mode (insn) == MODE_HI)
1464    return "kandnw\t{%2, %1, %0|%0, %1, %2}";
1465  else
1466    return "kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1467}
1468  [(set_attr "type" "msklog")
1469   (set_attr "prefix" "vex")
1470   (set (attr "mode")
1471     (cond [(and (match_test "<MODE>mode == QImode")
1472		 (not (match_test "TARGET_AVX512DQ")))
1473	      (const_string "HI")
1474	   ]
1475	   (const_string "<MODE>")))])
1476
;; Computes ~(operand 1 ^ operand 2) on "k"-constrained registers, tagged
;; with UNSPEC_MASKOP.  QImode without TARGET_AVX512DQ is emitted as kxnorw
;; (mode attribute HI); otherwise kxnor<mskmodesuffix>.
1477(define_insn "kxnor<mode>"
1478  [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1479	(not:SWI1248_AVX512BW
1480	  (xor:SWI1248_AVX512BW
1481	    (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1482	    (match_operand:SWI1248_AVX512BW 2 "register_operand" "k"))))
1483   (unspec [(const_int 0)] UNSPEC_MASKOP)]
1484  "TARGET_AVX512F"
1485{
1486  if (get_attr_mode (insn) == MODE_HI)
1487    return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
1488  else
1489    return "kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1490}
1491  [(set_attr "type" "msklog")
1492   (set_attr "prefix" "vex")
1493   (set (attr "mode")
1494     (cond [(and (match_test "<MODE>mode == QImode")
1495		 (not (match_test "TARGET_AVX512DQ")))
1496	      (const_string "HI")
1497	   ]
1498	   (const_string "<MODE>")))])
1499
;; Computes ~operand 1 on a "k"-constrained register, tagged with
;; UNSPEC_MASKOP.  QImode without TARGET_AVX512DQ is emitted as knotw (mode
;; attribute HI); otherwise knot<mskmodesuffix>.
1500(define_insn "knot<mode>"
1501  [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1502	(not:SWI1248_AVX512BW
1503	  (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")))
1504   (unspec [(const_int 0)] UNSPEC_MASKOP)]
1505  "TARGET_AVX512F"
1506{
1507  if (get_attr_mode (insn) == MODE_HI)
1508    return "knotw\t{%1, %0|%0, %1}";
1509  else
1510    return "knot<mskmodesuffix>\t{%1, %0|%0, %1}";
1511}
1512  [(set_attr "type" "msklog")
1513   (set_attr "prefix" "vex")
1514   (set (attr "mode")
1515     (cond [(and (match_test "<MODE>mode == QImode")
1516		 (not (match_test "TARGET_AVX512DQ")))
1517	       (const_string "HI")
1518	   ]
1519	   (const_string "<MODE>")))])
1520
;; Addition of two "k"-constrained registers, tagged with UNSPEC_MASKOP and
;; emitted as kadd<mskmodesuffix>.  Uses the SWI1248_AVX512BWDQ2 iterator,
;; so there is no HImode fallback for QImode here.
1521(define_insn "kadd<mode>"
1522  [(set (match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "=k")
1523	(plus:SWI1248_AVX512BWDQ2
1524	  (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")
1525	  (match_operand:SWI1248_AVX512BWDQ2 2 "register_operand" "k")))
1526   (unspec [(const_int 0)] UNSPEC_MASKOP)]
1527  "TARGET_AVX512F"
1528  "kadd<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1529  [(set_attr "type" "msklog")
1530   (set_attr "prefix" "vex")
1531   (set_attr "mode" "<MODE>")])
1532
1533;; Mask variant shift mnemonics
;; ashift maps to "shiftl" and lshiftrt to "shiftr"; the shift insn that
;; follows splices the result between "k" and the mode suffix.
1534(define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
1535
;; Shift (every code in any_lshift) of a "k"-constrained register by the
;; QImode immediate operand 2, tagged with UNSPEC_MASKOP and emitted as
;; k<mshift><mskmodesuffix>.
1536(define_insn "k<code><mode>"
1537  [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
1538	(any_lshift:SWI1248_AVX512BWDQ
1539	  (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
1540	  (match_operand:QI 2 "immediate_operand" "n")))
1541   (unspec [(const_int 0)] UNSPEC_MASKOP)]
1542  "TARGET_AVX512F"
1543  "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1544  [(set_attr "type" "msklog")
1545   (set_attr "prefix" "vex")
1546   (set_attr "mode" "<MODE>")])
1547
;; Sets FLAGS_REG (CC mode) from two "k"-constrained registers through
;; UNSPEC_KTEST; emitted as ktest<mskmodesuffix>.
1548(define_insn "ktest<mode>"
1549  [(set (reg:CC FLAGS_REG)
1550	(unspec:CC
1551	  [(match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "k")
1552	   (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")]
1553	  UNSPEC_KTEST))]
1554  "TARGET_AVX512F"
1555  "ktest<mskmodesuffix>\t{%1, %0|%0, %1}"
1556  [(set_attr "mode" "<MODE>")
1557   (set_attr "type" "msklog")
1558   (set_attr "prefix" "vex")])
1559
;; Sets FLAGS_REG (CC mode) from two "k"-constrained registers through
;; UNSPEC_KORTEST; emitted as kortest<mskmodesuffix>.
1560(define_insn "kortest<mode>"
1561  [(set (reg:CC FLAGS_REG)
1562	(unspec:CC
1563	  [(match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "k")
1564	   (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")]
1565	  UNSPEC_KORTEST))]
1566  "TARGET_AVX512F"
1567  "kortest<mskmodesuffix>\t{%1, %0|%0, %1}"
1568  [(set_attr "mode" "<MODE>")
1569   (set_attr "type" "msklog")
1570   (set_attr "prefix" "vex")])
1571
;; Concatenates two QImode "k" registers into an HImode result:
;; (zero_extend (operand 1) << 8) | zero_extend (operand 2), so operand 1
;; supplies the upper byte.  Emitted as kunpckbw.
1572(define_insn "kunpckhi"
1573  [(set (match_operand:HI 0 "register_operand" "=k")
1574	(ior:HI
1575	  (ashift:HI
1576	    (zero_extend:HI (match_operand:QI 1 "register_operand" "k"))
1577	    (const_int 8))
1578	  (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))]
1579  "TARGET_AVX512F"
1580  "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
1581  [(set_attr "mode" "HI")
1582   (set_attr "type" "msklog")
1583   (set_attr "prefix" "vex")])
1584
;; Concatenates two HImode "k" registers into an SImode result:
;; (zero_extend (operand 1) << 16) | zero_extend (operand 2), so operand 1
;; supplies the upper half.  Emitted as kunpckwd; requires TARGET_AVX512BW.
;; The "type" and "prefix" attributes match kunpckhi and the other mask
;; insns in this section, so this pattern is classified the same way they
;; are instead of falling back to the attribute defaults.
1585(define_insn "kunpcksi"
1586  [(set (match_operand:SI 0 "register_operand" "=k")
1587	(ior:SI
1588	  (ashift:SI
1589	    (zero_extend:SI (match_operand:HI 1 "register_operand" "k"))
1590	    (const_int 16))
1591	  (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))]
1592  "TARGET_AVX512BW"
1593  "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
1594  [(set_attr "mode" "SI")
   (set_attr "type" "msklog")
   (set_attr "prefix" "vex")])
1595
;; Concatenates two SImode "k" registers into a DImode result:
;; (zero_extend (operand 1) << 32) | zero_extend (operand 2), so operand 1
;; supplies the upper half.  Emitted as kunpckdq; requires TARGET_AVX512BW.
;; The "type" and "prefix" attributes match kunpckhi and the other mask
;; insns in this section, so this pattern is classified the same way they
;; are instead of falling back to the attribute defaults.
1596(define_insn "kunpckdi"
1597  [(set (match_operand:DI 0 "register_operand" "=k")
1598	(ior:DI
1599	  (ashift:DI
1600	    (zero_extend:DI (match_operand:SI 1 "register_operand" "k"))
1601	    (const_int 32))
1602	  (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))]
1603  "TARGET_AVX512BW"
1604  "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
1605  [(set_attr "mode" "DI")
   (set_attr "type" "msklog")
   (set_attr "prefix" "vex")])
1606
1607
1608;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1609;;
1610;; Parallel floating point arithmetic
1611;;
1612;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1613
;; Named expander for every code in absneg on VF modes.  The template is
;; never emitted: the C fragment delegates to ix86_expand_fp_absneg_operator
;; and finishes with DONE.
1614(define_expand "<code><mode>2"
1615  [(set (match_operand:VF 0 "register_operand")
1616	(absneg:VF
1617	  (match_operand:VF 1 "register_operand")))]
1618  "TARGET_SSE"
1619  "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
1620
;; abs/neg on VF modes, carried as an absneg_operator applied to operand 1
;; plus a (use ...) of operand 2.  The insn always outputs "#" and is split
;; after reload into a single bitwise operation on the two operands: XOR
;; when the operator is NEG, AND otherwise.  Alternatives 0-1 are the
;; non-AVX forms, alternatives 2-3 the AVX forms.
1621(define_insn_and_split "*absneg<mode>2"
1622  [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1623	(match_operator:VF 3 "absneg_operator"
1624	  [(match_operand:VF 1 "vector_operand" "0,  xBm,v, m")]))
1625   (use (match_operand:VF 2 "vector_operand"    "xBm,0,  vm,v"))]
1626  "TARGET_SSE"
1627  "#"
1628  "&& reload_completed"
1629  [(const_int 0)]
1630{
1631  enum rtx_code absneg_op;
1632  rtx op1, op2;
1633  rtx t;
1634
1635  if (TARGET_AVX)
1636    {
      /* Put a memory operand 1 in the second source position.  */
1637      if (MEM_P (operands[1]))
1638	op1 = operands[2], op2 = operands[1];
1639      else
1640	op1 = operands[1], op2 = operands[2];
1641    }
1642  else
1643    {
      /* The first source is always the destination; the second source is
	 whichever of operands 1 and 2 is not the destination.  */
1644      op1 = operands[0];
1645      if (rtx_equal_p (operands[0], operands[1]))
1646	op2 = operands[2];
1647      else
1648	op2 = operands[1];
1649    }
1650
1651  absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1652  t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1653  t = gen_rtx_SET (operands[0], t);
1654  emit_insn (t);
1655  DONE;
1656}
1657  [(set_attr "isa" "noavx,noavx,avx,avx")])
1658
;; Named expander for every code in plusminus on VF modes, with the
;; <mask_name> and <round_name> substitutions applied.  The C fragment calls
;; ix86_fixup_binary_operands_no_copy and, since it does not end in DONE,
;; the template is then emitted.
1659(define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1660  [(set (match_operand:VF 0 "register_operand")
1661	(plusminus:VF
1662	  (match_operand:VF 1 "<round_nimm_predicate>")
1663	  (match_operand:VF 2 "<round_nimm_predicate>")))]
1664  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1665  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1666
;; Packed add/subtract on VF modes.  Alternative 0 is the non-AVX
;; two-operand form (operand 1 tied to the destination); alternative 1 is
;; the AVX three-operand form carrying the mask and rounding operand text.
;; The insn condition also requires ix86_binary_operator_ok.
1667(define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1668  [(set (match_operand:VF 0 "register_operand" "=x,v")
1669	(plusminus:VF
1670	  (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1671	  (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1672  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1673   && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1674  "@
1675   <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1676   v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1677  [(set_attr "isa" "noavx,avx")
1678   (set_attr "type" "sseadd")
1679   (set_attr "prefix" "<mask_prefix3>")
1680   (set_attr "mode" "<MODE>")])
1681
;; Scalar add/subtract on VF_128 modes: element 0 of the result comes from
;; the plusminus of operands 1 and 2 (vec_merge mask 1) and the remaining
;; elements are copied from operand 1.
1682(define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name>"
1683  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1684	(vec_merge:VF_128
1685	  (plusminus:VF_128
1686	    (match_operand:VF_128 1 "register_operand" "0,v")
1687	    (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
1688	  (match_dup 1)
1689	  (const_int 1)))]
1690  "TARGET_SSE"
1691  "@
1692   <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1693   v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1694  [(set_attr "isa" "noavx,avx")
1695   (set_attr "type" "sseadd")
1696   (set_attr "prefix" "<round_scalar_prefix>")
1697   (set_attr "mode" "<ssescalarmode>")])
1698
;; Named multiply expander for VF modes, with the <mask_name> and
;; <round_name> substitutions applied.  The C fragment calls
;; ix86_fixup_binary_operands_no_copy and the template is then emitted.
1699(define_expand "mul<mode>3<mask_name><round_name>"
1700  [(set (match_operand:VF 0 "register_operand")
1701	(mult:VF
1702	  (match_operand:VF 1 "<round_nimm_predicate>")
1703	  (match_operand:VF 2 "<round_nimm_predicate>")))]
1704  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1705  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1706
;; Packed multiply on VF modes.  Operand 1 is marked commutative ("%"), and
;; the insn condition rejects the case where operands 1 and 2 are both
;; memory.  Alternative 0 is the non-AVX two-operand form, alternative 1
;; the AVX three-operand form.
1707(define_insn "*mul<mode>3<mask_name><round_name>"
1708  [(set (match_operand:VF 0 "register_operand" "=x,v")
1709	(mult:VF
1710	  (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1711	  (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1712  "TARGET_SSE
1713   && !(MEM_P (operands[1]) && MEM_P (operands[2]))
1714   && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1715  "@
1716   mul<ssemodesuffix>\t{%2, %0|%0, %2}
1717   vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1718  [(set_attr "isa" "noavx,avx")
1719   (set_attr "type" "ssemul")
1720   (set_attr "prefix" "<mask_prefix3>")
1721   (set_attr "btver2_decode" "direct,double")
1722   (set_attr "mode" "<MODE>")])
1723
;; Scalar multiply/divide on VF_128 modes: element 0 of the result comes
;; from the multdiv of operands 1 and 2 (vec_merge mask 1) and the remaining
;; elements are copied from operand 1.  The "type" attribute is built from
;; the mnemonic as sse<multdiv_mnemonic>.
1724(define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
1725  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1726	(vec_merge:VF_128
1727	  (multdiv:VF_128
1728	    (match_operand:VF_128 1 "register_operand" "0,v")
1729	    (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
1730	  (match_dup 1)
1731	  (const_int 1)))]
1732  "TARGET_SSE"
1733  "@
1734   <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1735   v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1736  [(set_attr "isa" "noavx,avx")
1737   (set_attr "type" "sse<multdiv_mnemonic>")
1738   (set_attr "prefix" "<round_scalar_prefix>")
1739   (set_attr "btver2_decode" "direct,double")
1740   (set_attr "mode" "<ssescalarmode>")])
1741
;; Named divide expander for VF2 modes under TARGET_SSE2.  The C fragment
;; calls ix86_fixup_binary_operands_no_copy and the template is then emitted.
1742(define_expand "div<mode>3"
1743  [(set (match_operand:VF2 0 "register_operand")
1744	(div:VF2 (match_operand:VF2 1 "register_operand")
1745		 (match_operand:VF2 2 "vector_operand")))]
1746  "TARGET_SSE2"
1747  "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1748
;; Named divide expander for VF1 modes under TARGET_SSE.  After fixing up
;; the operands it either emits the division through ix86_emit_swdivsf (when
;; all of the listed target/flag conditions hold) and stops with DONE, or
;; falls through so that the plain div template is emitted.
1749(define_expand "div<mode>3"
1750  [(set (match_operand:VF1 0 "register_operand")
1751	(div:VF1 (match_operand:VF1 1 "register_operand")
1752		 (match_operand:VF1 2 "vector_operand")))]
1753  "TARGET_SSE"
1754{
1755  ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1756
  /* Use ix86_emit_swdivsf only when every one of these conditions holds.  */
1757  if (TARGET_SSE_MATH
1758      && TARGET_RECIP_VEC_DIV
1759      && !optimize_insn_for_size_p ()
1760      && flag_finite_math_only && !flag_trapping_math
1761      && flag_unsafe_math_optimizations)
1762    {
1763      ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
1764      DONE;
1765    }
1766})
1767
;; Packed divide on VF modes.  Operand 1 must be a register; alternative 0
;; is the non-AVX two-operand form (operand 1 tied to the destination) and
;; alternative 1 the AVX three-operand form with mask/rounding operand text.
1768(define_insn "<sse>_div<mode>3<mask_name><round_name>"
1769  [(set (match_operand:VF 0 "register_operand" "=x,v")
1770	(div:VF
1771	  (match_operand:VF 1 "register_operand" "0,v")
1772	  (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1773  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1774  "@
1775   div<ssemodesuffix>\t{%2, %0|%0, %2}
1776   vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1777  [(set_attr "isa" "noavx,avx")
1778   (set_attr "type" "ssediv")
1779   (set_attr "prefix" "<mask_prefix3>")
1780   (set_attr "mode" "<MODE>")])
1781
;; Packed (v)rcpps on VF1_128_256 modes, modelled as UNSPEC_RCP of operand 1.
1782(define_insn "<sse>_rcp<mode>2"
1783  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1784	(unspec:VF1_128_256
1785	  [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RCP))]
1786  "TARGET_SSE"
1787  "%vrcpps\t{%1, %0|%0, %1}"
1788  [(set_attr "type" "sse")
1789   (set_attr "atom_sse_attr" "rcp")
1790   (set_attr "btver2_sse_attr" "rcp")
1791   (set_attr "prefix" "maybe_vex")
1792   (set_attr "mode" "<MODE>")])
1793
;; Scalar ("vm" = vector-merge) reciprocal on V4SF.
;; The vec_merge with mask (const_int 1) takes element 0 from
;; UNSPEC_RCP(operand 1) and every other element from operand 2.
;; Alternatives:
;;   0 (noavx): rcpss; operand 2 is tied to the destination ("0").
;;   1 (avx):   vrcpss with operand 2 as a separate source register.
;; The Intel-syntax half prints operand 1 with the %k modifier.
;; NOTE(review): presumably %k narrows a memory operand to the 32-bit scalar
;; actually read -- confirm against ix86_print_operand.
1794(define_insn "sse_vmrcpv4sf2"
1795  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1796	(vec_merge:V4SF
1797	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1798		       UNSPEC_RCP)
1799	  (match_operand:V4SF 2 "register_operand" "0,x")
1800	  (const_int 1)))]
1801  "TARGET_SSE"
1802  "@
1803   rcpss\t{%1, %0|%0, %k1}
1804   vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1805  [(set_attr "isa" "noavx,avx")
1806   (set_attr "type" "sse")
1807   (set_attr "atom_sse_attr" "rcp")
1808   (set_attr "btver2_sse_attr" "rcp")
1809   (set_attr "prefix" "orig,vex")
1810   (set_attr "mode" "SF")])
1811
;; Packed vrcp14 for the VF_AVX512VL modes, expressed as UNSPEC_RCP14 of
;; operand 1.  Requires TARGET_AVX512F; always EVEX-prefixed.
;; The <mask_name> substitution supplies the masked variant, whose extra text
;; is spliced into the template through <mask_operand2>.
1812(define_insn "<mask_codefor>rcp14<mode><mask_name>"
1813  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1814	(unspec:VF_AVX512VL
1815	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1816	  UNSPEC_RCP14))]
1817  "TARGET_AVX512F"
1818  "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1819  [(set_attr "type" "sse")
1820   (set_attr "prefix" "evex")
1821   (set_attr "mode" "<MODE>")])
1822
;; Scalar vrcp14 for the VF_128 modes (TARGET_AVX512F, EVEX).
;; Element 0 of the result is UNSPEC_RCP14(operand 1); the remaining elements
;; are copied from operand 2 (vec_merge with mask (const_int 1)).
;; The Intel-syntax half prints operand 1 with the %<iptr> mode attribute.
1823(define_insn "srcp14<mode>"
1824  [(set (match_operand:VF_128 0 "register_operand" "=v")
1825	(vec_merge:VF_128
1826	  (unspec:VF_128
1827	    [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1828	    UNSPEC_RCP14)
1829	  (match_operand:VF_128 2 "register_operand" "v")
1830	  (const_int 1)))]
1831  "TARGET_AVX512F"
1832  "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1833  [(set_attr "type" "sse")
1834   (set_attr "prefix" "evex")
1835   (set_attr "mode" "<MODE>")])
1836
;; Masked form of the scalar vrcp14 pattern.
;; Inner vec_merge: under mask register operand 4 ("Yk"), choose between
;; UNSPEC_RCP14(operand 1) and operand 3.  Operand 3 is either tied to the
;; destination ("0") or a constant accepted by "C".
;; Outer vec_merge: mask (const_int 1) keeps element 0 of that result and
;; takes the other elements from operand 2.
;; The template prints the mask as {%4} followed by %N3.
;; NOTE(review): %N3 presumably prints the zero-masking suffix when operand 3
;; is the constant alternative -- confirm against ix86_print_operand.
1837(define_insn "srcp14<mode>_mask"
1838  [(set (match_operand:VF_128 0 "register_operand" "=v")
1839	(vec_merge:VF_128
1840	  (vec_merge:VF_128
1841	    (unspec:VF_128
1842	      [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1843	    UNSPEC_RCP14)
1844	      (match_operand:VF_128 3 "vector_move_operand" "0C")
1845	    (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
1846	  (match_operand:VF_128 2 "register_operand" "v")
1847	  (const_int 1)))]
1848  "TARGET_AVX512F"
1849  "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
1850  [(set_attr "type" "sse")
1851   (set_attr "prefix" "evex")
1852   (set_attr "mode" "<MODE>")])
1853
;; Named expander "sqrt<mode>2" over the VF2 modes; enabled under TARGET_SSE2.
;; There is no preparation code, so the (sqrt ...) template is always emitted
;; unchanged.
1854(define_expand "sqrt<mode>2"
1855  [(set (match_operand:VF2 0 "register_operand")
1856	(sqrt:VF2 (match_operand:VF2 1 "vector_operand")))]
1857  "TARGET_SSE2")
1858
;; Named expander "sqrt<mode>2" over the VF1 modes; enabled under TARGET_SSE.
;; When TARGET_SSE_MATH, TARGET_RECIP_VEC_SQRT, not optimizing for size,
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations all hold, the square root is emitted by
;; ix86_emit_swsqrtsf (last argument false) and the expander ends with DONE.
;; Otherwise the plain (sqrt ...) template is emitted.
;; The gating test matches the VF1 "div<mode>3" expander except for the
;; TARGET_RECIP_VEC_* flag consulted.
1859(define_expand "sqrt<mode>2"
1860  [(set (match_operand:VF1 0 "register_operand")
1861	(sqrt:VF1 (match_operand:VF1 1 "vector_operand")))]
1862  "TARGET_SSE"
1863{
1864  if (TARGET_SSE_MATH
1865      && TARGET_RECIP_VEC_SQRT
1866      && !optimize_insn_for_size_p ()
1867      && flag_finite_math_only && !flag_trapping_math
1868      && flag_unsafe_math_optimizations)
1869    {
1870      ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
1871      DONE;
1872    }
1873})
1874
;; Packed square-root instruction pattern for the VF modes, with optional
;; mask and rounding substitutions in both the name and the condition.
;; Alternatives:
;;   0 (noavx): sqrt<suffix> with source "x" or "Bm".
;;   1 (avx):   vsqrt<suffix>; the template splices in <round_mask_op2> and
;;              <mask_operand2>.
1875(define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1876  [(set (match_operand:VF 0 "register_operand" "=x,v")
1877	(sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1878  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1879  "@
1880   sqrt<ssemodesuffix>\t{%1, %0|%0, %1}
1881   vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1882  [(set_attr "isa" "noavx,avx")
1883   (set_attr "type" "sse")
1884   (set_attr "atom_sse_attr" "sqrt")
1885   (set_attr "btver2_sse_attr" "sqrt")
1886   (set_attr "prefix" "maybe_vex")
1887   (set_attr "mode" "<MODE>")])
1888
;; Scalar square root for the VF_128 modes.
;; Element 0 of the result is sqrt of operand 1; all other elements come from
;; operand 2 (vec_merge with mask (const_int 1)).
;; Alternatives:
;;   0 (noavx): two-operand form, operand 2 tied to the destination ("0").
;;   1 (avx):   three-operand form with the scalar mask/rounding substitution
;;              text spliced into the template.
;; The "mode" attribute is the scalar mode, not the full vector mode.
1889(define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
1890  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1891	(vec_merge:VF_128
1892	  (sqrt:VF_128
1893	    (match_operand:VF_128 1 "vector_operand" "xBm,<round_scalar_constraint>"))
1894	  (match_operand:VF_128 2 "register_operand" "0,v")
1895	  (const_int 1)))]
1896  "TARGET_SSE"
1897  "@
1898   sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1899   vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %<iptr>1<round_scalar_mask_op3>}"
1900  [(set_attr "isa" "noavx,avx")
1901   (set_attr "type" "sse")
1902   (set_attr "atom_sse_attr" "sqrt")
1903   (set_attr "prefix" "<round_scalar_prefix>")
1904   (set_attr "btver2_sse_attr" "sqrt")
1905   (set_attr "mode" "<ssescalarmode>")])
1906
;; Named expander "rsqrt<mode>2" for the VF1_128_256 modes; enabled when both
;; TARGET_SSE and TARGET_SSE_MATH hold.
;; The preparation code ends in DONE on every path, so the UNSPEC_RSQRT
;; template only describes the operands.  The emitted code comes entirely from
;; ix86_emit_swsqrtsf, called with its last argument true (the VF1
;; "sqrt<mode>2" expander passes false).
1907(define_expand "rsqrt<mode>2"
1908  [(set (match_operand:VF1_128_256 0 "register_operand")
1909	(unspec:VF1_128_256
1910	  [(match_operand:VF1_128_256 1 "vector_operand")] UNSPEC_RSQRT))]
1911  "TARGET_SSE && TARGET_SSE_MATH"
1912{
1913  ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
1914  DONE;
1915})
1916
;; V16SF counterpart of "rsqrt<mode>2"; enabled under TARGET_SSE_MATH and
;; TARGET_AVX512ER.  The template names UNSPEC_RSQRT28.
;; As in the narrower expander, the preparation code always ends in DONE and
;; delegates to ix86_emit_swsqrtsf with last argument true, so the template
;; itself is never emitted.
1917(define_expand "rsqrtv16sf2"
1918  [(set (match_operand:V16SF 0 "register_operand")
1919	(unspec:V16SF
1920	  [(match_operand:V16SF 1 "vector_operand")]
1921	  UNSPEC_RSQRT28))]
1922  "TARGET_SSE_MATH && TARGET_AVX512ER"
1923{
1924  ix86_emit_swsqrtsf (operands[0], operands[1], V16SFmode, true);
1925  DONE;
1926})
1927
;; Packed reciprocal square root for the VF1_128_256 modes, modelled as
;; UNSPEC_RSQRT of operand 1.  Single alternative; the template emits
;; "rsqrtps" behind a %v marker and the prefix attribute is "maybe_vex",
;; the same arrangement as the <sse>_rcp<mode>2 pattern.
1928(define_insn "<sse>_rsqrt<mode>2"
1929  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1930	(unspec:VF1_128_256
1931	  [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RSQRT))]
1932  "TARGET_SSE"
1933  "%vrsqrtps\t{%1, %0|%0, %1}"
1934  [(set_attr "type" "sse")
1935   (set_attr "prefix" "maybe_vex")
1936   (set_attr "mode" "<MODE>")])
1937
;; Packed vrsqrt14 for the VF_AVX512VL modes, expressed as UNSPEC_RSQRT14 of
;; operand 1.  Requires TARGET_AVX512F; always EVEX-prefixed.
;; The masked variant comes from the <mask_name> substitution, with its text
;; spliced in through <mask_operand2>.
1938(define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1939  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1940	(unspec:VF_AVX512VL
1941	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1942	  UNSPEC_RSQRT14))]
1943  "TARGET_AVX512F"
1944  "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1945  [(set_attr "type" "sse")
1946   (set_attr "prefix" "evex")
1947   (set_attr "mode" "<MODE>")])
1948
;; Scalar vrsqrt14 for the VF_128 modes (TARGET_AVX512F, EVEX).
;; Element 0 of the result is UNSPEC_RSQRT14(operand 1); the other elements
;; are copied from operand 2 (vec_merge with mask (const_int 1)).
1949(define_insn "rsqrt14<mode>"
1950  [(set (match_operand:VF_128 0 "register_operand" "=v")
1951	(vec_merge:VF_128
1952	  (unspec:VF_128
1953	    [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1954	    UNSPEC_RSQRT14)
1955	  (match_operand:VF_128 2 "register_operand" "v")
1956	  (const_int 1)))]
1957  "TARGET_AVX512F"
1958  "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1959  [(set_attr "type" "sse")
1960   (set_attr "prefix" "evex")
1961   (set_attr "mode" "<MODE>")])
1962
;; Masked form of the scalar vrsqrt14 pattern.
;; Inner vec_merge: under mask register operand 4 ("Yk"), choose between
;; UNSPEC_RSQRT14(operand 1) and operand 3 (tied to the destination, "0", or
;; a constant accepted by "C").
;; Outer vec_merge: mask (const_int 1) keeps element 0 of that result and
;; takes the other elements from operand 2.
;; Note the name is spelled "rsqrt14_<mode>_mask", with an underscore before
;; <mode>, unlike "srcp14<mode>_mask".
1963(define_insn "rsqrt14_<mode>_mask"
1964  [(set (match_operand:VF_128 0 "register_operand" "=v")
1965	(vec_merge:VF_128
1966	  (vec_merge:VF_128
1967	    (unspec:VF_128
1968	      [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1969	      UNSPEC_RSQRT14)
1970	      (match_operand:VF_128 3 "vector_move_operand" "0C")
1971	      (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
1972	  (match_operand:VF_128 2 "register_operand" "v")
1973	  (const_int 1)))]
1974  "TARGET_AVX512F"
1975  "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
1976  [(set_attr "type" "sse")
1977   (set_attr "prefix" "evex")
1978   (set_attr "mode" "<MODE>")])
1979
;; Scalar reciprocal square root on V4SF.
;; Element 0 of the result is UNSPEC_RSQRT(operand 1); the other elements come
;; from operand 2 (vec_merge with mask (const_int 1)).
;; Alternatives:
;;   0 (noavx): rsqrtss; operand 2 is tied to the destination ("0").
;;   1 (avx):   vrsqrtss with operand 2 as a separate source register.
1980(define_insn "sse_vmrsqrtv4sf2"
1981  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1982	(vec_merge:V4SF
1983	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1984		       UNSPEC_RSQRT)
1985	  (match_operand:V4SF 2 "register_operand" "0,x")
1986	  (const_int 1)))]
1987  "TARGET_SSE"
1988  "@
1989   rsqrtss\t{%1, %0|%0, %k1}
1990   vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1991  [(set_attr "isa" "noavx,avx")
1992   (set_attr "type" "sse")
1993   (set_attr "prefix" "orig,vex")
1994   (set_attr "mode" "SF")])
1995
;; Named expander for vector smax/smin (the smaxmin code iterator) on the VF
;; modes, with optional mask and SAE-only rounding substitutions.
;; Two paths:
;;   - If !flag_finite_math_only or flag_signed_zeros: operand 1 is forced
;;     into a register and the corresponding ieee_<maxmin_float> pattern is
;;     emitted instead, forwarding any mask/rounding arguments; ends with DONE.
;;   - Otherwise: the operands go through ix86_fixup_binary_operands_no_copy
;;     and the (smaxmin ...) template is emitted.
1996(define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1997  [(set (match_operand:VF 0 "register_operand")
1998	(smaxmin:VF
1999	  (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
2000	  (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
2001  "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2002{
2003  if (!flag_finite_math_only || flag_signed_zeros)
2004    {
2005      operands[1] = force_reg (<MODE>mode, operands[1]);
2006      emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
2007		 (operands[0], operands[1], operands[2]
2008		  <mask_operand_arg34>
2009		  <round_saeonly_mask_arg3>));
2010      DONE;
2011    }
2012  else
2013    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
2014})
2015
2016;; These versions of the min/max patterns are intentionally ignorant of
2017;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
2018;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
2019;; are undefined in this condition, we're certain this is correct.
2020
;; Anonymous (leading '*') matcher for the commutative smax/smin form.
;; The '%' in operand 1's constraint marks operands 1 and 2 as commutative.
;; The insn condition rejects the case where both operands are MEMs.
;; Alternatives:
;;   0 (noavx): two-operand form, operand 1 tied to the destination.
;;   1 (avx):   three-operand form with mask / SAE text spliced in.
2021(define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
2022  [(set (match_operand:VF 0 "register_operand" "=x,v")
2023	(smaxmin:VF
2024	  (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
2025	  (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
2026  "TARGET_SSE
2027   && !(MEM_P (operands[1]) && MEM_P (operands[2]))
2028   && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2029  "@
2030   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
2031   v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2032  [(set_attr "isa" "noavx,avx")
2033   (set_attr "type" "sseadd")
2034   (set_attr "btver2_sse_attr" "maxmin")
2035   (set_attr "prefix" "<mask_prefix3>")
2036   (set_attr "mode" "<MODE>")])
2037
2038;; These versions of the min/max patterns implement exactly the operations
2039;;   min = (op1 < op2 ? op1 : op2)
2040;;   max = (!(op1 < op2) ? op1 : op2)
2041;; Their operands are not commutative, and thus they may be used in the
2042;; presence of -0.0 and NaN.
2043
;; Non-commutative min/max, expressed as an unspec (IEEE_MAXMIN iterator) so
;; that generic RTL code cannot swap or simplify the operands.
;; Operand 1 must be a register and carries no '%' commutativity marker;
;; operand 2 may also be memory.
;; This is the pattern the smaxmin expander emits when
;; !flag_finite_math_only || flag_signed_zeros.
2044(define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
2045  [(set (match_operand:VF 0 "register_operand" "=x,v")
2046	(unspec:VF
2047	  [(match_operand:VF 1 "register_operand" "0,v")
2048	   (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")]
2049	  IEEE_MAXMIN))]
2050  "TARGET_SSE
2051   && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2052  "@
2053   <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
2054   v<ieee_maxmin><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2055  [(set_attr "isa" "noavx,avx")
2056   (set_attr "type" "sseadd")
2057   (set_attr "btver2_sse_attr" "maxmin")
2058   (set_attr "prefix" "<mask_prefix3>")
2059   (set_attr "mode" "<MODE>")])
2060
;; Scalar smax/smin for the VF_128 modes.
;; Element 0 of the result is smaxmin(operand 1, operand 2); the remaining
;; elements are copied from operand 1 itself (match_dup 1), via vec_merge with
;; mask (const_int 1).
;; Alternatives: legacy two-operand form (operand 1 tied to the destination)
;; and AVX three-operand form with scalar mask / SAE substitutions.
2061(define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
2062  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2063	(vec_merge:VF_128
2064	  (smaxmin:VF_128
2065	    (match_operand:VF_128 1 "register_operand" "0,v")
2066	    (match_operand:VF_128 2 "vector_operand" "xBm,<round_saeonly_scalar_constraint>"))
2067	 (match_dup 1)
2068	 (const_int 1)))]
2069  "TARGET_SSE"
2070  "@
2071   <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2072   v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
2073  [(set_attr "isa" "noavx,avx")
2074   (set_attr "type" "sse")
2075   (set_attr "btver2_sse_attr" "maxmin")
2076   (set_attr "prefix" "<round_saeonly_scalar_prefix>")
2077   (set_attr "mode" "<ssescalarmode>")])
2078
;; vaddsubpd on V4DF (TARGET_AVX only, VEX-encoded).
;; The vec_merge mask is 5 (binary 0101): elements 0 and 2 take
;; operand1 - operand2, elements 1 and 3 take operand1 + operand2.
;; The plus arm reuses the minus arm's operands through match_dup, so both
;; arms are guaranteed to see identical inputs.
2079(define_insn "avx_addsubv4df3"
2080  [(set (match_operand:V4DF 0 "register_operand" "=x")
2081	(vec_merge:V4DF
2082	  (minus:V4DF
2083	    (match_operand:V4DF 1 "register_operand" "x")
2084	    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
2085	  (plus:V4DF (match_dup 1) (match_dup 2))
2086	  (const_int 5)))]
2087  "TARGET_AVX"
2088  "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2089  [(set_attr "type" "sseadd")
2090   (set_attr "prefix" "vex")
2091   (set_attr "mode" "V4DF")])
2092
;; addsubpd / vaddsubpd on V2DF (TARGET_SSE3).
;; The vec_merge mask is 1: element 0 takes operand1 - operand2, element 1
;; takes operand1 + operand2.
;; Alternatives:
;;   0 (noavx): two-operand form, operand 1 tied to the destination.
;;   1 (avx):   three-operand VEX form.
2093(define_insn "sse3_addsubv2df3"
2094  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2095	(vec_merge:V2DF
2096	  (minus:V2DF
2097	    (match_operand:V2DF 1 "register_operand" "0,x")
2098	    (match_operand:V2DF 2 "vector_operand" "xBm,xm"))
2099	  (plus:V2DF (match_dup 1) (match_dup 2))
2100	  (const_int 1)))]
2101  "TARGET_SSE3"
2102  "@
2103   addsubpd\t{%2, %0|%0, %2}
2104   vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2105  [(set_attr "isa" "noavx,avx")
2106   (set_attr "type" "sseadd")
2107   (set_attr "atom_unit" "complex")
2108   (set_attr "prefix" "orig,vex")
2109   (set_attr "mode" "V2DF")])
2110
;; vaddsubps on V8SF (TARGET_AVX only, VEX-encoded).
;; The vec_merge mask is 85 (binary 01010101): even-numbered elements take
;; operand1 - operand2, odd-numbered elements take operand1 + operand2.
2111(define_insn "avx_addsubv8sf3"
2112  [(set (match_operand:V8SF 0 "register_operand" "=x")
2113	(vec_merge:V8SF
2114	  (minus:V8SF
2115	    (match_operand:V8SF 1 "register_operand" "x")
2116	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2117	  (plus:V8SF (match_dup 1) (match_dup 2))
2118	  (const_int 85)))]
2119  "TARGET_AVX"
2120  "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2121  [(set_attr "type" "sseadd")
2122   (set_attr "prefix" "vex")
2123   (set_attr "mode" "V8SF")])
2124
;; addsubps / vaddsubps on V4SF (TARGET_SSE3).
;; The vec_merge mask is 5 (binary 0101): elements 0 and 2 take
;; operand1 - operand2, elements 1 and 3 take operand1 + operand2.
;; Alternatives: legacy two-operand form and three-operand VEX form.
;; "prefix_rep" is 1 for the legacy alternative only; this is a
;; length-computation attribute.
2125(define_insn "sse3_addsubv4sf3"
2126  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2127	(vec_merge:V4SF
2128	  (minus:V4SF
2129	    (match_operand:V4SF 1 "register_operand" "0,x")
2130	    (match_operand:V4SF 2 "vector_operand" "xBm,xm"))
2131	  (plus:V4SF (match_dup 1) (match_dup 2))
2132	  (const_int 5)))]
2133  "TARGET_SSE3"
2134  "@
2135   addsubps\t{%2, %0|%0, %2}
2136   vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2137  [(set_attr "isa" "noavx,avx")
2138   (set_attr "type" "sseadd")
2139   (set_attr "prefix" "orig,vex")
2140   (set_attr "prefix_rep" "1,*")
2141   (set_attr "mode" "V4SF")])
2142
;; Splitter: canonicalize a merge of (minus a b) and (plus c d) into the
;; addsub shape used by the insn patterns in this file.
;; Input: operator 6 (predicate "addsub_vm_operator", defined elsewhere)
;; applied to [minus(op1, op2), plus(op3, op4), const_int op5].
;; Condition: TARGET_SSE3, new pseudos may still be created, and the plus
;; operands equal the minus operands in either order
;; (op1==op3 && op2==op4, or op1==op4 && op2==op3).
;; Output: a vec_merge whose plus arm is rebuilt from operands 1 and 2, so
;; both arms use identical operand order.  The mask (operand 5) is reused
;; unchanged.
2143(define_split
2144  [(set (match_operand:VF_128_256 0 "register_operand")
2145	(match_operator:VF_128_256 6 "addsub_vm_operator"
2146	  [(minus:VF_128_256
2147	     (match_operand:VF_128_256 1 "register_operand")
2148	     (match_operand:VF_128_256 2 "vector_operand"))
2149	   (plus:VF_128_256
2150	     (match_operand:VF_128_256 3 "vector_operand")
2151	     (match_operand:VF_128_256 4 "vector_operand"))
2152	   (match_operand 5 "const_int_operand")]))]
2153  "TARGET_SSE3
2154   && can_create_pseudo_p ()
2155   && ((rtx_equal_p (operands[1], operands[3])
2156	&& rtx_equal_p (operands[2], operands[4]))
2157       || (rtx_equal_p (operands[1], operands[4])
2158	   && rtx_equal_p (operands[2], operands[3])))"
2159  [(set (match_dup 0)
2160	(vec_merge:VF_128_256
2161	  (minus:VF_128_256 (match_dup 1) (match_dup 2))
2162	  (plus:VF_128_256 (match_dup 1) (match_dup 2))
2163	  (match_dup 5)))])
2164
;; Splitter: same canonicalization as the minus-first splitter, for the case
;; where the plus arm comes first and the minus arm second.
;; The output is rebuilt from the minus arm's operands (3 and 4), with minus
;; as the first vec_merge arm.  Because the two arms have traded places, the
;; preparation code complements the merge mask and truncates it to the number
;; of vector elements (GET_MODE_NUNITS bits).
2165(define_split
2166  [(set (match_operand:VF_128_256 0 "register_operand")
2167	(match_operator:VF_128_256 6 "addsub_vm_operator"
2168	  [(plus:VF_128_256
2169	     (match_operand:VF_128_256 1 "vector_operand")
2170	     (match_operand:VF_128_256 2 "vector_operand"))
2171	   (minus:VF_128_256
2172	     (match_operand:VF_128_256 3 "register_operand")
2173	     (match_operand:VF_128_256 4 "vector_operand"))
2174	   (match_operand 5 "const_int_operand")]))]
2175  "TARGET_SSE3
2176   && can_create_pseudo_p ()
2177   && ((rtx_equal_p (operands[1], operands[3])
2178	&& rtx_equal_p (operands[2], operands[4]))
2179       || (rtx_equal_p (operands[1], operands[4])
2180	   && rtx_equal_p (operands[2], operands[3])))"
2181  [(set (match_dup 0)
2182	(vec_merge:VF_128_256
2183	  (minus:VF_128_256 (match_dup 3) (match_dup 4))
2184	  (plus:VF_128_256 (match_dup 3) (match_dup 4))
2185	  (match_dup 5)))]
2186{
2187  /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes.  */
2188  operands[5]
2189    = GEN_INT (~INTVAL (operands[5])
2190	       & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
2191})
2192
;; Splitter: recognize addsub written as a selection out of a double-width
;; concatenation and rewrite it as the canonical vec_merge.
;; Input: operator 7 (predicate "addsub_vs_operator", defined elsewhere)
;; applied to a vec_concat of minus(op1, op2) and plus(op3, op4), plus a
;; selector parallel (operand 5, predicate "addsub_vs_parallel").
;; Condition: same as the vec_merge splitters (TARGET_SSE3, pseudos allowed,
;; plus operands equal to the minus operands in either order).
;; The preparation code turns the selector into a merge mask: bit i is set
;; when selector element i is below GET_MODE_NUNITS, i.e. when it indexes the
;; first (minus) half of the concatenation.  Operand 5 is then replaced by
;; that const_int for use in the output template.
;; Operand 6 only matches the first selector element and is not used by the
;; preparation code.
2193(define_split
2194  [(set (match_operand:VF_128_256 0 "register_operand")
2195	(match_operator:VF_128_256 7 "addsub_vs_operator"
2196	  [(vec_concat:<ssedoublemode>
2197	     (minus:VF_128_256
2198	       (match_operand:VF_128_256 1 "register_operand")
2199	       (match_operand:VF_128_256 2 "vector_operand"))
2200	     (plus:VF_128_256
2201	       (match_operand:VF_128_256 3 "vector_operand")
2202	       (match_operand:VF_128_256 4 "vector_operand")))
2203	   (match_parallel 5 "addsub_vs_parallel"
2204	     [(match_operand 6 "const_int_operand")])]))]
2205  "TARGET_SSE3
2206   && can_create_pseudo_p ()
2207   && ((rtx_equal_p (operands[1], operands[3])
2208	&& rtx_equal_p (operands[2], operands[4]))
2209       || (rtx_equal_p (operands[1], operands[4])
2210	   && rtx_equal_p (operands[2], operands[3])))"
2211  [(set (match_dup 0)
2212	(vec_merge:VF_128_256
2213	  (minus:VF_128_256 (match_dup 1) (match_dup 2))
2214	  (plus:VF_128_256 (match_dup 1) (match_dup 2))
2215	  (match_dup 5)))]
2216{
2217  int i, nelt = XVECLEN (operands[5], 0);
2218  HOST_WIDE_INT ival = 0;
2219
2220  for (i = 0; i < nelt; i++)
2221    if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
2222      ival |= HOST_WIDE_INT_1 << i;
2223
2224  operands[5] = GEN_INT (ival);
2225})
2226
;; Splitter: counterpart of the minus-first vec_select splitter, for the case
;; where the plus result is the first half of the concatenation and the minus
;; result the second half.
;; The output is rebuilt from the minus arm's operands (3 and 4).
;; Because minus now occupies the upper half of the concatenation, the mask
;; test is inverted: bit i is set when selector element i is
;; >= GET_MODE_NUNITS, i.e. when it indexes the minus half.
2227(define_split
2228  [(set (match_operand:VF_128_256 0 "register_operand")
2229	(match_operator:VF_128_256 7 "addsub_vs_operator"
2230	  [(vec_concat:<ssedoublemode>
2231	     (plus:VF_128_256
2232	       (match_operand:VF_128_256 1 "vector_operand")
2233	       (match_operand:VF_128_256 2 "vector_operand"))
2234	     (minus:VF_128_256
2235	       (match_operand:VF_128_256 3 "register_operand")
2236	       (match_operand:VF_128_256 4 "vector_operand")))
2237	   (match_parallel 5 "addsub_vs_parallel"
2238	     [(match_operand 6 "const_int_operand")])]))]
2239  "TARGET_SSE3
2240   && can_create_pseudo_p ()
2241   && ((rtx_equal_p (operands[1], operands[3])
2242	&& rtx_equal_p (operands[2], operands[4]))
2243       || (rtx_equal_p (operands[1], operands[4])
2244	   && rtx_equal_p (operands[2], operands[3])))"
2245  [(set (match_dup 0)
2246	(vec_merge:VF_128_256
2247	  (minus:VF_128_256 (match_dup 3) (match_dup 4))
2248	  (plus:VF_128_256 (match_dup 3) (match_dup 4))
2249	  (match_dup 5)))]
2250{
2251  int i, nelt = XVECLEN (operands[5], 0);
2252  HOST_WIDE_INT ival = 0;
2253
2254  for (i = 0; i < nelt; i++)
2255    if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
2256      ival |= HOST_WIDE_INT_1 << i;
2257
2258  operands[5] = GEN_INT (ival);
2259})
2260
;; Horizontal add/subtract on V4DF (plusminus code iterator), TARGET_AVX.
;; With a = operand 1 and b = operand 2, and OP being plus or minus, the
;; result elements spelled out by the RTL are:
;;   [0] a[0] OP a[1]    [1] b[0] OP b[1]
;;   [2] a[2] OP a[3]    [3] b[2] OP b[3]
;; i.e. the low and the high pair of elements are each processed
;; independently.
2261(define_insn "avx_h<plusminus_insn>v4df3"
2262  [(set (match_operand:V4DF 0 "register_operand" "=x")
2263	(vec_concat:V4DF
2264	  (vec_concat:V2DF
2265	    (plusminus:DF
2266	      (vec_select:DF
2267		(match_operand:V4DF 1 "register_operand" "x")
2268		(parallel [(const_int 0)]))
2269	      (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2270	    (plusminus:DF
2271	      (vec_select:DF
2272		(match_operand:V4DF 2 "nonimmediate_operand" "xm")
2273		(parallel [(const_int 0)]))
2274	      (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2275	  (vec_concat:V2DF
2276	    (plusminus:DF
2277	      (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2278	      (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2279	    (plusminus:DF
2280	      (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2281	      (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2282  "TARGET_AVX"
2283  "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2284  [(set_attr "type" "sseadd")
2285   (set_attr "prefix" "vex")
2286   (set_attr "mode" "V4DF")])
2287
;; Named expander for horizontal add on V2DF (TARGET_SSE3).
;; Result is [op1[0] + op1[1], op2[0] + op2[1]].
;; There is no preparation code; the template is emitted as written, with the
;; element indices in 0-then-1 order.  The "*sse3_haddv2df3" insn pattern is
;; the one that recognizes the resulting RTL.
2288(define_expand "sse3_haddv2df3"
2289  [(set (match_operand:V2DF 0 "register_operand")
2290	(vec_concat:V2DF
2291	  (plus:DF
2292	    (vec_select:DF
2293	      (match_operand:V2DF 1 "register_operand")
2294	      (parallel [(const_int 0)]))
2295	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2296	  (plus:DF
2297	    (vec_select:DF
2298	      (match_operand:V2DF 2 "vector_operand")
2299	      (parallel [(const_int 0)]))
2300	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2301  "TARGET_SSE3")
2302
;; Matcher for haddpd / vhaddpd on V2DF.
;; Unlike the expander, the element indices are operands (3..6, each 0 or 1)
;; rather than fixed constants.  The insn condition only requires the two
;; indices within each sum to differ, so both a[0]+a[1] and a[1]+a[0] are
;; accepted, in either order.
;; Alternatives: legacy two-operand form (operand 1 tied to the destination)
;; and three-operand VEX form.
2303(define_insn "*sse3_haddv2df3"
2304  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2305	(vec_concat:V2DF
2306	  (plus:DF
2307	    (vec_select:DF
2308	      (match_operand:V2DF 1 "register_operand" "0,x")
2309	      (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2310	    (vec_select:DF
2311	      (match_dup 1)
2312	      (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2313	  (plus:DF
2314	    (vec_select:DF
2315	      (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2316	      (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2317	    (vec_select:DF
2318	      (match_dup 2)
2319	      (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2320  "TARGET_SSE3
2321   && INTVAL (operands[3]) != INTVAL (operands[4])
2322   && INTVAL (operands[5]) != INTVAL (operands[6])"
2323  "@
2324   haddpd\t{%2, %0|%0, %2}
2325   vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2326  [(set_attr "isa" "noavx,avx")
2327   (set_attr "type" "sseadd")
2328   (set_attr "prefix" "orig,vex")
2329   (set_attr "mode" "V2DF")])
2330
;; hsubpd / vhsubpd on V2DF (TARGET_SSE3).
;; Result is [op1[0] - op1[1], op2[0] - op2[1]].
;; The element indices are fixed constants here, unlike in the hadd matcher,
;; because the order of the subtraction operands is significant.
;; Alternatives: legacy two-operand form and three-operand VEX form.
2331(define_insn "sse3_hsubv2df3"
2332  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2333	(vec_concat:V2DF
2334	  (minus:DF
2335	    (vec_select:DF
2336	      (match_operand:V2DF 1 "register_operand" "0,x")
2337	      (parallel [(const_int 0)]))
2338	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2339	  (minus:DF
2340	    (vec_select:DF
2341	      (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2342	      (parallel [(const_int 0)]))
2343	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2344  "TARGET_SSE3"
2345  "@
2346   hsubpd\t{%2, %0|%0, %2}
2347   vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2348  [(set_attr "isa" "noavx,avx")
2349   (set_attr "type" "sseadd")
2350   (set_attr "prefix" "orig,vex")
2351   (set_attr "mode" "V2DF")])
2352
;; Scalar-result variant: a DF destination receives the sum of the two
;; elements of V2DF operand 1.  The indices are operands 2 and 3 (each 0 or 1)
;; and must differ, so either order is accepted.
;; The instruction is emitted with the same register as both sources:
;;   0 (noavx): haddpd %0, %0   (operand 1 is tied to the destination)
;;   1 (avx):   vhaddpd %1, %1, %0
2353(define_insn "*sse3_haddv2df3_low"
2354  [(set (match_operand:DF 0 "register_operand" "=x,x")
2355	(plus:DF
2356	  (vec_select:DF
2357	    (match_operand:V2DF 1 "register_operand" "0,x")
2358	    (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2359	  (vec_select:DF
2360	    (match_dup 1)
2361	    (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2362  "TARGET_SSE3
2363   && INTVAL (operands[2]) != INTVAL (operands[3])"
2364  "@
2365   haddpd\t{%0, %0|%0, %0}
2366   vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2367  [(set_attr "isa" "noavx,avx")
2368   (set_attr "type" "sseadd1")
2369   (set_attr "prefix" "orig,vex")
2370   (set_attr "mode" "V2DF")])
2371
;; Scalar-result variant of horizontal subtract: a DF destination receives
;; op1[0] - op1[1].  The indices are fixed (0 then 1) because subtraction is
;; not commutative.
;; As with the hadd "_low" pattern, the instruction uses the same register
;; for both sources.
2372(define_insn "*sse3_hsubv2df3_low"
2373  [(set (match_operand:DF 0 "register_operand" "=x,x")
2374	(minus:DF
2375	  (vec_select:DF
2376	    (match_operand:V2DF 1 "register_operand" "0,x")
2377	    (parallel [(const_int 0)]))
2378	  (vec_select:DF
2379	    (match_dup 1)
2380	    (parallel [(const_int 1)]))))]
2381  "TARGET_SSE3"
2382  "@
2383   hsubpd\t{%0, %0|%0, %0}
2384   vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2385  [(set_attr "isa" "noavx,avx")
2386   (set_attr "type" "sseadd1")
2387   (set_attr "prefix" "orig,vex")
2388   (set_attr "mode" "V2DF")])
2389
;; Horizontal add/subtract on V8SF (plusminus code iterator), TARGET_AVX.
;; With a = operand 1, b = operand 2 and OP being plus or minus, the result
;; elements spelled out by the RTL are:
;;   [0] a[0] OP a[1]   [1] a[2] OP a[3]   [2] b[0] OP b[1]   [3] b[2] OP b[3]
;;   [4] a[4] OP a[5]   [5] a[6] OP a[7]   [6] b[4] OP b[5]   [7] b[6] OP b[7]
;; i.e. the lower four and upper four elements are processed independently.
2390(define_insn "avx_h<plusminus_insn>v8sf3"
2391  [(set (match_operand:V8SF 0 "register_operand" "=x")
2392	(vec_concat:V8SF
2393	  (vec_concat:V4SF
2394	    (vec_concat:V2SF
2395	      (plusminus:SF
2396		(vec_select:SF
2397		  (match_operand:V8SF 1 "register_operand" "x")
2398		  (parallel [(const_int 0)]))
2399		(vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2400	      (plusminus:SF
2401		(vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2402		(vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2403	    (vec_concat:V2SF
2404	      (plusminus:SF
2405		(vec_select:SF
2406		  (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2407		  (parallel [(const_int 0)]))
2408		(vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2409	      (plusminus:SF
2410		(vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2411		(vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2412	  (vec_concat:V4SF
2413	    (vec_concat:V2SF
2414	      (plusminus:SF
2415		(vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2416		(vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2417	      (plusminus:SF
2418		(vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2419		(vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2420	    (vec_concat:V2SF
2421	      (plusminus:SF
2422		(vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2423		(vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2424	      (plusminus:SF
2425		(vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2426		(vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2427  "TARGET_AVX"
2428  "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2429  [(set_attr "type" "sseadd")
2430   (set_attr "prefix" "vex")
2431   (set_attr "mode" "V8SF")])
2432
;; Horizontal add/subtract on V4SF (plusminus code iterator), TARGET_SSE3.
;; With a = operand 1, b = operand 2 and OP being plus or minus:
;;   [0] a[0] OP a[1]   [1] a[2] OP a[3]   [2] b[0] OP b[1]   [3] b[2] OP b[3]
;; Alternatives: legacy two-operand form (operand 1 tied to the destination)
;; and three-operand VEX form.  "prefix_rep" is 1 for the legacy alternative
;; only.
2433(define_insn "sse3_h<plusminus_insn>v4sf3"
2434  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2435	(vec_concat:V4SF
2436	  (vec_concat:V2SF
2437	    (plusminus:SF
2438	      (vec_select:SF
2439		(match_operand:V4SF 1 "register_operand" "0,x")
2440		(parallel [(const_int 0)]))
2441	      (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2442	    (plusminus:SF
2443	      (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2444	      (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2445	  (vec_concat:V2SF
2446	    (plusminus:SF
2447	      (vec_select:SF
2448		(match_operand:V4SF 2 "vector_operand" "xBm,xm")
2449		(parallel [(const_int 0)]))
2450	      (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2451	    (plusminus:SF
2452	      (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2453	      (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2454  "TARGET_SSE3"
2455  "@
2456   h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2457   vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2458  [(set_attr "isa" "noavx,avx")
2459   (set_attr "type" "sseadd")
2460   (set_attr "atom_unit" "complex")
2461   (set_attr "prefix" "orig,vex")
2462   (set_attr "prefix_rep" "1,*")
2463   (set_attr "mode" "V4SF")])
2464
;; Sum-reduce V8DF operand 1 into DF operand 0 (TARGET_AVX512F).
;; The reduction is delegated to ix86_expand_reduc with gen_addv8df3 as the
;; combining operation, writing into a fresh V8DF pseudo.  Element 0 of that
;; pseudo is then extracted into operand 0.
;; NOTE(review): this relies on ix86_expand_reduc (defined outside this file)
;; leaving the full reduction in element 0 -- confirm in i386.c.
2465(define_expand "reduc_plus_scal_v8df"
2466  [(match_operand:DF 0 "register_operand")
2467   (match_operand:V8DF 1 "register_operand")]
2468  "TARGET_AVX512F"
2469{
2470  rtx tmp = gen_reg_rtx (V8DFmode);
2471  ix86_expand_reduc (gen_addv8df3, tmp, operands[1]);
2472  emit_insn (gen_vec_extractv8dfdf (operands[0], tmp, const0_rtx));
2473  DONE;
2474})
2475
;; Sum-reduce V4DF operand 1 into DF operand 0 (TARGET_AVX).
;; Sequence emitted, with x = operand 1:
;;   tmp     = hadd (x, x)                (avx_haddv4df3)
;;   tmp2    = vperm2f128 (tmp, tmp, 1)   (avx_vperm2f128v4df3)
;;   vec_res = tmp + tmp2                 (addv4df3)
;;   op0     = vec_res[0]
;; Per the avx_h<plusminus_insn>v4df3 pattern, hadd (x, x) yields
;; [x0+x1, x0+x1, x2+x3, x2+x3].
;; NOTE(review): immediate 1 with identical sources presumably swaps the two
;; 128-bit halves, so that the add produces the full sum in element 0 --
;; confirm against the vperm2f128 pattern.
2476(define_expand "reduc_plus_scal_v4df"
2477  [(match_operand:DF 0 "register_operand")
2478   (match_operand:V4DF 1 "register_operand")]
2479  "TARGET_AVX"
2480{
2481  rtx tmp = gen_reg_rtx (V4DFmode);
2482  rtx tmp2 = gen_reg_rtx (V4DFmode);
2483  rtx vec_res = gen_reg_rtx (V4DFmode);
2484  emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
2485  emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
2486  emit_insn (gen_addv4df3 (vec_res, tmp, tmp2));
2487  emit_insn (gen_vec_extractv4dfdf (operands[0], vec_res, const0_rtx));
2488  DONE;
2489})
2490
;; Sum-reduce V2DF operand 1 into DF operand 0 (TARGET_SSE3).
;; A single horizontal add of the operand with itself (sse3_haddv2df3) puts
;; x0+x1 in element 0 of a fresh pseudo, which is then extracted.
2491(define_expand "reduc_plus_scal_v2df"
2492  [(match_operand:DF 0 "register_operand")
2493   (match_operand:V2DF 1 "register_operand")]
2494  "TARGET_SSE3"
2495{
2496  rtx tmp = gen_reg_rtx (V2DFmode);
2497  emit_insn (gen_sse3_haddv2df3 (tmp, operands[1], operands[1]));
2498  emit_insn (gen_vec_extractv2dfdf (operands[0], tmp, const0_rtx));
2499  DONE;
2500})
2501
;; Sum-reduce V16SF operand 1 into SF operand 0 (TARGET_AVX512F).
;; Same structure as reduc_plus_scal_v8df: ix86_expand_reduc with
;; gen_addv16sf3 into a fresh pseudo, then extraction of element 0.
2502(define_expand "reduc_plus_scal_v16sf"
2503  [(match_operand:SF 0 "register_operand")
2504   (match_operand:V16SF 1 "register_operand")]
2505  "TARGET_AVX512F"
2506{
2507  rtx tmp = gen_reg_rtx (V16SFmode);
2508  ix86_expand_reduc (gen_addv16sf3, tmp, operands[1]);
2509  emit_insn (gen_vec_extractv16sfsf (operands[0], tmp, const0_rtx));
2510  DONE;
2511})
2512
;; Sum-reduce V8SF operand 1 into SF operand 0 (TARGET_AVX).
;; Sequence emitted, with x = operand 1:
;;   tmp     = hadd (x, x)                 (avx_haddv8sf3)
;;   tmp2    = hadd (tmp, tmp)
;;   tmp     = vperm2f128 (tmp2, tmp2, 1)  (tmp is reused as the destination)
;;   vec_res = tmp + tmp2                  (addv8sf3)
;;   op0     = vec_res[0]
;; Two horizontal adds are needed because each lower/upper group holds four
;; SF elements; see the avx_h<plusminus_insn>v8sf3 pattern for the layout.
;; NOTE(review): as in the V4DF expander, immediate 1 presumably exchanges
;; the 128-bit halves -- confirm against the vperm2f128 pattern.
2513(define_expand "reduc_plus_scal_v8sf"
2514  [(match_operand:SF 0 "register_operand")
2515   (match_operand:V8SF 1 "register_operand")]
2516  "TARGET_AVX"
2517{
2518  rtx tmp = gen_reg_rtx (V8SFmode);
2519  rtx tmp2 = gen_reg_rtx (V8SFmode);
2520  rtx vec_res = gen_reg_rtx (V8SFmode);
2521  emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2522  emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2523  emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2524  emit_insn (gen_addv8sf3 (vec_res, tmp, tmp2));
2525  emit_insn (gen_vec_extractv8sfsf (operands[0], vec_res, const0_rtx));
2526  DONE;
2527})
2528
;; Sum-reduce V4SF operand 1 into SF operand 0.  Enabled under plain
;; TARGET_SSE, with two strategies:
;;   - TARGET_SSE3: two successive horizontal adds (sse3_haddv4sf3), the
;;     second applied to the result of the first.
;;   - otherwise: the generic ix86_expand_reduc helper with gen_addv4sf3.
;; Either way element 0 of vec_res is extracted into operand 0.
2529(define_expand "reduc_plus_scal_v4sf"
2530  [(match_operand:SF 0 "register_operand")
2531   (match_operand:V4SF 1 "register_operand")]
2532  "TARGET_SSE"
2533{
2534  rtx vec_res = gen_reg_rtx (V4SFmode);
2535  if (TARGET_SSE3)
2536    {
2537      rtx tmp = gen_reg_rtx (V4SFmode);
2538      emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2539      emit_insn (gen_sse3_haddv4sf3 (vec_res, tmp, tmp));
2540    }
2541  else
2542    ix86_expand_reduc (gen_addv4sf3, vec_res, operands[1]);
2543  emit_insn (gen_vec_extractv4sfsf (operands[0], vec_res, const0_rtx));
2544  DONE;
2545})
2546
2547;; Modes handled by reduc_sm{in,ax}* patterns.
;; Each entry pairs a vector mode with the target condition under which the
;; mode takes part in the iterator:
;;   256-bit integer modes -> TARGET_AVX2
;;   V8SF, V4DF            -> TARGET_AVX
;;   V4SF                  -> TARGET_SSE
;;   V64QI, V32HI          -> TARGET_AVX512BW
;;   other 512-bit modes   -> TARGET_AVX512F
2548(define_mode_iterator REDUC_SMINMAX_MODE
2549  [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2550   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2551   (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2552   (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW")
2553   (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2554   (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2555   (V8DF "TARGET_AVX512F")])
2556
;; Signed max/min reduction of a vector (operand 1) to a scalar (operand 0)
;; for every mode in REDUC_SMINMAX_MODE.  The condition string is empty, so
;; availability is governed only by the per-mode conditions attached to the
;; iterator.
;; The work is done by ix86_expand_reduc using the element-wise
;; <code><mode>3 generator; element 0 of the temporary is then extracted.
;; The preparation code always ends in DONE, so the RTL template serves only
;; to declare the operands.
2557(define_expand "reduc_<code>_scal_<mode>"
2558  [(smaxmin:REDUC_SMINMAX_MODE
2559     (match_operand:<ssescalarmode> 0 "register_operand")
2560     (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2561  ""
2562{
2563  rtx tmp = gen_reg_rtx (<MODE>mode);
2564  ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2565  emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2566							const0_rtx));
2567  DONE;
2568})
2569
;; Unsigned max/min reduction (umaxmin code iterator) for the VI_AVX512BW
;; integer modes; enabled under TARGET_AVX512F.
;; Same structure as the signed expander: ix86_expand_reduc with the
;; element-wise <code><mode>3 generator, then extraction of element 0.
2570(define_expand "reduc_<code>_scal_<mode>"
2571  [(umaxmin:VI_AVX512BW
2572     (match_operand:<ssescalarmode> 0 "register_operand")
2573     (match_operand:VI_AVX512BW 1 "register_operand"))]
2574  "TARGET_AVX512F"
2575{
2576  rtx tmp = gen_reg_rtx (<MODE>mode);
2577  ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2578  emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2579  							const0_rtx));
2580  DONE;
2581})
2582
;; Unsigned max/min reduction (umaxmin code iterator) for the VI_256 integer
;; modes; enabled under TARGET_AVX2.
;; Same structure as the other reduc_<code>_scal_<mode> expanders.
2583(define_expand "reduc_<code>_scal_<mode>"
2584  [(umaxmin:VI_256
2585     (match_operand:<ssescalarmode> 0 "register_operand")
2586     (match_operand:VI_256 1 "register_operand"))]
2587  "TARGET_AVX2"
2588{
2589  rtx tmp = gen_reg_rtx (<MODE>mode);
2590  ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2591  emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2592							const0_rtx));
2593  DONE;
2594})
2595
;; Unsigned-minimum reduction of V8HI operand 1 into HI operand 0; enabled
;; under TARGET_SSE4_1.  Only umin is provided for this mode here.
;; Uses ix86_expand_reduc with gen_uminv8hi3 and then extracts element 0.
2596(define_expand "reduc_umin_scal_v8hi"
2597  [(umin:V8HI
2598     (match_operand:HI 0 "register_operand")
2599     (match_operand:V8HI 1 "register_operand"))]
2600  "TARGET_SSE4_1"
2601{
2602  rtx tmp = gen_reg_rtx (V8HImode);
2603  ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
2604  emit_insn (gen_vec_extractv8hihi (operands[0], tmp, const0_rtx));
2605  DONE;
2606})
2607
;; Packed vreduce<suffix> for the VF_AVX512VL modes (TARGET_AVX512DQ, EVEX).
;; Modelled as UNSPEC_REDUCE of operand 1 and an immediate in 0..255
;; (operand 2).  Despite the name, this is the AVX-512 "reduce" instruction
;; and is unrelated to the reduc_*_scal_* expanders in this file.
;; The masked variant comes from <mask_name>; its text is spliced in through
;; <mask_operand3>.
2608(define_insn "<mask_codefor>reducep<mode><mask_name>"
2609  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2610	(unspec:VF_AVX512VL
2611	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2612	   (match_operand:SI 2 "const_0_to_255_operand")]
2613	  UNSPEC_REDUCE))]
2614  "TARGET_AVX512DQ"
2615  "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2616  [(set_attr "type" "sse")
2617   (set_attr "prefix" "evex")
2618   (set_attr "mode" "<MODE>")])
2619
;; Scalar form of UNSPEC_REDUCE on VF_128 vectors; enabled when
;; TARGET_AVX512DQ.
;; The unspec takes operands 1 (register), 2 (register or memory) and the
;; SI immediate 3 (const_0_to_255_operand).  Its result is combined with
;; operand 1 through a vec_merge with mask (const_int 1).  Emits
;; vreduce<ssescalarmodesuffix> with <mask_scalar_operand4> on the
;; destination; operand 2 is printed with <iptr> in Intel syntax.
2620(define_insn "reduces<mode><mask_scalar_name>"
2621  [(set (match_operand:VF_128 0 "register_operand" "=v")
2622	(vec_merge:VF_128
2623	  (unspec:VF_128
2624	    [(match_operand:VF_128 1 "register_operand" "v")
2625	     (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2626	     (match_operand:SI 3 "const_0_to_255_operand")]
2627	    UNSPEC_REDUCE)
2628	  (match_dup 1)
2629	  (const_int 1)))]
2630  "TARGET_AVX512DQ"
2631  "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2, %3}"
2632  [(set_attr "type" "sse")
2633   (set_attr "prefix" "evex")
2634   (set_attr "mode" "<MODE>")])
2635
2636;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2637;;
2638;; Parallel floating point comparisons
2639;;
2640;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2641
;; Packed floating-point compare for VF_128_256 modes, represented as
;; UNSPEC_PCMP; enabled when TARGET_AVX.
;; Operand 1 is a register, operand 2 a register or memory, and operand 3
;; an SI immediate accepted by const_0_to_31_operand.  Emits the
;; three-operand vcmp<ssemodesuffix> with the immediate, VEX encoded, with
;; a one-byte immediate accounted for in length_immediate.
2642(define_insn "avx_cmp<mode>3"
2643  [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2644	(unspec:VF_128_256
2645	  [(match_operand:VF_128_256 1 "register_operand" "x")
2646	   (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2647	   (match_operand:SI 3 "const_0_to_31_operand" "n")]
2648	  UNSPEC_PCMP))]
2649  "TARGET_AVX"
2650  "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2651  [(set_attr "type" "ssecmp")
2652   (set_attr "length_immediate" "1")
2653   (set_attr "prefix" "vex")
2654   (set_attr "mode" "<MODE>")])
2655
;; Scalar ("vm") variant of the AVX floating-point compare for VF_128;
;; enabled when TARGET_AVX.
;; The UNSPEC_PCMP result is combined with operand 1 through a vec_merge
;; with mask (const_int 1).  Emits vcmp<ssescalarmodesuffix>; operand 2 is
;; printed with <iptr> in Intel syntax, and the insn mode is the scalar
;; mode <ssescalarmode>.
2656(define_insn "avx_vmcmp<mode>3"
2657  [(set (match_operand:VF_128 0 "register_operand" "=x")
2658	(vec_merge:VF_128
2659	  (unspec:VF_128
2660	    [(match_operand:VF_128 1 "register_operand" "x")
2661	     (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2662	     (match_operand:SI 3 "const_0_to_31_operand" "n")]
2663	    UNSPEC_PCMP)
2664	 (match_dup 1)
2665	 (const_int 1)))]
2666  "TARGET_AVX"
2667  "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2668  [(set_attr "type" "ssecmp")
2669   (set_attr "length_immediate" "1")
2670   (set_attr "prefix" "vex")
2671   (set_attr "mode" "<ssescalarmode>")])
2672
;; Commutative variant of the packed mask compare for VF_128_256.
;; It only matches when the comparison code of operand 3 is in class
;; RTX_COMM_COMPARE, and operand 1 carries the "%" commutative marker.
;; Alternative 0 (isa noavx) is the two-operand cmp%D3 form with operand 1
;; tied to the destination; alternative 1 (isa avx) is the three-operand
;; vcmp%D3 form.
2673(define_insn "*<sse>_maskcmp<mode>3_comm"
2674  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2675	(match_operator:VF_128_256 3 "sse_comparison_operator"
2676	  [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2677	   (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2678  "TARGET_SSE
2679   && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2680  "@
2681   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2682   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2683  [(set_attr "isa" "noavx,avx")
2684   (set_attr "type" "ssecmp")
2685   (set_attr "length_immediate" "1")
2686   (set_attr "prefix" "orig,vex")
2687   (set_attr "mode" "<MODE>")])
2688
;; Packed mask compare for VF_128_256 using any comparison accepted by
;; sse_comparison_operator (operand 3); enabled when TARGET_SSE.
;; Alternative 0 (isa noavx) is the two-operand cmp%D3 form with operand 1
;; tied to the destination; alternative 1 (isa avx) is the three-operand
;; vcmp%D3 form.  The comparison itself is printed through the %D3
;; operand modifier.
2689(define_insn "<sse>_maskcmp<mode>3"
2690  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2691	(match_operator:VF_128_256 3 "sse_comparison_operator"
2692	  [(match_operand:VF_128_256 1 "register_operand" "0,x")
2693	   (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2694  "TARGET_SSE"
2695  "@
2696   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2697   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2698  [(set_attr "isa" "noavx,avx")
2699   (set_attr "type" "ssecmp")
2700   (set_attr "length_immediate" "1")
2701   (set_attr "prefix" "orig,vex")
2702   (set_attr "mode" "<MODE>")])
2703
;; Scalar ("vm") mask compare for VF_128; enabled when TARGET_SSE.
;; The comparison result (operator 3) is combined with operand 1 through a
;; vec_merge with mask (const_int 1).  Alternative 0 (isa noavx) is the
;; two-operand cmp%D3 form, alternative 1 (isa avx) the three-operand
;; vcmp%D3 form; both use the scalar suffix <ssescalarmodesuffix>.
;; NOTE(review): length_immediate is "1,*" here, while the other compare
;; patterns in this section use "1" for every alternative -- confirm that
;; leaving the AVX alternative at the default is intended.
2704(define_insn "<sse>_vmmaskcmp<mode>3"
2705  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2706	(vec_merge:VF_128
2707	 (match_operator:VF_128 3 "sse_comparison_operator"
2708	   [(match_operand:VF_128 1 "register_operand" "0,x")
2709	    (match_operand:VF_128 2 "vector_operand" "xBm,xm")])
2710	 (match_dup 1)
2711	 (const_int 1)))]
2712  "TARGET_SSE"
2713  "@
2714   cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2715   vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2716  [(set_attr "isa" "noavx,avx")
2717   (set_attr "type" "ssecmp")
2718   (set_attr "length_immediate" "1,*")
2719   (set_attr "prefix" "orig,vex")
2720   (set_attr "mode" "<ssescalarmode>")])
2721
;; Maps each vector mode to the predicate used for the comparison
;; immediate: the floating-point modes (V16SF, V8DF, V8SF, V4DF, V4SF,
;; V2DF) use const_0_to_31_operand, while every integer vector mode listed
;; uses const_0_to_7_operand.
2722(define_mode_attr cmp_imm_predicate
2723  [(V16SF "const_0_to_31_operand")  (V8DF "const_0_to_31_operand")
2724   (V16SI "const_0_to_7_operand")   (V8DI "const_0_to_7_operand")
2725   (V8SF "const_0_to_31_operand")   (V4DF "const_0_to_31_operand")
2726   (V8SI "const_0_to_7_operand")    (V4DI "const_0_to_7_operand")
2727   (V4SF "const_0_to_31_operand")   (V2DF "const_0_to_31_operand")
2728   (V4SI "const_0_to_7_operand")    (V2DI "const_0_to_7_operand")
2729   (V32HI "const_0_to_7_operand")   (V64QI "const_0_to_7_operand")
2730   (V16HI "const_0_to_7_operand")   (V32QI "const_0_to_7_operand")
2731   (V8HI "const_0_to_7_operand")    (V16QI "const_0_to_7_operand")])
2732
;; AVX-512 compare for V48_AVX512VL modes; the result has mode
;; <avx512fmaskmode> and uses constraint "Yk".
;; Operand 3 is the comparison immediate, validated by the per-mode
;; <cmp_imm_predicate>.  Enabled when TARGET_AVX512F and
;; <round_saeonly_mode512bit_condition> hold.  Emits
;; v<sseintprefix>cmp<ssemodesuffix> with the optional SAE operand and
;; <mask_scalar_merge_operand4> on the destination.
2733(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2734  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2735	(unspec:<avx512fmaskmode>
2736	  [(match_operand:V48_AVX512VL 1 "register_operand" "v")
2737	   (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
2738	   (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2739	  UNSPEC_PCMP))]
2740  "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2741  "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2742  [(set_attr "type" "ssecmp")
2743   (set_attr "length_immediate" "1")
2744   (set_attr "prefix" "evex")
2745   (set_attr "mode" "<sseinsnmode>")])
2746
;; AVX-512 compare for the VI12_AVX512VL integer modes, represented as
;; UNSPEC_PCMP; enabled when TARGET_AVX512BW.
;; The result has mode <avx512fmaskmode> (constraint "Yk"); operand 3 is
;; the comparison immediate validated by <cmp_imm_predicate>.  Emits
;; vpcmp<ssemodesuffix> with <mask_scalar_merge_operand4> on the
;; destination.
2747(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2748  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2749	(unspec:<avx512fmaskmode>
2750	  [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2751	   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2752	   (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2753	  UNSPEC_PCMP))]
2754  "TARGET_AVX512BW"
2755  "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2756  [(set_attr "type" "ssecmp")
2757   (set_attr "length_immediate" "1")
2758   (set_attr "prefix" "evex")
2759   (set_attr "mode" "<sseinsnmode>")])
2760
;; Unsigned AVX-512 compare for the VI12_AVX512VL integer modes,
;; represented as UNSPEC_UNSIGNED_PCMP; enabled when TARGET_AVX512BW.
;; The result has mode <avx512fmaskmode> (constraint "Yk"); operand 3 is
;; an immediate accepted by const_0_to_7_operand.  Emits
;; vpcmpu<ssemodesuffix>.
2761(define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2762  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2763	(unspec:<avx512fmaskmode>
2764	  [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2765	   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2766	   (match_operand:SI 3 "const_0_to_7_operand" "n")]
2767	  UNSPEC_UNSIGNED_PCMP))]
2768  "TARGET_AVX512BW"
2769  "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2770  [(set_attr "type" "ssecmp")
2771   (set_attr "length_immediate" "1")
2772   (set_attr "prefix" "evex")
2773   (set_attr "mode" "<sseinsnmode>")])
2774
;; Unsigned AVX-512 compare for the VI48_AVX512VL integer modes,
;; represented as UNSPEC_UNSIGNED_PCMP; enabled when TARGET_AVX512F.
;; Same shape as the VI12_AVX512VL variant: result in mode
;; <avx512fmaskmode> (constraint "Yk"), immediate operand 3 accepted by
;; const_0_to_7_operand, output vpcmpu<ssemodesuffix>.
2775(define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2776  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2777	(unspec:<avx512fmaskmode>
2778	  [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
2779	   (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
2780	   (match_operand:SI 3 "const_0_to_7_operand" "n")]
2781	  UNSPEC_UNSIGNED_PCMP))]
2782  "TARGET_AVX512F"
2783  "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2784  [(set_attr "type" "ssecmp")
2785   (set_attr "length_immediate" "1")
2786   (set_attr "prefix" "evex")
2787   (set_attr "mode" "<sseinsnmode>")])
2788
;; Scalar AVX-512 floating-point compare on VF_128 operands; enabled when
;; TARGET_AVX512F.
;; The UNSPEC_PCMP result (mode <avx512fmaskmode>, constraint "Yk") is
;; ANDed with (const_int 1), so only bit 0 of the result is kept.  Operand 3
;; is an immediate accepted by const_0_to_31_operand.  Emits
;; vcmp<ssescalarmodesuffix> with the optional <round_saeonly_op4> operand.
2789(define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2790  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2791	(and:<avx512fmaskmode>
2792	  (unspec:<avx512fmaskmode>
2793	    [(match_operand:VF_128 1 "register_operand" "v")
2794	     (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2795	     (match_operand:SI 3 "const_0_to_31_operand" "n")]
2796	    UNSPEC_PCMP)
2797	  (const_int 1)))]
2798  "TARGET_AVX512F"
2799  "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
2800  [(set_attr "type" "ssecmp")
2801   (set_attr "length_immediate" "1")
2802   (set_attr "prefix" "evex")
2803   (set_attr "mode" "<ssescalarmode>")])
2804
;; Masked form of the scalar AVX-512 floating-point compare on VF_128
;; operands; enabled when TARGET_AVX512F.
;; The UNSPEC_PCMP result is ANDed with (operand 4 AND 1), where operand 4
;; is a register of mode <avx512fmaskmode> with constraint "Yk".  Operand 4
;; is printed in braces after the destination ("%0%{%4%}"); the optional
;; SAE operand is therefore <round_saeonly_op5> here.
2805(define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2806  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2807	(and:<avx512fmaskmode>
2808	  (unspec:<avx512fmaskmode>
2809	    [(match_operand:VF_128 1 "register_operand" "v")
2810	     (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2811	     (match_operand:SI 3 "const_0_to_31_operand" "n")]
2812	    UNSPEC_PCMP)
2813	  (and:<avx512fmaskmode>
2814	    (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2815	    (const_int 1))))]
2816  "TARGET_AVX512F"
2817  "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_saeonly_op5>, %3}"
2818  [(set_attr "type" "ssecmp")
2819   (set_attr "length_immediate" "1")
2820   (set_attr "prefix" "evex")
2821   (set_attr "mode" "<ssescalarmode>")])
2822
;; AVX-512 packed floating-point compare over the VF modes, written with a
;; real comparison operator (operand 3, sse_comparison_operator) rather
;; than an unspec; enabled when TARGET_AVX512F.
;; The result has mode <avx512fmaskmode> (constraint "Yk").  Emits
;; vcmp%D3<ssemodesuffix>, with the comparison printed through the %D3
;; operand modifier.
2823(define_insn "avx512f_maskcmp<mode>3"
2824  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2825	(match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2826	  [(match_operand:VF 1 "register_operand" "v")
2827	   (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2828  "TARGET_AVX512F"
2829  "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2830  [(set_attr "type" "ssecmp")
2831   (set_attr "length_immediate" "1")
2832   (set_attr "prefix" "evex")
2833   (set_attr "mode" "<sseinsnmode>")])
2834
;; Scalar compare that sets FLAGS_REG in CCFP mode.
;; It compares element 0 of operand 0 with element 0 of operand 1, each
;; selected from a <ssevecmode> vector by vec_select.  Enabled when
;; SSE_FLOAT_MODE_P holds for the scalar mode.  Emits
;; %v<unord>comi<ssemodesuffix> with the optional <round_saeonly_op2>
;; operand.  prefix_data16 is "1" when the insn mode attribute is DF and
;; "0" otherwise; prefix_rep is always "0".
2835(define_insn "<sse>_<unord>comi<round_saeonly_name>"
2836  [(set (reg:CCFP FLAGS_REG)
2837	(compare:CCFP
2838	  (vec_select:MODEF
2839	    (match_operand:<ssevecmode> 0 "register_operand" "v")
2840	    (parallel [(const_int 0)]))
2841	  (vec_select:MODEF
2842	    (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
2843	    (parallel [(const_int 0)]))))]
2844  "SSE_FLOAT_MODE_P (<MODE>mode)"
2845  "%v<unord>comi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2846  [(set_attr "type" "ssecomi")
2847   (set_attr "prefix" "maybe_vex")
2848   (set_attr "prefix_rep" "0")
2849   (set (attr "prefix_data16")
2850	(if_then_else (eq_attr "mode" "DF")
2851		      (const_string "1")
2852		      (const_string "0")))
2853   (set_attr "mode" "<MODE>")])
2854
;; vec_cmp expander for V48_AVX512VL operands producing a result of mode
;; <avx512fmaskmode>; enabled when TARGET_AVX512F.
;; Operand 1 is the comparison operator (any code, empty predicate) applied
;; to operands 2 and 3.  All work is delegated to ix86_expand_mask_vec_cmp,
;; which is asserted to succeed.
2855(define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2856  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2857	(match_operator:<avx512fmaskmode> 1 ""
2858	  [(match_operand:V48_AVX512VL 2 "register_operand")
2859	   (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
2860  "TARGET_AVX512F"
2861{
2862  bool ok = ix86_expand_mask_vec_cmp (operands);
2863  gcc_assert (ok);
2864  DONE;
2865})
2866
;; vec_cmp expander for VI12_AVX512VL operands producing a result of mode
;; <avx512fmaskmode>; enabled when TARGET_AVX512BW.
;; Delegates to ix86_expand_mask_vec_cmp and asserts that it succeeded.
2867(define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2868  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2869	(match_operator:<avx512fmaskmode> 1 ""
2870	  [(match_operand:VI12_AVX512VL 2 "register_operand")
2871	   (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
2872  "TARGET_AVX512BW"
2873{
2874  bool ok = ix86_expand_mask_vec_cmp (operands);
2875  gcc_assert (ok);
2876  DONE;
2877})
2878
;; vec_cmp expander for VI_256 operands producing a vector result of mode
;; <sseintvecmode>; enabled when TARGET_AVX2.
;; Delegates to ix86_expand_int_vec_cmp and asserts that it succeeded.
2879(define_expand "vec_cmp<mode><sseintvecmodelower>"
2880  [(set (match_operand:<sseintvecmode> 0 "register_operand")
2881	(match_operator:<sseintvecmode> 1 ""
2882	  [(match_operand:VI_256 2 "register_operand")
2883	   (match_operand:VI_256 3 "nonimmediate_operand")]))]
2884  "TARGET_AVX2"
2885{
2886  bool ok = ix86_expand_int_vec_cmp (operands);
2887  gcc_assert (ok);
2888  DONE;
2889})
2890
;; vec_cmp expander for VI124_128 operands producing a vector result of
;; mode <sseintvecmode>; enabled when TARGET_SSE2.
;; Operand 3 is accepted by vector_operand.  Delegates to
;; ix86_expand_int_vec_cmp and asserts that it succeeded.
2891(define_expand "vec_cmp<mode><sseintvecmodelower>"
2892  [(set (match_operand:<sseintvecmode> 0 "register_operand")
2893	(match_operator:<sseintvecmode> 1 ""
2894	  [(match_operand:VI124_128 2 "register_operand")
2895	   (match_operand:VI124_128 3 "vector_operand")]))]
2896  "TARGET_SSE2"
2897{
2898  bool ok = ix86_expand_int_vec_cmp (operands);
2899  gcc_assert (ok);
2900  DONE;
2901})
2902
;; vec_cmp expander for V2DI operands with a V2DI result; enabled when
;; TARGET_SSE4_2.
;; Delegates to ix86_expand_int_vec_cmp and asserts that it succeeded.
2903(define_expand "vec_cmpv2div2di"
2904  [(set (match_operand:V2DI 0 "register_operand")
2905	(match_operator:V2DI 1 ""
2906	  [(match_operand:V2DI 2 "register_operand")
2907	   (match_operand:V2DI 3 "vector_operand")]))]
2908  "TARGET_SSE4_2"
2909{
2910  bool ok = ix86_expand_int_vec_cmp (operands);
2911  gcc_assert (ok);
2912  DONE;
2913})
2914
;; vec_cmp expander for VF_256 floating-point operands producing an
;; integer-vector result of mode <sseintvecmode>; enabled when TARGET_AVX.
;; Delegates to ix86_expand_fp_vec_cmp and asserts that it succeeded.
2915(define_expand "vec_cmp<mode><sseintvecmodelower>"
2916  [(set (match_operand:<sseintvecmode> 0 "register_operand")
2917	(match_operator:<sseintvecmode> 1 ""
2918	  [(match_operand:VF_256 2 "register_operand")
2919	   (match_operand:VF_256 3 "nonimmediate_operand")]))]
2920  "TARGET_AVX"
2921{
2922  bool ok = ix86_expand_fp_vec_cmp (operands);
2923  gcc_assert (ok);
2924  DONE;
2925})
2926
;; vec_cmp expander for VF_128 floating-point operands producing an
;; integer-vector result of mode <sseintvecmode>; enabled when TARGET_SSE.
;; Delegates to ix86_expand_fp_vec_cmp and asserts that it succeeded.
2927(define_expand "vec_cmp<mode><sseintvecmodelower>"
2928  [(set (match_operand:<sseintvecmode> 0 "register_operand")
2929	(match_operator:<sseintvecmode> 1 ""
2930	  [(match_operand:VF_128 2 "register_operand")
2931	   (match_operand:VF_128 3 "vector_operand")]))]
2932  "TARGET_SSE"
2933{
2934  bool ok = ix86_expand_fp_vec_cmp (operands);
2935  gcc_assert (ok);
2936  DONE;
2937})
2938
;; vec_cmpu expander for VI48_AVX512VL operands producing a result of mode
;; <avx512fmaskmode>; enabled when TARGET_AVX512F.
;; Delegates to ix86_expand_mask_vec_cmp, the same helper the vec_cmp
;; expanders with <avx512fmaskmode> results call, and asserts success.
2939(define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
2940  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2941	(match_operator:<avx512fmaskmode> 1 ""
2942	  [(match_operand:VI48_AVX512VL 2 "register_operand")
2943	   (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
2944  "TARGET_AVX512F"
2945{
2946  bool ok = ix86_expand_mask_vec_cmp (operands);
2947  gcc_assert (ok);
2948  DONE;
2949})
2950
;; vec_cmpu expander for VI12_AVX512VL operands producing a result of mode
;; <avx512fmaskmode>; enabled when TARGET_AVX512BW.
;; Delegates to ix86_expand_mask_vec_cmp and asserts that it succeeded.
2951(define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
2952  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2953	(match_operator:<avx512fmaskmode> 1 ""
2954	  [(match_operand:VI12_AVX512VL 2 "register_operand")
2955	   (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
2956  "TARGET_AVX512BW"
2957{
2958  bool ok = ix86_expand_mask_vec_cmp (operands);
2959  gcc_assert (ok);
2960  DONE;
2961})
2962
;; vec_cmpu expander for VI_256 operands producing a vector result of mode
;; <sseintvecmode>; enabled when TARGET_AVX2.
;; Delegates to ix86_expand_int_vec_cmp and asserts that it succeeded.
2963(define_expand "vec_cmpu<mode><sseintvecmodelower>"
2964  [(set (match_operand:<sseintvecmode> 0 "register_operand")
2965	(match_operator:<sseintvecmode> 1 ""
2966	  [(match_operand:VI_256 2 "register_operand")
2967	   (match_operand:VI_256 3 "nonimmediate_operand")]))]
2968  "TARGET_AVX2"
2969{
2970  bool ok = ix86_expand_int_vec_cmp (operands);
2971  gcc_assert (ok);
2972  DONE;
2973})
2974
;; vec_cmpu expander for VI124_128 operands producing a vector result of
;; mode <sseintvecmode>; enabled when TARGET_SSE2.
;; Delegates to ix86_expand_int_vec_cmp and asserts that it succeeded.
2975(define_expand "vec_cmpu<mode><sseintvecmodelower>"
2976  [(set (match_operand:<sseintvecmode> 0 "register_operand")
2977	(match_operator:<sseintvecmode> 1 ""
2978	  [(match_operand:VI124_128 2 "register_operand")
2979	   (match_operand:VI124_128 3 "vector_operand")]))]
2980  "TARGET_SSE2"
2981{
2982  bool ok = ix86_expand_int_vec_cmp (operands);
2983  gcc_assert (ok);
2984  DONE;
2985})
2986
;; vec_cmpu expander for V2DI operands with a V2DI result; enabled when
;; TARGET_SSE4_2.
;; Delegates to ix86_expand_int_vec_cmp and asserts that it succeeded.
2987(define_expand "vec_cmpuv2div2di"
2988  [(set (match_operand:V2DI 0 "register_operand")
2989	(match_operator:V2DI 1 ""
2990	  [(match_operand:V2DI 2 "register_operand")
2991	   (match_operand:V2DI 3 "vector_operand")]))]
2992  "TARGET_SSE4_2"
2993{
2994  bool ok = ix86_expand_int_vec_cmp (operands);
2995  gcc_assert (ok);
2996  DONE;
2997})
2998
;; vec_cmpeq expander for V2DI operands with a V2DI result.
;; It is enabled under TARGET_SSE4_1, whereas the vec_cmp and vec_cmpu
;; V2DI expanders require TARGET_SSE4_2.
;; Delegates to ix86_expand_int_vec_cmp and asserts that it succeeded.
2999(define_expand "vec_cmpeqv2div2di"
3000  [(set (match_operand:V2DI 0 "register_operand")
3001	(match_operator:V2DI 1 ""
3002	  [(match_operand:V2DI 2 "register_operand")
3003	   (match_operand:V2DI 3 "vector_operand")]))]
3004  "TARGET_SSE4_1"
3005{
3006  bool ok = ix86_expand_int_vec_cmp (operands);
3007  gcc_assert (ok);
3008  DONE;
3009})
3010
;; vcond expander selecting between V_512 values (operands 1 and 2) based
;; on a comparison (operator 3) of two VF_512 operands (4 and 5).
;; Enabled when TARGET_AVX512F and the data mode and the comparison mode
;; have the same number of units.  Delegates to ix86_expand_fp_vcond and
;; asserts that it succeeded.
3011(define_expand "vcond<V_512:mode><VF_512:mode>"
3012  [(set (match_operand:V_512 0 "register_operand")
3013	(if_then_else:V_512
3014	  (match_operator 3 ""
3015	    [(match_operand:VF_512 4 "nonimmediate_operand")
3016	     (match_operand:VF_512 5 "nonimmediate_operand")])
3017	  (match_operand:V_512 1 "general_operand")
3018	  (match_operand:V_512 2 "general_operand")))]
3019  "TARGET_AVX512F
3020   && (GET_MODE_NUNITS (<V_512:MODE>mode)
3021       == GET_MODE_NUNITS (<VF_512:MODE>mode))"
3022{
3023  bool ok = ix86_expand_fp_vcond (operands);
3024  gcc_assert (ok);
3025  DONE;
3026})
3027
;; vcond expander selecting between V_256 values (operands 1 and 2) based
;; on a comparison (operator 3) of two VF_256 operands (4 and 5).
;; Enabled when TARGET_AVX and the data mode and the comparison mode have
;; the same number of units.  Delegates to ix86_expand_fp_vcond and asserts
;; that it succeeded.
3028(define_expand "vcond<V_256:mode><VF_256:mode>"
3029  [(set (match_operand:V_256 0 "register_operand")
3030	(if_then_else:V_256
3031	  (match_operator 3 ""
3032	    [(match_operand:VF_256 4 "nonimmediate_operand")
3033	     (match_operand:VF_256 5 "nonimmediate_operand")])
3034	  (match_operand:V_256 1 "general_operand")
3035	  (match_operand:V_256 2 "general_operand")))]
3036  "TARGET_AVX
3037   && (GET_MODE_NUNITS (<V_256:MODE>mode)
3038       == GET_MODE_NUNITS (<VF_256:MODE>mode))"
3039{
3040  bool ok = ix86_expand_fp_vcond (operands);
3041  gcc_assert (ok);
3042  DONE;
3043})
3044
;; vcond expander selecting between V_128 values (operands 1 and 2) based
;; on a comparison (operator 3) of two VF_128 operands (4 and 5), which
;; are accepted by vector_operand.
;; Enabled when TARGET_SSE and the data mode and the comparison mode have
;; the same number of units.  Delegates to ix86_expand_fp_vcond and asserts
;; that it succeeded.
3045(define_expand "vcond<V_128:mode><VF_128:mode>"
3046  [(set (match_operand:V_128 0 "register_operand")
3047	(if_then_else:V_128
3048	  (match_operator 3 ""
3049	    [(match_operand:VF_128 4 "vector_operand")
3050	     (match_operand:VF_128 5 "vector_operand")])
3051	  (match_operand:V_128 1 "general_operand")
3052	  (match_operand:V_128 2 "general_operand")))]
3053  "TARGET_SSE
3054   && (GET_MODE_NUNITS (<V_128:MODE>mode)
3055       == GET_MODE_NUNITS (<VF_128:MODE>mode))"
3056{
3057  bool ok = ix86_expand_fp_vcond (operands);
3058  gcc_assert (ok);
3059  DONE;
3060})
3061
;; vcond_mask expander for V48_AVX512VL: a vec_merge of operands 1 and 2
;; controlled by operand 3, a register of mode <avx512fmaskmode>.
;; Enabled when TARGET_AVX512F.  There are no preparation statements, so
;; the RTL template above is what the expander generates.
3062(define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3063  [(set (match_operand:V48_AVX512VL 0 "register_operand")
3064	(vec_merge:V48_AVX512VL
3065	  (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
3066	  (match_operand:V48_AVX512VL 2 "vector_move_operand")
3067	  (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3068  "TARGET_AVX512F")
3069
;; vcond_mask expander for VI12_AVX512VL: a vec_merge of operands 1 and 2
;; controlled by operand 3, a register of mode <avx512fmaskmode>.
;; Enabled when TARGET_AVX512BW.  There are no preparation statements, so
;; the RTL template above is what the expander generates.
3070(define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3071  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
3072	(vec_merge:VI12_AVX512VL
3073	  (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
3074	  (match_operand:VI12_AVX512VL 2 "vector_move_operand")
3075	  (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3076  "TARGET_AVX512BW")
3077
;; vcond_mask expander for VI_256 with a vector mask of mode
;; <sseintvecmode> (operand 3); enabled when TARGET_AVX2.
;; The template is not emitted as written: the body calls
;; ix86_expand_sse_movcc with the destination, the mask, and then the two
;; data operands (note the mask is passed second), and finishes with DONE.
3078(define_expand "vcond_mask_<mode><sseintvecmodelower>"
3079  [(set (match_operand:VI_256 0 "register_operand")
3080	(vec_merge:VI_256
3081	  (match_operand:VI_256 1 "nonimmediate_operand")
3082	  (match_operand:VI_256 2 "vector_move_operand")
3083	  (match_operand:<sseintvecmode> 3 "register_operand")))]
3084  "TARGET_AVX2"
3085{
3086  ix86_expand_sse_movcc (operands[0], operands[3],
3087			 operands[1], operands[2]);
3088  DONE;
3089})
3090
;; vcond_mask expander for VI124_128 with a vector mask of mode
;; <sseintvecmode> (operand 3); enabled when TARGET_SSE2.
;; The body calls ix86_expand_sse_movcc with the destination, the mask,
;; and then the two data operands, and finishes with DONE.
3091(define_expand "vcond_mask_<mode><sseintvecmodelower>"
3092  [(set (match_operand:VI124_128 0 "register_operand")
3093	(vec_merge:VI124_128
3094	  (match_operand:VI124_128 1 "vector_operand")
3095	  (match_operand:VI124_128 2 "vector_move_operand")
3096	  (match_operand:<sseintvecmode> 3 "register_operand")))]
3097  "TARGET_SSE2"
3098{
3099  ix86_expand_sse_movcc (operands[0], operands[3],
3100			 operands[1], operands[2]);
3101  DONE;
3102})
3103
;; vcond_mask expander for V2DI with a V2DI mask (operand 3); enabled when
;; TARGET_SSE4_2.
;; The body calls ix86_expand_sse_movcc with the destination, the mask,
;; and then the two data operands, and finishes with DONE.
3104(define_expand "vcond_mask_v2div2di"
3105  [(set (match_operand:V2DI 0 "register_operand")
3106	(vec_merge:V2DI
3107	  (match_operand:V2DI 1 "vector_operand")
3108	  (match_operand:V2DI 2 "vector_move_operand")
3109	  (match_operand:V2DI 3 "register_operand")))]
3110  "TARGET_SSE4_2"
3111{
3112  ix86_expand_sse_movcc (operands[0], operands[3],
3113			 operands[1], operands[2]);
3114  DONE;
3115})
3116
;; vcond_mask expander for VF_256 floating-point data with an
;; integer-vector mask of mode <sseintvecmode> (operand 3); enabled when
;; TARGET_AVX.
;; The body calls ix86_expand_sse_movcc with the destination, the mask,
;; and then the two data operands, and finishes with DONE.
3117(define_expand "vcond_mask_<mode><sseintvecmodelower>"
3118  [(set (match_operand:VF_256 0 "register_operand")
3119	(vec_merge:VF_256
3120	  (match_operand:VF_256 1 "nonimmediate_operand")
3121	  (match_operand:VF_256 2 "vector_move_operand")
3122	  (match_operand:<sseintvecmode> 3 "register_operand")))]
3123  "TARGET_AVX"
3124{
3125  ix86_expand_sse_movcc (operands[0], operands[3],
3126			 operands[1], operands[2]);
3127  DONE;
3128})
3129
;; vcond_mask expander for VF_128 floating-point data with an
;; integer-vector mask of mode <sseintvecmode> (operand 3); enabled when
;; TARGET_SSE.
;; The body calls ix86_expand_sse_movcc with the destination, the mask,
;; and then the two data operands, and finishes with DONE.
3130(define_expand "vcond_mask_<mode><sseintvecmodelower>"
3131  [(set (match_operand:VF_128 0 "register_operand")
3132	(vec_merge:VF_128
3133	  (match_operand:VF_128 1 "vector_operand")
3134	  (match_operand:VF_128 2 "vector_move_operand")
3135	  (match_operand:<sseintvecmode> 3 "register_operand")))]
3136  "TARGET_SSE"
3137{
3138  ix86_expand_sse_movcc (operands[0], operands[3],
3139			 operands[1], operands[2]);
3140  DONE;
3141})
3142
3143;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3144;;
3145;; Parallel floating point logical operations
3146;;
3147;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3148
;; AND-NOT for 128/256-bit float vectors: operand 0 = ~operand 1 & operand 2,
;; with optional masking through <mask_name>.  Enabled when TARGET_SSE and
;; <mask_avx512vl_condition> hold.
;; The output code picks the template in two steps:
;;   * by alternative: 0 uses the two-operand "andn%s" form; 1-3 use the
;;     three-operand "vandn%s" form with <mask_operand3_1> on the
;;     destination;
;;   * by the insn "mode" attribute: V8SF/V4SF force the "ps" suffix; OI/TI
;;     replace the template with the integer "vpandn%s" form and pick a "q"
;;     or "d" suffix from the inner mode (DFmode -> "q"); any other mode
;;     uses <ssemodesuffix>.
;; The chosen template is formatted into a static buffer and returned.
;; The "mode" attribute is <sseintvecmode2> for alternative 3 and for a
;; masked alternative 1 without TARGET_AVX512DQ; the remaining cases are
;; selected by the tests listed in the cond below.
3149(define_insn "<sse>_andnot<mode>3<mask_name>"
3150  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3151	(and:VF_128_256
3152	  (not:VF_128_256
3153	    (match_operand:VF_128_256 1 "register_operand" "0,x,v,v"))
3154	  (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3155  "TARGET_SSE && <mask_avx512vl_condition>"
3156{
3157  static char buf[128];
3158  const char *ops;
3159  const char *suffix;
3160
3161  switch (which_alternative)
3162    {
3163    case 0:
3164      ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3165      break;
3166    case 1:
3167    case 2:
3168    case 3:
3169      ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3170      break;
3171    default:
3172      gcc_unreachable ();
3173    }
3174
3175  switch (get_attr_mode (insn))
3176    {
3177    case MODE_V8SF:
3178    case MODE_V4SF:
3179      suffix = "ps";
3180      break;
3181    case MODE_OI:
3182    case MODE_TI:
3183      /* There is no vandnp[sd] in avx512f.  Use vpandn[qd].  */
3184      suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3185      ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3186      break;
3187    default:
3188      suffix = "<ssemodesuffix>";
3189    }
3190
3191  snprintf (buf, sizeof (buf), ops, suffix);
3192  return buf;
3193}
3194  [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3195   (set_attr "type" "sselog")
3196   (set_attr "prefix" "orig,maybe_vex,evex,evex")
3197   (set (attr "mode")
3198	(cond [(and (match_test "<mask_applied>")
3199		    (and (eq_attr "alternative" "1")
3200			 (match_test "!TARGET_AVX512DQ")))
3201		 (const_string "<sseintvecmode2>")
3202	       (eq_attr "alternative" "3")
3203		 (const_string "<sseintvecmode2>")
3204	       (and (match_test "<MODE_SIZE> == 16")
3205		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3206		 (const_string "<ssePSmode>")
3207	       (match_test "TARGET_AVX")
3208		 (const_string "<MODE>")
3209	       (match_test "optimize_function_for_size_p (cfun)")
3210		 (const_string "V4SF")
3211	       ]
3212	       (const_string "<MODE>")))])
3213
3214
;; AND-NOT for 512-bit float vectors: operand 0 = ~operand 1 & operand 2,
;; with optional masking through <mask_name>; enabled when TARGET_AVX512F.
;; The mnemonic is assembled as "v" + ops + "andn" + suffix.  By default
;; ops is empty and suffix is <ssemodesuffix>.  Without TARGET_AVX512DQ,
;; ops becomes "p" and the suffix "q" or "d" depending on the inner mode
;; (DFmode -> "q"), which yields the integer vpandn form.
;; The "mode" attribute follows the same test: <sseinsnmode> with
;; TARGET_AVX512DQ, XI otherwise.
3215(define_insn "<sse>_andnot<mode>3<mask_name>"
3216  [(set (match_operand:VF_512 0 "register_operand" "=v")
3217	(and:VF_512
3218	  (not:VF_512
3219	    (match_operand:VF_512 1 "register_operand" "v"))
3220	  (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3221  "TARGET_AVX512F"
3222{
3223  static char buf[128];
3224  const char *ops;
3225  const char *suffix;
3226
3227  suffix = "<ssemodesuffix>";
3228  ops = "";
3229
3230  /* There is no vandnp[sd] in avx512f.  Use vpandn[qd].  */
3231  if (!TARGET_AVX512DQ)
3232    {
3233      suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3234      ops = "p";
3235    }
3236
3237  snprintf (buf, sizeof (buf),
3238	    "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3239	    ops, suffix);
3240  return buf;
3241}
3242  [(set_attr "type" "sselog")
3243   (set_attr "prefix" "evex")
3244   (set (attr "mode")
3245        (if_then_else (match_test "TARGET_AVX512DQ")
3246		      (const_string "<sseinsnmode>")
3247		      (const_string "XI")))])
3248
;; Expander for the any_logic operations on VF_128_256 vectors, with
;; optional masking through <mask_name>.  Enabled when TARGET_SSE and
;; <mask_avx512vl_condition> hold.
;; The only preparation step is a call to
;; ix86_fixup_binary_operands_no_copy; the RTL template is then emitted.
3249(define_expand "<code><mode>3<mask_name>"
3250  [(set (match_operand:VF_128_256 0 "register_operand")
3251       (any_logic:VF_128_256
3252         (match_operand:VF_128_256 1 "vector_operand")
3253         (match_operand:VF_128_256 2 "vector_operand")))]
3254  "TARGET_SSE && <mask_avx512vl_condition>"
3255  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3256
;; Expander for the any_logic operations on VF_512 vectors, with optional
;; masking through <mask_name>; enabled when TARGET_AVX512F.
;; The only preparation step is a call to
;; ix86_fixup_binary_operands_no_copy; the RTL template is then emitted.
3257(define_expand "<code><mode>3<mask_name>"
3258  [(set (match_operand:VF_512 0 "register_operand")
3259       (any_logic:VF_512
3260         (match_operand:VF_512 1 "nonimmediate_operand")
3261         (match_operand:VF_512 2 "nonimmediate_operand")))]
3262  "TARGET_AVX512F"
3263  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3264
;; Insn for the any_logic operations on 128/256-bit float vectors, with
;; optional masking through <mask_name>.  Operand 1 carries the "%"
;; commutative marker, and the condition rejects the case where both
;; inputs are memory.
;; The output code picks the template in two steps:
;;   * by alternative: 0 uses the two-operand "<logic>%s" form; 1-3 use the
;;     three-operand "v<logic>%s" form with <mask_operand3_1> on the
;;     destination;
;;   * by the insn "mode" attribute: V8SF/V4SF force the "ps" suffix; OI/TI
;;     replace the template with the integer "vp<logic>%s" form and pick a
;;     "q" or "d" suffix from the inner mode (DFmode -> "q"); any other
;;     mode uses <ssemodesuffix>.
;; The chosen template is formatted into a static buffer and returned.
;; NOTE(review): "prefix" for alternative 1 is maybe_evex here, whereas the
;; and-not pattern for the same modes uses maybe_vex -- confirm which one
;; is intended.
3265(define_insn "*<code><mode>3<mask_name>"
3266  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3267	(any_logic:VF_128_256
3268	  (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v")
3269	  (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3270  "TARGET_SSE && <mask_avx512vl_condition>
3271   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3272{
3273  static char buf[128];
3274  const char *ops;
3275  const char *suffix;
3276
3277  switch (which_alternative)
3278    {
3279    case 0:
3280      ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3281      break;
3282    case 1:
3283    case 2:
3284    case 3:
3285      ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3286      break;
3287    default:
3288      gcc_unreachable ();
3289    }
3290
3291  switch (get_attr_mode (insn))
3292    {
3293    case MODE_V8SF:
3294    case MODE_V4SF:
3295      suffix = "ps";
3296      break;
3297    case MODE_OI:
3298    case MODE_TI:
3299      /* There is no v<logic>p[sd] in avx512f.  Use vp<logic>[qd].  */
3300      suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3301      ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3302      break;
3303    default:
3304      suffix = "<ssemodesuffix>";
3305    }
3306
3307  snprintf (buf, sizeof (buf), ops, suffix);
3308  return buf;
3309}
3310  [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3311   (set_attr "type" "sselog")
3312   (set_attr "prefix" "orig,maybe_evex,evex,evex")
3313   (set (attr "mode")
3314	(cond [(and (match_test "<mask_applied>")
3315		    (and (eq_attr "alternative" "1")
3316			 (match_test "!TARGET_AVX512DQ")))
3317		 (const_string "<sseintvecmode2>")
3318	       (eq_attr "alternative" "3")
3319		 (const_string "<sseintvecmode2>")
3320	       (and (match_test "<MODE_SIZE> == 16")
3321		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3322		 (const_string "<ssePSmode>")
3323	       (match_test "TARGET_AVX")
3324		 (const_string "<MODE>")
3325	       (match_test "optimize_function_for_size_p (cfun)")
3326		 (const_string "V4SF")
3327	       ]
3328	       (const_string "<MODE>")))])
3329
;; Insn for the any_logic operations on 512-bit float vectors, with
;; optional masking through <mask_name>.  Enabled when TARGET_AVX512F and
;; the two inputs are not both memory; operand 1 carries the "%"
;; commutative marker.
;; The mnemonic is assembled as "v" + ops + "<logic>" + suffix.  By default
;; ops is empty and suffix is <ssemodesuffix>.  Without TARGET_AVX512DQ,
;; ops becomes "p" and the suffix "q" or "d" depending on the inner mode
;; (DFmode -> "q"), which yields the integer vp<logic> form.
;; The "mode" attribute is <sseinsnmode> with TARGET_AVX512DQ, XI otherwise.
3330(define_insn "*<code><mode>3<mask_name>"
3331  [(set (match_operand:VF_512 0 "register_operand" "=v")
3332	(any_logic:VF_512
3333	  (match_operand:VF_512 1 "nonimmediate_operand" "%v")
3334	  (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3335  "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3336{
3337  static char buf[128];
3338  const char *ops;
3339  const char *suffix;
3340
3341  suffix = "<ssemodesuffix>";
3342  ops = "";
3343
3344  /* There is no v<logic>p[sd] in avx512f.  Use vp<logic>[dq].  */
3345  if (!TARGET_AVX512DQ)
3346    {
3347      suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3348      ops = "p";
3349    }
3350
3351  snprintf (buf, sizeof (buf),
3352	   "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3353	   ops, suffix);
3354  return buf;
3355}
3356  [(set_attr "type" "sselog")
3357   (set_attr "prefix" "evex")
3358   (set (attr "mode")
3359        (if_then_else (match_test "TARGET_AVX512DQ")
3360		      (const_string "<sseinsnmode>")
3361		      (const_string "XI")))])
3362
;; copysign expander for the VF vector modes; enabled when TARGET_SSE.
;; It expands to three sets using a mask held in operands[3]:
;;   operands[4] = ~mask & operand 1
;;   operands[5] =  mask & operand 2
;;   operand 0   = operands[4] | operands[5]
;; The mask comes from ix86_build_signbit_mask (<MODE>mode, 1, 0) --
;; presumably a vector with only the sign bits set; confirm against that
;; helper.  operands[4] and operands[5] are fresh pseudo registers.
3363(define_expand "copysign<mode>3"
3364  [(set (match_dup 4)
3365	(and:VF
3366	  (not:VF (match_dup 3))
3367	  (match_operand:VF 1 "vector_operand")))
3368   (set (match_dup 5)
3369	(and:VF (match_dup 3)
3370		(match_operand:VF 2 "vector_operand")))
3371   (set (match_operand:VF 0 "register_operand")
3372	(ior:VF (match_dup 4) (match_dup 5)))]
3373  "TARGET_SSE"
3374{
3375  operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3376
3377  operands[4] = gen_reg_rtx (<MODE>mode);
3378  operands[5] = gen_reg_rtx (<MODE>mode);
3379})
3380
3381;; Also define scalar versions.  These are used for abs, neg, and
3382;; conditional move.  Using subregs into vector modes causes register
3383;; allocation lossage.  These patterns do not allow memory operands
3384;; because the native instructions read the full 128-bits.
3385
;; Scalar AND-NOT on MODEF values (register operands only):
;; operand 0 = ~operand 1 & operand 2.  Enabled when SSE_FLOAT_MODE_P holds
;; for the mode.
;; The suffix starts as "ps" when the insn "mode" attribute is V4SF and as
;; <ssevecmodesuffix> otherwise.  Templates by alternative:
;;   0 (isa noavx)    two-operand "andn%s";
;;   1 (isa avx)      three-operand "vandn%s";
;;   2 (isa avx512vl) "vandn%s" with TARGET_AVX512DQ, otherwise the integer
;;                    "vpandn%s" with suffix "q" (DFmode) or "d";
;;   3 (isa avx512f)  same choice as 2, but every operand is printed with
;;                    the %g modifier.
;; The "mode" attribute for alternatives 2 and 3 likewise depends on
;; TARGET_AVX512DQ (<ssevecmode>/TI and <avx512fvecmode>/XI respectively).
3386(define_insn "*andnot<mode>3"
3387  [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3388	(and:MODEF
3389	  (not:MODEF
3390	    (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
3391	    (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3392  "SSE_FLOAT_MODE_P (<MODE>mode)"
3393{
3394  static char buf[128];
3395  const char *ops;
3396  const char *suffix
3397    = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3398
3399  switch (which_alternative)
3400    {
3401    case 0:
3402      ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3403      break;
3404    case 1:
3405      ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3406      break;
3407    case 2:
3408      if (TARGET_AVX512DQ)
3409	ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3410      else
3411	{
3412	  suffix = <MODE>mode == DFmode ? "q" : "d";
3413	  ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3414	}
3415      break;
3416    case 3:
3417      if (TARGET_AVX512DQ)
3418	ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3419      else
3420	{
3421	  suffix = <MODE>mode == DFmode ? "q" : "d";
3422	  ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3423	}
3424      break;
3425    default:
3426      gcc_unreachable ();
3427    }
3428
3429  snprintf (buf, sizeof (buf), ops, suffix);
3430  return buf;
3431}
3432  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3433   (set_attr "type" "sselog")
3434   (set_attr "prefix" "orig,vex,evex,evex")
3435   (set (attr "mode")
3436	(cond [(eq_attr "alternative" "2")
3437		 (if_then_else (match_test "TARGET_AVX512DQ")
3438			       (const_string "<ssevecmode>")
3439			       (const_string "TI"))
3440	       (eq_attr "alternative" "3")
3441		 (if_then_else (match_test "TARGET_AVX512DQ")
3442			       (const_string "<avx512fvecmode>")
3443			       (const_string "XI"))
3444	       (and (match_test "<MODE_SIZE> == 16")
3445		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3446		 (const_string "V4SF")
3447	       (match_test "TARGET_AVX")
3448		 (const_string "<ssevecmode>")
3449	       (match_test "optimize_function_for_size_p (cfun)")
3450		 (const_string "V4SF")
3451	       ]
3452	       (const_string "<ssevecmode>")))])
3453
;; AND-NOT on TFmode: operand 0 = ~operand 1 & operand 2; enabled when
;; TARGET_SSE.
;; Mnemonic stem: "pandnq" for alternatives 2 and 3; otherwise "andnps"
;; when the insn "mode" attribute is V4SF and "pandn" if it is not.
;; Templates by alternative:
;;   0 (isa noavx)              two-operand form, stem used as is;
;;   1 (avx) and 2 (avx512vl)   "v" + stem with three operands;
;;   3 (avx512f)                "v" + stem, operands printed with %g.
;; prefix_data16 is "1" only for alternative 0 in TI mode.  The "mode"
;; attribute is TI for alternative 2 and XI for alternative 3; the
;; remaining cases are selected by the tests listed in the cond below.
3454(define_insn "*andnottf3"
3455  [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3456	(and:TF
3457	  (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v"))
3458	  (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3459  "TARGET_SSE"
3460{
3461  static char buf[128];
3462  const char *ops;
3463  const char *tmp
3464    = (which_alternative >= 2 ? "pandnq"
3465       : get_attr_mode (insn) == MODE_V4SF ? "andnps" : "pandn");
3466
3467  switch (which_alternative)
3468    {
3469    case 0:
3470      ops = "%s\t{%%2, %%0|%%0, %%2}";
3471      break;
3472    case 1:
3473    case 2:
3474      ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3475      break;
3476    case 3:
3477      ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3478      break;
3479    default:
3480      gcc_unreachable ();
3481    }
3482
3483  snprintf (buf, sizeof (buf), ops, tmp);
3484  return buf;
3485}
3486  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3487   (set_attr "type" "sselog")
3488   (set (attr "prefix_data16")
3489     (if_then_else
3490       (and (eq_attr "alternative" "0")
3491	    (eq_attr "mode" "TI"))
3492       (const_string "1")
3493       (const_string "*")))
3494   (set_attr "prefix" "orig,vex,evex,evex")
3495   (set (attr "mode")
3496	(cond [(eq_attr "alternative" "2")
3497		 (const_string "TI")
3498	       (eq_attr "alternative" "3")
3499		 (const_string "XI")
3500	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3501		 (const_string "V4SF")
3502	       (match_test "TARGET_AVX")
3503		 (const_string "TI")
3504	       (ior (not (match_test "TARGET_SSE2"))
3505		    (match_test "optimize_function_for_size_p (cfun)"))
3506		 (const_string "V4SF")
3507	       ]
3508	       (const_string "TI")))])
3509
;; Bitwise logic (one pattern per any_logic code) on scalar MODEF values.
;; All three operands are registers; operand 1 is marked commutative ("%").
;; Alternatives follow the "isa" attribute: noavx, avx, avx512vl, avx512f.
3510(define_insn "*<code><mode>3"
3511  [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3512	(any_logic:MODEF
3513	  (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
3514	  (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3515  "SSE_FLOAT_MODE_P (<MODE>mode)"
3516{
3517  static char buf[128];
3518  const char *ops;
  /* Default mnemonic suffix: "ps" when the mode attribute is V4SF,
     otherwise the suffix of the matching vector mode.  */
3519  const char *suffix
3520    = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3521
3522  switch (which_alternative)
3523    {
3524    case 0:
3525      ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3526      break;
3527    case 2:
      /* Without AVX512DQ use the integer form vp<logic>{d,q}; with it,
	 share the floating-point form of alternative 1.  */
3528      if (!TARGET_AVX512DQ)
3529	{
3530	  suffix = <MODE>mode == DFmode ? "q" : "d";
3531	  ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3532	  break;
3533	}
3534      /* FALLTHRU */
3535    case 1:
3536      ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3537      break;
3538    case 3:
      /* Same AVX512DQ choice as alternative 2, but operands are printed
	 with the %g modifier.  */
3539      if (TARGET_AVX512DQ)
3540	ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3541      else
3542	{
3543	  suffix = <MODE>mode == DFmode ? "q" : "d";
3544	  ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3545	}
3546      break;
3547    default:
3548      gcc_unreachable ();
3549    }
3550
  /* Splice the suffix into the chosen template.  */
3551  snprintf (buf, sizeof (buf), ops, suffix);
3552  return buf;
3553}
3554  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3555   (set_attr "type" "sselog")
3556   (set_attr "prefix" "orig,vex,evex,evex")
3557   (set (attr "mode")
3558	(cond [(eq_attr "alternative" "2")
3559		 (if_then_else (match_test "TARGET_AVX512DQ")
3560			       (const_string "<ssevecmode>")
3561			       (const_string "TI"))
3562	       (eq_attr "alternative" "3")
3563		 (if_then_else (match_test "TARGET_AVX512DQ")
3564			       (const_string "<avx512fvecmode>")
3565			       (const_string "XI"))
3566	       (and (match_test "<MODE_SIZE> == 16")
3567		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3568		 (const_string "V4SF")
3569	       (match_test "TARGET_AVX")
3570		 (const_string "<ssevecmode>")
3571	       (match_test "optimize_function_for_size_p (cfun)")
3572		 (const_string "V4SF")
3573	       ]
3574	       (const_string "<ssevecmode>")))])
3575
;; Expander for TFmode bitwise logic (any_logic).  The preparation
;; statement hands the operands to ix86_fixup_binary_operands_no_copy
;; before the pattern is emitted.
3576(define_expand "<code>tf3"
3577  [(set (match_operand:TF 0 "register_operand")
3578	(any_logic:TF
3579	  (match_operand:TF 1 "vector_operand")
3580	  (match_operand:TF 2 "vector_operand")))]
3581  "TARGET_SSE"
3582  "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
3583
;; TFmode bitwise logic (any_logic) insn.  Operand 1 is commutative ("%");
;; the condition rejects the case where both inputs are in memory.
;; Alternatives follow the "isa" attribute: noavx, avx, avx512vl, avx512f.
3584(define_insn "*<code>tf3"
3585  [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3586	(any_logic:TF
3587	  (match_operand:TF 1 "vector_operand" "%0,x,v,v")
3588	  (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3589  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3590{
3591  static char buf[128];
3592  const char *ops;
  /* Mnemonic stem: alternatives 2 and 3 always use p<logic>q; the others
     pick <logic>ps or p<logic> from the insn's "mode" attribute.  */
3593  const char *tmp
3594    = (which_alternative >= 2 ? "p<logic>q"
3595       : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>");
3596
3597  switch (which_alternative)
3598    {
3599    case 0:
      /* Two-operand form, no "v" prefix.  */
3600      ops = "%s\t{%%2, %%0|%%0, %%2}";
3601      break;
3602    case 1:
3603    case 2:
      /* Three-operand form with "v" prefix.  */
3604      ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3605      break;
3606    case 3:
      /* Operands printed with the %g modifier; mode attribute is XI.  */
3607      ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3608      break;
3609    default:
3610      gcc_unreachable ();
3611    }
3612
3613  snprintf (buf, sizeof (buf), ops, tmp);
3614  return buf;
3615}
3616  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3617   (set_attr "type" "sselog")
3618   (set (attr "prefix_data16")
3619     (if_then_else
3620       (and (eq_attr "alternative" "0")
3621	    (eq_attr "mode" "TI"))
3622       (const_string "1")
3623       (const_string "*")))
3624   (set_attr "prefix" "orig,vex,evex,evex")
3625   (set (attr "mode")
3626	(cond [(eq_attr "alternative" "2")
3627		 (const_string "TI")
3628	       (eq_attr "alternative" "3")
	       ;; Alternative 3 prints %g operands, so its mode is XI,
	       ;; exactly as in *andnottf3 (was wrongly QI).
3629		 (const_string "XI")
3630	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3631		 (const_string "V4SF")
3632	       (match_test "TARGET_AVX")
3633		 (const_string "TI")
3634	       (ior (not (match_test "TARGET_SSE2"))
3635		    (match_test "optimize_function_for_size_p (cfun)"))
3636		 (const_string "V4SF")
3637	       ]
3638	       (const_string "TI")))])
3639
3640;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3641;;
3642;; FMA floating point multiply/accumulate instructions.  These include
3643;; scalar versions of the instructions as well as vector versions.
3644;;
3645;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3646
3647;; The standard names for scalar FMA are only available with SSE math enabled.
3648;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma.  It doesn't
3649;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
3650;; and TARGET_FMA4 are both false.
3651;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3652;; one must force the EVEX encoding of the fma insns.  Ideally we'd improve
3653;; GAS to allow proper prefix selection.  However, for the moment all hardware
3654;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-named fma/fms/fnma/fnms expanders below.  Scalar
;; modes additionally require TARGET_SSE_MATH; 512-bit modes need AVX512F.
3655(define_mode_iterator FMAMODEM
3656  [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3657   (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3658   (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3659   (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3660   (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3661   (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3662   (V16SF "TARGET_AVX512F")
3663   (V8DF "TARGET_AVX512F")])
3664
;; Standard name fma: operand 0 = operand 1 * operand 2 + operand 3.
;; No condition string; availability comes from the FMAMODEM iterator.
3665(define_expand "fma<mode>4"
3666  [(set (match_operand:FMAMODEM 0 "register_operand")
3667	(fma:FMAMODEM
3668	  (match_operand:FMAMODEM 1 "nonimmediate_operand")
3669	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
3670	  (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3671
;; Standard name fms: operand 0 = operand 1 * operand 2 - operand 3,
;; expressed as an fma whose addend is negated.
3672(define_expand "fms<mode>4"
3673  [(set (match_operand:FMAMODEM 0 "register_operand")
3674	(fma:FMAMODEM
3675	  (match_operand:FMAMODEM 1 "nonimmediate_operand")
3676	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
3677	  (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3678
;; Standard name fnma: operand 0 = -(operand 1) * operand 2 + operand 3,
;; expressed as an fma whose first multiplicand is negated.
3679(define_expand "fnma<mode>4"
3680  [(set (match_operand:FMAMODEM 0 "register_operand")
3681	(fma:FMAMODEM
3682	  (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3683	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
3684	  (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3685
;; Standard name fnms: operand 0 = -(operand 1) * operand 2 - operand 3;
;; both the first multiplicand and the addend are negated.
3686(define_expand "fnms<mode>4"
3687  [(set (match_operand:FMAMODEM 0 "register_operand")
3688	(fma:FMAMODEM
3689	  (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3690	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
3691	  (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3692
3693;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Same mode list as FMAMODEM but without the TARGET_SSE_MATH requirement
;; on the scalar modes; used by the fma4i_fmadd_<mode> expander.
3694(define_mode_iterator FMAMODE_AVX512
3695 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3696  (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3697  (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3698  (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3699  (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3700  (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3701  (V16SF "TARGET_AVX512F")
3702  (V8DF "TARGET_AVX512F")])
3703
;; Scalar, 128-bit and 256-bit float modes, unconditional.  Used by the
;; *fma_f{,n}m{add,sub}_<mode> insns, which carry their own
;; "TARGET_FMA || TARGET_FMA4" condition.
3704(define_mode_iterator FMAMODE
3705  [SF DF V4SF V2DF V8SF V4DF])
3706
;; Named fmadd expander over FMAMODE_AVX512 (no SSE-math requirement):
;; operand 0 = operand 1 * operand 2 + operand 3.
3707(define_expand "fma4i_fmadd_<mode>"
3708  [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3709	(fma:FMAMODE_AVX512
3710	  (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3711	  (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3712	  (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3713
;; Zero-masking fmadd expander.  Operand 4 is the mask register.  The body
;; forwards to the fma_fmadd_<mode>_maskz_1 generator, inserting a zero
;; vector (CONST0_RTX) ahead of the mask operand, then finishes with DONE.
3714(define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
3715  [(match_operand:VF_AVX512VL 0 "register_operand")
3716   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3717   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3718   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3719   (match_operand:<avx512fmaskmode> 4 "register_operand")]
3720  "TARGET_AVX512F && <round_mode512bit_condition>"
3721{
3722  emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
3723    operands[0], operands[1], operands[2], operands[3],
3724    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
3725  DONE;
3726})
3727
;; Unmasked fmadd: operand 0 = operand 1 * operand 2 + operand 3.
;; Per the "isa" attribute, alternatives 0-2 are the FMA3 132/213/231
;; forms (destination tied to operand 1 or 3) and alternatives 3-4 are the
;; four-operand FMA4 form.
3728(define_insn "*fma_fmadd_<mode>"
3729  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3730	(fma:FMAMODE
3731	  (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3732	  (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3733	  (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3734  "TARGET_FMA || TARGET_FMA4"
3735  "@
3736   vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3737   vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3738   vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3739   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3740   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3741  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3742   (set_attr "type" "ssemuladd")
3743   (set_attr "mode" "<MODE>")])
3744
3745;; Assumes AVX-512F as the baseline: the scalar and 512-bit modes are
;; unconditional, while the 128/256-bit modes also require AVX512VL.
3746(define_mode_iterator VF_SF_AVX512VL
3747  [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
3748   DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
3749
;; AVX-512 fmadd over VF_SF_AVX512VL.  The pattern name, predicates,
;; constraints and templates all carry sd_mask/round subst attributes.
;; The three alternatives are the 132, 213 and 231 operand orders.
3750(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
3751  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3752	(fma:VF_SF_AVX512VL
3753	  (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3754	  (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3755	  (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3756  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3757  "@
3758   vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3759   vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3760   vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3761  [(set_attr "type" "ssemuladd")
3762   (set_attr "mode" "<MODE>")])
3763
;; Merge-masked fmadd.  The vec_merge selects the fma result where mask
;; operand 4 is set and operand 1 (match_dup 1) elsewhere; operand 1 is
;; tied to the destination in both alternatives (132 and 213 forms).
3764(define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
3765  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3766	(vec_merge:VF_AVX512VL
3767	  (fma:VF_AVX512VL
3768	    (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3769	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3770	    (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3771	  (match_dup 1)
3772	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3773  "TARGET_AVX512F && <round_mode512bit_condition>"
3774  "@
3775   vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3776   vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3777  [(set_attr "type" "ssemuladd")
3778   (set_attr "mode" "<MODE>")])
3779
;; Merge-masked fmadd, "mask3" flavour: unselected lanes keep operand 3
;; (match_dup 3), the addend, which is tied to the destination; only the
;; 231 form is used.  The condition includes <round_mode512bit_condition>
;; so the <round_name> variant is restricted in the same way as the other
;; masked fmadd/fmsub/fnmadd patterns.
3780(define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
3781  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3782	(vec_merge:VF_AVX512VL
3783	  (fma:VF_AVX512VL
3784	    (match_operand:VF_AVX512VL 1 "register_operand" "v")
3785	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3786	    (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3787	  (match_dup 3)
3788	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3789  "TARGET_AVX512F && <round_mode512bit_condition>"
3790  "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3791  [(set_attr "type" "ssemuladd")
3792   (set_attr "mode" "<MODE>")])
3793
;; Unmasked fmsub: operand 0 = operand 1 * operand 2 - operand 3 (the
;; addend is wrapped in neg).  Alternatives 0-2 are FMA3 132/213/231,
;; alternatives 3-4 are FMA4, per the "isa" attribute.
3794(define_insn "*fma_fmsub_<mode>"
3795  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3796	(fma:FMAMODE
3797	  (match_operand:FMAMODE   1 "nonimmediate_operand" "%0,0,v,x,x")
3798	  (match_operand:FMAMODE   2 "nonimmediate_operand" "vm,v,vm,x,m")
3799	  (neg:FMAMODE
3800	    (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3801  "TARGET_FMA || TARGET_FMA4"
3802  "@
3803   vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3804   vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3805   vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3806   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3807   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3808  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3809   (set_attr "type" "ssemuladd")
3810   (set_attr "mode" "<MODE>")])
3811
;; AVX-512 fmsub over VF_SF_AVX512VL (addend negated), with sd_mask/round
;; subst attributes in the name and templates.  Alternatives are the 132,
;; 213 and 231 operand orders.
3812(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
3813  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3814	(fma:VF_SF_AVX512VL
3815	  (match_operand:VF_SF_AVX512VL   1 "<round_nimm_predicate>" "%0,0,v")
3816	  (match_operand:VF_SF_AVX512VL   2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3817	  (neg:VF_SF_AVX512VL
3818	    (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3819  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3820  "@
3821   vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3822   vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3823   vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3824  [(set_attr "type" "ssemuladd")
3825   (set_attr "mode" "<MODE>")])
3826
;; Merge-masked fmsub: fma with negated addend where mask operand 4 is
;; set, operand 1 (match_dup 1, tied to the destination) elsewhere.
;; Alternatives are the 132 and 213 forms.  The condition includes
;; <round_mode512bit_condition>, matching <avx512>_fmadd_<mode>_mask and
;; <avx512>_fmsub_<mode>_mask3, since the name carries <round_name>.
3827(define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
3828  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3829	(vec_merge:VF_AVX512VL
3830	  (fma:VF_AVX512VL
3831	    (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3832	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3833	    (neg:VF_AVX512VL
3834	      (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3835	  (match_dup 1)
3836	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3837  "TARGET_AVX512F && <round_mode512bit_condition>"
3838  "@
3839   vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3840   vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3841  [(set_attr "type" "ssemuladd")
3842   (set_attr "mode" "<MODE>")])
3843
;; Merge-masked fmsub, "mask3" flavour: unselected lanes keep operand 3
;; (match_dup 3), which is tied to the destination; 231 form only.
3844(define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
3845  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3846	(vec_merge:VF_AVX512VL
3847	  (fma:VF_AVX512VL
3848	    (match_operand:VF_AVX512VL 1 "register_operand" "v")
3849	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3850	    (neg:VF_AVX512VL
3851	      (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3852	  (match_dup 3)
3853	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3854  "TARGET_AVX512F && <round_mode512bit_condition>"
3855  "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3856  [(set_attr "type" "ssemuladd")
3857   (set_attr "mode" "<MODE>")])
3858
;; Unmasked fnmadd: operand 0 = -(operand 1) * operand 2 + operand 3 (the
;; first multiplicand is wrapped in neg).  Alternatives 0-2 are FMA3
;; 132/213/231, alternatives 3-4 are FMA4, per the "isa" attribute.
3859(define_insn "*fma_fnmadd_<mode>"
3860  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3861	(fma:FMAMODE
3862	  (neg:FMAMODE
3863	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3864	  (match_operand:FMAMODE   2 "nonimmediate_operand" "vm,v,vm,x,m")
3865	  (match_operand:FMAMODE   3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3866  "TARGET_FMA || TARGET_FMA4"
3867  "@
3868   vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3869   vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3870   vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3871   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3872   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3873  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3874   (set_attr "type" "ssemuladd")
3875   (set_attr "mode" "<MODE>")])
3876
;; AVX-512 fnmadd over VF_SF_AVX512VL (first multiplicand negated), with
;; sd_mask/round subst attributes in the name and templates.  Alternatives
;; are the 132, 213 and 231 operand orders.
3877(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
3878  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3879	(fma:VF_SF_AVX512VL
3880	  (neg:VF_SF_AVX512VL
3881	    (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3882	  (match_operand:VF_SF_AVX512VL   2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3883	  (match_operand:VF_SF_AVX512VL   3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3884  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3885  "@
3886   vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3887   vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3888   vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3889  [(set_attr "type" "ssemuladd")
3890   (set_attr "mode" "<MODE>")])
3891
;; Merge-masked fnmadd: fma with negated first multiplicand where mask
;; operand 4 is set, operand 1 (match_dup 1, tied to the destination)
;; elsewhere.  Alternatives are the 132 and 213 forms.
3892(define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
3893  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3894	(vec_merge:VF_AVX512VL
3895	  (fma:VF_AVX512VL
3896	    (neg:VF_AVX512VL
3897	      (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3898	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3899	    (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3900	  (match_dup 1)
3901	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3902  "TARGET_AVX512F && <round_mode512bit_condition>"
3903  "@
3904   vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3905   vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3906  [(set_attr "type" "ssemuladd")
3907   (set_attr "mode" "<MODE>")])
3908
;; Merge-masked fnmadd, "mask3" flavour: unselected lanes keep operand 3
;; (match_dup 3), which is tied to the destination; 231 form only.
3909(define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
3910  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3911	(vec_merge:VF_AVX512VL
3912	  (fma:VF_AVX512VL
3913	    (neg:VF_AVX512VL
3914	      (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3915	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3916	    (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3917	  (match_dup 3)
3918	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3919  "TARGET_AVX512F && <round_mode512bit_condition>"
3920  "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3921  [(set_attr "type" "ssemuladd")
3922   (set_attr "mode" "<MODE>")])
3923
;; Unmasked fnmsub: operand 0 = -(operand 1) * operand 2 - operand 3 (both
;; the first multiplicand and the addend are wrapped in neg).
;; Alternatives 0-2 are FMA3 132/213/231, alternatives 3-4 are FMA4, per
;; the "isa" attribute.  The FMA3 templates use plain operands, like the
;; sibling *fma_fmadd/*fma_fmsub/*fma_fnmadd patterns: this pattern's name
;; carries no subst attributes, so sd_mask/round attributes do not belong
;; in its output templates.
3924(define_insn "*fma_fnmsub_<mode>"
3925  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3926	(fma:FMAMODE
3927	  (neg:FMAMODE
3928	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3929	  (match_operand:FMAMODE   2 "nonimmediate_operand" "vm,v,vm,x,m")
3930	  (neg:FMAMODE
3931	    (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3932  "TARGET_FMA || TARGET_FMA4"
3933  "@
3934   vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3935   vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3936   vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3937   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3938   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3939  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3940   (set_attr "type" "ssemuladd")
3941   (set_attr "mode" "<MODE>")])
3942
;; AVX-512 fnmsub over VF_SF_AVX512VL (first multiplicand and addend both
;; negated), with sd_mask/round subst attributes in the name and
;; templates.  Alternatives are the 132, 213 and 231 operand orders.
3943(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
3944  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3945	(fma:VF_SF_AVX512VL
3946	  (neg:VF_SF_AVX512VL
3947	    (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3948	  (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3949	  (neg:VF_SF_AVX512VL
3950	    (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3951  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3952  "@
3953   vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3954   vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3955   vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3956  [(set_attr "type" "ssemuladd")
3957   (set_attr "mode" "<MODE>")])
3958
;; Merge-masked fnmsub: fma with negated first multiplicand and negated
;; addend where mask operand 4 is set, operand 1 (match_dup 1, tied to the
;; destination) elsewhere.  Alternatives are the 132 and 213 forms.
3959(define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
3960  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3961	(vec_merge:VF_AVX512VL
3962	  (fma:VF_AVX512VL
3963	    (neg:VF_AVX512VL
3964	      (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3965	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3966	    (neg:VF_AVX512VL
3967	      (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3968	  (match_dup 1)
3969	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3970  "TARGET_AVX512F && <round_mode512bit_condition>"
3971  "@
3972   vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3973   vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3974  [(set_attr "type" "ssemuladd")
3975   (set_attr "mode" "<MODE>")])
3976
;; Merge-masked fnmsub, "mask3" flavour: unselected lanes keep operand 3
;; (match_dup 3), which is tied to the destination; 231 form only.  The
;; condition includes <round_mode512bit_condition>, matching
;; <avx512>_fnmsub_<mode>_mask and the fnmadd/fmsub mask3 patterns, since
;; the name carries <round_name>.
3977(define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
3978  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3979	(vec_merge:VF_AVX512VL
3980	  (fma:VF_AVX512VL
3981	    (neg:VF_AVX512VL
3982	      (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3983	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3984	    (neg:VF_AVX512VL
3985	      (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3986	  (match_dup 3)
3987	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3988  "TARGET_AVX512F && <round_mode512bit_condition>"
3989  "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3990  [(set_attr "type" "ssemuladd")
3991   (set_attr "mode" "<MODE>")])
3992
3993;; FMA parallel floating point multiply addsub and subadd operations.
3994
3995;; It would be possible to represent these without the UNSPEC as
3996;;
3997;; (vec_merge
3998;;   (fma op1 op2 op3)
3999;;   (fma op1 op2 (neg op3))
4000;;   (merge-const))
4001;;
4002;; But this doesn't seem useful in practice.
4003
;; Named expander for the fmaddsub operation over the VF modes,
;; represented as UNSPEC_FMADDSUB of the three inputs.
4004(define_expand "fmaddsub_<mode>"
4005  [(set (match_operand:VF 0 "register_operand")
4006	(unspec:VF
4007	  [(match_operand:VF 1 "nonimmediate_operand")
4008	   (match_operand:VF 2 "nonimmediate_operand")
4009	   (match_operand:VF 3 "nonimmediate_operand")]
4010	  UNSPEC_FMADDSUB))]
4011  "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
4012
;; Zero-masking fmaddsub expander.  Operand 4 is the mask register.  The
;; body forwards to the fma_fmaddsub_<mode>_maskz_1 generator, inserting a
;; zero vector (CONST0_RTX) ahead of the mask operand.
;; NOTE(review): unlike <avx512>_fmadd_<mode>_maskz<round_expand_name>,
;; the condition has no <round_mode512bit_condition> term -- confirm this
;; is intended.
4013(define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
4014  [(match_operand:VF_AVX512VL 0 "register_operand")
4015   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4016   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4017   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4018   (match_operand:<avx512fmaskmode> 4 "register_operand")]
4019  "TARGET_AVX512F"
4020{
4021  emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
4022    operands[0], operands[1], operands[2], operands[3],
4023    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4024  DONE;
4025})
4026
;; Unmasked fmaddsub for 128/256-bit vectors (UNSPEC_FMADDSUB).
;; Alternatives 0-2 are FMA3 132/213/231, alternatives 3-4 are the
;; four-operand FMA4 form, per the "isa" attribute.
4027(define_insn "*fma_fmaddsub_<mode>"
4028  [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4029	(unspec:VF_128_256
4030	  [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4031	   (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4032	   (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
4033	  UNSPEC_FMADDSUB))]
4034  "TARGET_FMA || TARGET_FMA4"
4035  "@
4036   vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4037   vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4038   vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4039   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4040   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4041  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4042   (set_attr "type" "ssemuladd")
4043   (set_attr "mode" "<MODE>")])
4044
;; AVX-512 fmaddsub over VF_SF_AVX512VL (UNSPEC_FMADDSUB), with
;; sd_mask/round subst attributes in the name and templates.  Alternatives
;; are the 132, 213 and 231 operand orders.
4045(define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
4046  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4047	(unspec:VF_SF_AVX512VL
4048	  [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4049	   (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4050	   (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
4051	  UNSPEC_FMADDSUB))]
4052  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4053  "@
4054   vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4055   vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4056   vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4057  [(set_attr "type" "ssemuladd")
4058   (set_attr "mode" "<MODE>")])
4059
;; Merge-masked fmaddsub: UNSPEC_FMADDSUB result where mask operand 4 is
;; set, operand 1 (match_dup 1, tied to the destination) elsewhere.
;; Alternatives are the 132 and 213 forms.
;; NOTE(review): the name carries <round_name> but the condition has no
;; <round_mode512bit_condition> term -- confirm this is intended.
4060(define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
4061  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4062	(vec_merge:VF_AVX512VL
4063	  (unspec:VF_AVX512VL
4064	    [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4065	     (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
4066	     (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
4067	    UNSPEC_FMADDSUB)
4068	  (match_dup 1)
4069	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4070  "TARGET_AVX512F"
4071  "@
4072   vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4073   vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4074  [(set_attr "type" "ssemuladd")
4075   (set_attr "mode" "<MODE>")])
4076
;; Merge-masked fmaddsub, "mask3" flavour: unselected lanes keep operand 3
;; (match_dup 3), which is tied to the destination; 231 form only.
;; NOTE(review): the name carries <round_name> but the condition has no
;; <round_mode512bit_condition> term -- confirm this is intended.
4077(define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
4078  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4079	(vec_merge:VF_AVX512VL
4080	  (unspec:VF_AVX512VL
4081	    [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4082	     (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
4083	     (match_operand:VF_AVX512VL 3 "register_operand" "0")]
4084	    UNSPEC_FMADDSUB)
4085	  (match_dup 3)
4086	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4087  "TARGET_AVX512F"
4088  "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4089  [(set_attr "type" "ssemuladd")
4090   (set_attr "mode" "<MODE>")])
4091
;; Unmasked fmsubadd for 128/256-bit vectors: the same UNSPEC_FMADDSUB as
;; fmaddsub, distinguished by operand 3 being wrapped in neg.
;; Alternatives 0-2 are FMA3 132/213/231, alternatives 3-4 are FMA4, per
;; the "isa" attribute.
4092(define_insn "*fma_fmsubadd_<mode>"
4093  [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4094	(unspec:VF_128_256
4095	  [(match_operand:VF_128_256   1 "nonimmediate_operand" "%0,0,v,x,x")
4096	   (match_operand:VF_128_256   2 "nonimmediate_operand" "vm,v,vm,x,m")
4097	   (neg:VF_128_256
4098	     (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
4099	  UNSPEC_FMADDSUB))]
4100  "TARGET_FMA || TARGET_FMA4"
4101  "@
4102   vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4103   vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4104   vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4105   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4106   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4107  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4108   (set_attr "type" "ssemuladd")
4109   (set_attr "mode" "<MODE>")])
4110
;; AVX-512 fmsubadd over VF_SF_AVX512VL (UNSPEC_FMADDSUB with operand 3
;; negated), with sd_mask/round subst attributes in the name and
;; templates.  Alternatives are the 132, 213 and 231 operand orders.
4111(define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
4112  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4113	(unspec:VF_SF_AVX512VL
4114	  [(match_operand:VF_SF_AVX512VL   1 "<round_nimm_predicate>" "%0,0,v")
4115	   (match_operand:VF_SF_AVX512VL   2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4116	   (neg:VF_SF_AVX512VL
4117	     (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
4118	  UNSPEC_FMADDSUB))]
4119  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4120  "@
4121   vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4122   vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4123   vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4124  [(set_attr "type" "ssemuladd")
4125   (set_attr "mode" "<MODE>")])
4126
;; Merge-masked fmsubadd: UNSPEC_FMADDSUB with negated operand 3 where
;; mask operand 4 is set, operand 1 (match_dup 1, tied to the destination)
;; elsewhere.  Alternatives are the 132 and 213 forms.
;; NOTE(review): the name carries <round_name> but the condition has no
;; <round_mode512bit_condition> term -- confirm this is intended.
4127(define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
4128  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4129	(vec_merge:VF_AVX512VL
4130	  (unspec:VF_AVX512VL
4131	    [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4132	     (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
4133	     (neg:VF_AVX512VL
4134	       (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
4135	    UNSPEC_FMADDSUB)
4136	  (match_dup 1)
4137	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4138  "TARGET_AVX512F"
4139  "@
4140   vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4141   vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4142  [(set_attr "type" "ssemuladd")
4143   (set_attr "mode" "<MODE>")])
4144
;; Merge-masked fmsubadd, "mask3" flavour: unselected lanes keep operand 3
;; (match_dup 3), which is tied to the destination; 231 form only.
;; NOTE(review): the name carries <round_name> but the condition has no
;; <round_mode512bit_condition> term -- confirm this is intended.
4145(define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
4146  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4147	(vec_merge:VF_AVX512VL
4148	  (unspec:VF_AVX512VL
4149	    [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4150	     (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
4151	     (neg:VF_AVX512VL
4152	       (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
4153	    UNSPEC_FMADDSUB)
4154	  (match_dup 3)
4155	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4156  "TARGET_AVX512F"
4157  "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4158  [(set_attr "type" "ssemuladd")
4159   (set_attr "mode" "<MODE>")])
4160
4161;; FMA3 floating point scalar intrinsics. These merge result with
4162;; high-order elements from the destination register.
4163
;; Scalar-intrinsic fmadd expander on 128-bit vectors: the fma result is
;; merged with operand 1 under the constant mask 1, i.e. only element 0
;; takes the fma result and the other elements come from operand 1.
4164(define_expand "fmai_vmfmadd_<mode><round_name>"
4165  [(set (match_operand:VF_128 0 "register_operand")
4166	(vec_merge:VF_128
4167	  (fma:VF_128
4168	    (match_operand:VF_128 1 "<round_nimm_predicate>")
4169	    (match_operand:VF_128 2 "<round_nimm_predicate>")
4170	    (match_operand:VF_128 3 "<round_nimm_predicate>"))
4171	  (match_dup 1)
4172	  (const_int 1)))]
4173  "TARGET_FMA")
4174
;; Scalar fmadd with merge: element 0 is fma (op1, op2, op3), the remaining
;; elements come from operand 1, which is tied to the destination in both
;; alternatives.  Alternative 0 prints vfmadd132, alternative 1 vfmadd213.
;; NOTE(review): the pattern name carries no <round_name> although the body
;; uses <round_*> subst attributes, unlike the fnmadd/fnmsub patterns that
;; follow -- confirm this is intentional.
4175(define_insn "*fmai_fmadd_<mode>"
4176  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4177        (vec_merge:VF_128
4178	  (fma:VF_128
4179	    (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
4180	    (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
4181	    (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
4182	  (match_dup 1)
4183	  (const_int 1)))]
4184  "TARGET_FMA || TARGET_AVX512F"
4185  "@
4186   vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4187   vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4188  [(set_attr "type" "ssemuladd")
4189   (set_attr "mode" "<MODE>")])
4190
;; Scalar fmsub with merge: like the fmadd pattern but operand 3 is negated
;; inside the fma.  Operand 1 is tied to the destination and supplies the
;; elements other than element 0.  Alternative 0 prints vfmsub132,
;; alternative 1 vfmsub213.
;; NOTE(review): the pattern name carries no <round_name> although the body
;; uses <round_*> subst attributes -- confirm this is intentional.
4191(define_insn "*fmai_fmsub_<mode>"
4192  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4193        (vec_merge:VF_128
4194	  (fma:VF_128
4195	    (match_operand:VF_128   1 "<round_nimm_predicate>" "0,0")
4196	    (match_operand:VF_128   2 "<round_nimm_predicate>" "<round_constraint>,v")
4197	    (neg:VF_128
4198	      (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
4199	  (match_dup 1)
4200	  (const_int 1)))]
4201  "TARGET_FMA || TARGET_AVX512F"
4202  "@
4203   vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4204   vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4205  [(set_attr "type" "ssemuladd")
4206   (set_attr "mode" "<MODE>")])
4207
;; Scalar fnmadd with merge: the first fma input is the negation of operand 2,
;; the second is operand 1 (tied to the destination) and the third is
;; operand 3.  Elements other than element 0 come from operand 1.
;; Alternative 0 prints vfnmadd132, alternative 1 vfnmadd213.
4208(define_insn "*fmai_fnmadd_<mode><round_name>"
4209  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4210        (vec_merge:VF_128
4211	  (fma:VF_128
4212	    (neg:VF_128
4213	      (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4214	    (match_operand:VF_128   1 "<round_nimm_predicate>" "0,0")
4215	    (match_operand:VF_128   3 "<round_nimm_predicate>" "v,<round_constraint>"))
4216	  (match_dup 1)
4217	  (const_int 1)))]
4218  "TARGET_FMA || TARGET_AVX512F"
4219  "@
4220   vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4221   vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4222  [(set_attr "type" "ssemuladd")
4223   (set_attr "mode" "<MODE>")])
4224
;; Scalar fnmsub with merge: both operand 2 (first fma input) and operand 3
;; (third fma input) are negated; operand 1 is tied to the destination and
;; supplies the elements other than element 0.  Alternative 0 prints
;; vfnmsub132, alternative 1 vfnmsub213.
4225(define_insn "*fmai_fnmsub_<mode><round_name>"
4226  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4227        (vec_merge:VF_128
4228	  (fma:VF_128
4229	    (neg:VF_128
4230	      (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
4231	    (match_operand:VF_128   1 "<round_nimm_predicate>" " 0, 0")
4232	    (neg:VF_128
4233	      (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
4234	  (match_dup 1)
4235	  (const_int 1)))]
4236  "TARGET_FMA || TARGET_AVX512F"
4237  "@
4238   vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4239   vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4240  [(set_attr "type" "ssemuladd")
4241   (set_attr "mode" "<MODE>")])
4242
4243;; FMA4 floating point scalar intrinsics.  These write the
4244;; entire destination register, with the high-order elements zeroed.
4245
;; Named expander for the FMA4 scalar fmadd intrinsic.  The preparation
;; statement sets operands[4] to the zero vector of the mode, so the vec_merge
;; takes element 0 from the fma result and zeros for all other elements.
4246(define_expand "fma4i_vmfmadd_<mode>"
4247  [(set (match_operand:VF_128 0 "register_operand")
4248	(vec_merge:VF_128
4249	  (fma:VF_128
4250	    (match_operand:VF_128 1 "nonimmediate_operand")
4251	    (match_operand:VF_128 2 "nonimmediate_operand")
4252	    (match_operand:VF_128 3 "nonimmediate_operand"))
4253	  (match_dup 4)
4254	  (const_int 1)))]
4255  "TARGET_FMA4"
4256  "operands[4] = CONST0_RTX (<MODE>mode);")
4257
;; FMA4 scalar fmadd: four-operand form with a separate destination.  Operand 4
;; must satisfy const0_operand, i.e. the non-selected elements are zero.
;; Operand 1 is marked commutative with operand 2 ("%"); the two alternatives
;; allow the memory operand to be either operand 3 or operand 2.
4258(define_insn "*fma4i_vmfmadd_<mode>"
4259  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4260	(vec_merge:VF_128
4261	  (fma:VF_128
4262	    (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4263	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4264	    (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
4265	  (match_operand:VF_128 4 "const0_operand")
4266	  (const_int 1)))]
4267  "TARGET_FMA4"
4268  "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4269  [(set_attr "type" "ssemuladd")
4270   (set_attr "mode" "<MODE>")])
4271
;; FMA4 scalar fmsub: same shape as the FMA4 fmadd pattern with operand 3
;; negated inside the fma.  Operand 4 must be the zero constant, so the
;; non-selected elements of the result are zero.
4272(define_insn "*fma4i_vmfmsub_<mode>"
4273  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4274	(vec_merge:VF_128
4275	  (fma:VF_128
4276	    (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4277	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4278	    (neg:VF_128
4279	      (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
4280	  (match_operand:VF_128 4 "const0_operand")
4281	  (const_int 1)))]
4282  "TARGET_FMA4"
4283  "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4284  [(set_attr "type" "ssemuladd")
4285   (set_attr "mode" "<MODE>")])
4286
;; FMA4 scalar fnmadd: operand 1 is negated inside the fma; operands 2 and 3
;; are used as-is.  Operand 4 must be the zero constant, so the non-selected
;; elements of the result are zero.
4287(define_insn "*fma4i_vmfnmadd_<mode>"
4288  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4289	(vec_merge:VF_128
4290	  (fma:VF_128
4291	    (neg:VF_128
4292	      (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4293	    (match_operand:VF_128   2 "nonimmediate_operand" " x,m")
4294	    (match_operand:VF_128   3 "nonimmediate_operand" "xm,x"))
4295	  (match_operand:VF_128 4 "const0_operand")
4296	  (const_int 1)))]
4297  "TARGET_FMA4"
4298  "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4299  [(set_attr "type" "ssemuladd")
4300   (set_attr "mode" "<MODE>")])
4301
;; FMA4 scalar fnmsub: both operand 1 and operand 3 are negated inside the
;; fma.  Operand 4 must be the zero constant, so the non-selected elements of
;; the result are zero.
4302(define_insn "*fma4i_vmfnmsub_<mode>"
4303  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4304	(vec_merge:VF_128
4305	  (fma:VF_128
4306	    (neg:VF_128
4307	      (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4308	    (match_operand:VF_128   2 "nonimmediate_operand" " x,m")
4309	    (neg:VF_128
4310	      (match_operand:VF_128   3 "nonimmediate_operand" "xm,x")))
4311	  (match_operand:VF_128 4 "const0_operand")
4312	  (const_int 1)))]
4313  "TARGET_FMA4"
4314  "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4315  [(set_attr "type" "ssemuladd")
4316   (set_attr "mode" "<MODE>")])
4317
4318;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4319;;
4320;; Parallel single-precision floating point conversion operations
4321;;
4322;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4323
;; cvtpi2ps: convert the V2SI operand 2 (MMX register or memory) to V2SF and
;; merge it into elements 0 and 1 (vec_merge mask 3) of operand 1, which is
;; tied to the destination.  Elements 2 and 3 of operand 1 are kept.
4324(define_insn "sse_cvtpi2ps"
4325  [(set (match_operand:V4SF 0 "register_operand" "=x")
4326	(vec_merge:V4SF
4327	  (vec_duplicate:V4SF
4328	    (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
4329	  (match_operand:V4SF 1 "register_operand" "0")
4330	  (const_int 3)))]
4331  "TARGET_SSE"
4332  "cvtpi2ps\t{%2, %0|%0, %2}"
4333  [(set_attr "type" "ssecvt")
4334   (set_attr "mode" "V4SF")])
4335
;; cvtps2pi: elements 0 and 1 of the UNSPEC_FIX_NOTRUNC conversion of the V4SF
;; operand 1 are written to the V2SI destination in an MMX register ("=y").
;; The Intel-syntax template prints the source with the %q modifier.
4336(define_insn "sse_cvtps2pi"
4337  [(set (match_operand:V2SI 0 "register_operand" "=y")
4338	(vec_select:V2SI
4339	  (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
4340		       UNSPEC_FIX_NOTRUNC)
4341	  (parallel [(const_int 0) (const_int 1)])))]
4342  "TARGET_SSE"
4343  "cvtps2pi\t{%1, %0|%0, %q1}"
4344  [(set_attr "type" "ssecvt")
4345   (set_attr "unit" "mmx")
4346   (set_attr "mode" "DI")])
4347
;; cvttps2pi: same shape as sse_cvtps2pi but the conversion is expressed with
;; the RTL fix operator instead of UNSPEC_FIX_NOTRUNC.  Elements 0 and 1 of the
;; converted vector go to an MMX register.  prefix_rep is forced to 0.
4348(define_insn "sse_cvttps2pi"
4349  [(set (match_operand:V2SI 0 "register_operand" "=y")
4350	(vec_select:V2SI
4351	  (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
4352	  (parallel [(const_int 0) (const_int 1)])))]
4353  "TARGET_SSE"
4354  "cvttps2pi\t{%1, %0|%0, %q1}"
4355  [(set_attr "type" "ssecvt")
4356   (set_attr "unit" "mmx")
4357   (set_attr "prefix_rep" "0")
4358   (set_attr "mode" "SF")])
4359
;; cvtsi2ss: convert the SI operand 2 to SF and place it in element 0
;; (vec_merge mask 1) of operand 1; the other elements of operand 1 are kept.
;; Alternatives 0 and 1 are the two-operand forms (isa "noavx", register and
;; memory source, operand 1 tied to the destination); alternative 2 is the
;; three-operand vcvtsi2ss form (isa "avx").  The *_decode attributes are
;; given per alternative.
4360(define_insn "sse_cvtsi2ss<round_name>"
4361  [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4362	(vec_merge:V4SF
4363	  (vec_duplicate:V4SF
4364	    (float:SF (match_operand:SI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
4365	  (match_operand:V4SF 1 "register_operand" "0,0,v")
4366	  (const_int 1)))]
4367  "TARGET_SSE"
4368  "@
4369   cvtsi2ss\t{%2, %0|%0, %2}
4370   cvtsi2ss\t{%2, %0|%0, %2}
4371   vcvtsi2ss\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4372  [(set_attr "isa" "noavx,noavx,avx")
4373   (set_attr "type" "sseicvt")
4374   (set_attr "athlon_decode" "vector,double,*")
4375   (set_attr "amdfam10_decode" "vector,double,*")
4376   (set_attr "bdver1_decode" "double,direct,*")
4377   (set_attr "btver2_decode" "double,double,double")
4378   (set_attr "znver1_decode" "double,double,double")
4379   (set_attr "prefix" "orig,orig,maybe_evex")
4380   (set_attr "mode" "SF")])
4381
;; 64-bit source variant of sse_cvtsi2ss: operand 2 is DImode and the pattern
;; additionally requires TARGET_64BIT.  The two-operand alternatives set
;; prefix_rex to 1; the three-operand vcvtsi2ssq alternative sets length_vex
;; to 4.
4382(define_insn "sse_cvtsi2ssq<round_name>"
4383  [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4384	(vec_merge:V4SF
4385	  (vec_duplicate:V4SF
4386	    (float:SF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
4387	  (match_operand:V4SF 1 "register_operand" "0,0,v")
4388	  (const_int 1)))]
4389  "TARGET_SSE && TARGET_64BIT"
4390  "@
4391   cvtsi2ssq\t{%2, %0|%0, %2}
4392   cvtsi2ssq\t{%2, %0|%0, %2}
4393   vcvtsi2ssq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4394  [(set_attr "isa" "noavx,noavx,avx")
4395   (set_attr "type" "sseicvt")
4396   (set_attr "athlon_decode" "vector,double,*")
4397   (set_attr "amdfam10_decode" "vector,double,*")
4398   (set_attr "bdver1_decode" "double,direct,*")
4399   (set_attr "btver2_decode" "double,double,double")
4400   (set_attr "length_vex" "*,*,4")
4401   (set_attr "prefix_rex" "1,1,*")
4402   (set_attr "prefix" "orig,orig,maybe_evex")
4403   (set_attr "mode" "SF")])
4404
;; cvtss2si: element 0 of the V4SF operand 1 is converted to SImode through
;; UNSPEC_FIX_NOTRUNC.  Two alternatives: source in a vector register, or
;; whatever <round_constraint2> allows.  prefix_rep is 1.
4405(define_insn "sse_cvtss2si<round_name>"
4406  [(set (match_operand:SI 0 "register_operand" "=r,r")
4407	(unspec:SI
4408	  [(vec_select:SF
4409	     (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4410	     (parallel [(const_int 0)]))]
4411	  UNSPEC_FIX_NOTRUNC))]
4412  "TARGET_SSE"
4413  "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
4414  [(set_attr "type" "sseicvt")
4415   (set_attr "athlon_decode" "double,vector")
4416   (set_attr "bdver1_decode" "double,double")
4417   (set_attr "prefix_rep" "1")
4418   (set_attr "prefix" "maybe_vex")
4419   (set_attr "mode" "SI")])
4420
;; Variant of sse_cvtss2si whose source is a plain SFmode operand (vector
;; register or memory) instead of element 0 selected from a V4SF vector.  It
;; prints the same cvtss2si template.
4421(define_insn "sse_cvtss2si_2"
4422  [(set (match_operand:SI 0 "register_operand" "=r,r")
4423	(unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
4424		   UNSPEC_FIX_NOTRUNC))]
4425  "TARGET_SSE"
4426  "%vcvtss2si\t{%1, %0|%0, %k1}"
4427  [(set_attr "type" "sseicvt")
4428   (set_attr "athlon_decode" "double,vector")
4429   (set_attr "amdfam10_decode" "double,double")
4430   (set_attr "bdver1_decode" "double,double")
4431   (set_attr "prefix_rep" "1")
4432   (set_attr "prefix" "maybe_vex")
4433   (set_attr "mode" "SI")])
4434
;; 64-bit destination variant of sse_cvtss2si: the result is DImode, the
;; pattern requires TARGET_64BIT, and the mnemonic gets a "q" suffix in the
;; {q} template alternative.
4435(define_insn "sse_cvtss2siq<round_name>"
4436  [(set (match_operand:DI 0 "register_operand" "=r,r")
4437	(unspec:DI
4438	  [(vec_select:SF
4439	     (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4440	     (parallel [(const_int 0)]))]
4441	  UNSPEC_FIX_NOTRUNC))]
4442  "TARGET_SSE && TARGET_64BIT"
4443  "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
4444  [(set_attr "type" "sseicvt")
4445   (set_attr "athlon_decode" "double,vector")
4446   (set_attr "bdver1_decode" "double,double")
4447   (set_attr "prefix_rep" "1")
4448   (set_attr "prefix" "maybe_vex")
4449   (set_attr "mode" "DI")])
4450
;; Variant of sse_cvtss2siq whose source is a plain SFmode operand (vector
;; register or memory) rather than element 0 of a V4SF vector.  Requires
;; TARGET_64BIT.
4451(define_insn "sse_cvtss2siq_2"
4452  [(set (match_operand:DI 0 "register_operand" "=r,r")
4453	(unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
4454		   UNSPEC_FIX_NOTRUNC))]
4455  "TARGET_SSE && TARGET_64BIT"
4456  "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
4457  [(set_attr "type" "sseicvt")
4458   (set_attr "athlon_decode" "double,vector")
4459   (set_attr "amdfam10_decode" "double,double")
4460   (set_attr "bdver1_decode" "double,double")
4461   (set_attr "prefix_rep" "1")
4462   (set_attr "prefix" "maybe_vex")
4463   (set_attr "mode" "DI")])
4464
;; cvttss2si: element 0 of the V4SF operand 1 is converted to SImode with the
;; RTL fix operator.  Uses the <round_saeonly_*> subst attributes rather than
;; the <round_*> ones used by the cvtss2si patterns.
4465(define_insn "sse_cvttss2si<round_saeonly_name>"
4466  [(set (match_operand:SI 0 "register_operand" "=r,r")
4467	(fix:SI
4468	  (vec_select:SF
4469	    (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4470	    (parallel [(const_int 0)]))))]
4471  "TARGET_SSE"
4472  "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4473  [(set_attr "type" "sseicvt")
4474   (set_attr "athlon_decode" "double,vector")
4475   (set_attr "amdfam10_decode" "double,double")
4476   (set_attr "bdver1_decode" "double,double")
4477   (set_attr "prefix_rep" "1")
4478   (set_attr "prefix" "maybe_vex")
4479   (set_attr "mode" "SI")])
4480
;; 64-bit destination variant of sse_cvttss2si (DImode result, requires
;; TARGET_64BIT, "q" mnemonic suffix in the {q} template alternative).
;; NOTE(review): the second alternative uses <round_saeonly_constraint> where
;; the SImode pattern uses <round_saeonly_constraint2> -- confirm whether the
;; difference is intended.
4481(define_insn "sse_cvttss2siq<round_saeonly_name>"
4482  [(set (match_operand:DI 0 "register_operand" "=r,r")
4483	(fix:DI
4484	  (vec_select:SF
4485	    (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
4486	    (parallel [(const_int 0)]))))]
4487  "TARGET_SSE && TARGET_64BIT"
4488  "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4489  [(set_attr "type" "sseicvt")
4490   (set_attr "athlon_decode" "double,vector")
4491   (set_attr "amdfam10_decode" "double,double")
4492   (set_attr "bdver1_decode" "double,double")
4493   (set_attr "prefix_rep" "1")
4494   (set_attr "prefix" "maybe_vex")
4495   (set_attr "mode" "DI")])
4496
;; vcvtusi2ss/sd with a 32-bit source: the SI operand 2 is converted with
;; unsigned_float to the scalar mode and placed in element 0 of operand 1
;; (vec_merge mask 1).  Three-operand form only; operand 1 is not tied to the
;; destination.  Guarded by TARGET_AVX512F and <round_modev4sf_condition>.
4497(define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
4498  [(set (match_operand:VF_128 0 "register_operand" "=v")
4499	(vec_merge:VF_128
4500	  (vec_duplicate:VF_128
4501	    (unsigned_float:<ssescalarmode>
4502	      (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4503	  (match_operand:VF_128 1 "register_operand" "v")
4504	  (const_int 1)))]
4505  "TARGET_AVX512F && <round_modev4sf_condition>"
4506  "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4507  [(set_attr "type" "sseicvt")
4508   (set_attr "prefix" "evex")
4509   (set_attr "mode" "<ssescalarmode>")])
4510
;; vcvtusi2ss/sd with a 64-bit source: same shape as the 32-bit pattern but
;; operand 2 is DImode, the condition requires TARGET_64BIT, and the mnemonic
;; gets a "q" suffix in the {q} template alternative.
4511(define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
4512  [(set (match_operand:VF_128 0 "register_operand" "=v")
4513	(vec_merge:VF_128
4514	  (vec_duplicate:VF_128
4515	    (unsigned_float:<ssescalarmode>
4516	      (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4517	  (match_operand:VF_128 1 "register_operand" "v")
4518	  (const_int 1)))]
4519  "TARGET_AVX512F && TARGET_64BIT"
4520  "vcvtusi2<ssescalarmodesuffix>{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4521  [(set_attr "type" "sseicvt")
4522   (set_attr "prefix" "evex")
4523   (set_attr "mode" "<ssescalarmode>")])
4524
;; Signed integer vector -> single-precision vector conversion for the VF1
;; modes.  Alternative 0 is the legacy cvtdq2ps (isa "noavx"); alternative 1 is
;; vcvtdq2ps (isa "avx"), whose template carries the mask and rounding
;; operands.
4525(define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
4526  [(set (match_operand:VF1 0 "register_operand" "=x,v")
4527	(float:VF1
4528	  (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
4529  "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
4530  "@
4531   cvtdq2ps\t{%1, %0|%0, %1}
4532   vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4533  [(set_attr "isa" "noavx,avx")
4534   (set_attr "type" "ssecvt")
4535   (set_attr "prefix" "maybe_vex")
4536   (set_attr "mode" "<sseinsnmode>")])
4537
;; Unsigned integer vector -> single-precision vector conversion
;; (unsigned_float) for the VF1_AVX512VL modes, printed as vcvtudq2ps.
;; EVEX-only; enabled under TARGET_AVX512F.
4538(define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
4539  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
4540	(unsigned_float:VF1_AVX512VL
4541	  (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4542  "TARGET_AVX512F"
4543  "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4544  [(set_attr "type" "ssecvt")
4545   (set_attr "prefix" "evex")
4546   (set_attr "mode" "<MODE>")])
4547
;; Expander for unsigned integer vector -> SF vector conversion on the VF1
;; modes.  Enabled for V4SF under TARGET_SSE2 and for the other modes only
;; with TARGET_AVX2.  Dispatch in the C body:
;;   - V16SF: emit the ufloatv16siv16sf2 insn;
;;   - otherwise, with TARGET_AVX512VL: emit the ufloatv4siv4sf2 or
;;     ufloatv8siv8sf2 insn according to the mode;
;;   - otherwise: call ix86_expand_vector_convert_uns_vsivsf.
;; Note that the last "else" in the body pairs with "if (TARGET_AVX512VL)",
;; not with the outer mode test, despite its indentation.
4548(define_expand "floatuns<sseintvecmodelower><mode>2"
4549  [(match_operand:VF1 0 "register_operand")
4550   (match_operand:<sseintvecmode> 1 "register_operand")]
4551  "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
4552{
4553  if (<MODE>mode == V16SFmode)
4554    emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
4555  else
4556    if (TARGET_AVX512VL)
4557      {
4558	if (<MODE>mode == V4SFmode)
4559	  emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
4560	else
4561	  emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
4562      }
4563  else
4564    ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
4565
4566  DONE;
4567})
4568
4569
4570;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
;; Maps each SI vector mode to the lower-case name of the SF vector mode with
;; the same number of elements.
4571(define_mode_attr sf2simodelower
4572  [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
4573
;; cvtps2dq for the VI4_AVX modes: the SF vector operand 1 is converted to an
;; SI vector through UNSPEC_FIX_NOTRUNC.  prefix_data16 is left at its default
;; ("*") when TARGET_AVX holds and is 1 otherwise.
4574(define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
4575  [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
4576	(unspec:VI4_AVX
4577	  [(match_operand:<ssePSmode> 1 "vector_operand" "vBm")]
4578	  UNSPEC_FIX_NOTRUNC))]
4579  "TARGET_SSE2 && <mask_mode512bit_condition>"
4580  "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4581  [(set_attr "type" "ssecvt")
4582   (set (attr "prefix_data16")
4583     (if_then_else
4584       (match_test "TARGET_AVX")
4585     (const_string "*")
4586     (const_string "1")))
4587   (set_attr "prefix" "maybe_vex")
4588   (set_attr "mode" "<sseinsnmode>")])
4589
;; 512-bit vcvtps2dq (V16SF -> V16SI via UNSPEC_FIX_NOTRUNC) with mask and
;; rounding subst support.  EVEX-only; enabled under TARGET_AVX512F.
4590(define_insn "avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
4591  [(set (match_operand:V16SI 0 "register_operand" "=v")
4592	(unspec:V16SI
4593	  [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
4594	  UNSPEC_FIX_NOTRUNC))]
4595  "TARGET_AVX512F"
4596  "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4597  [(set_attr "type" "ssecvt")
4598   (set_attr "prefix" "evex")
4599   (set_attr "mode" "XI")])
4600
;; vcvtps2udq for the VI4_AVX512VL modes: SF vector -> SI vector through
;; UNSPEC_UNSIGNED_FIX_NOTRUNC, with mask and rounding subst support.
;; EVEX-only; enabled under TARGET_AVX512F.
4601(define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
4602  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
4603	(unspec:VI4_AVX512VL
4604	  [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
4605	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4606  "TARGET_AVX512F"
4607  "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4608  [(set_attr "type" "ssecvt")
4609   (set_attr "prefix" "evex")
4610   (set_attr "mode" "<sseinsnmode>")])
4611
;; vcvtps2qq for the VI8_256_512 modes: the <ssePSmode2> source vector is
;; converted through UNSPEC_FIX_NOTRUNC.  Requires TARGET_AVX512DQ and
;; <round_mode512bit_condition>.
4612(define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
4613  [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4614	(unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4615		     UNSPEC_FIX_NOTRUNC))]
4616  "TARGET_AVX512DQ && <round_mode512bit_condition>"
4617  "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4618  [(set_attr "type" "ssecvt")
4619   (set_attr "prefix" "evex")
4620   (set_attr "mode" "<sseinsnmode>")])
4621
;; 128-bit vcvtps2qq: only elements 0 and 1 of the V4SF operand 1 are selected
;; and converted to V2DI through UNSPEC_FIX_NOTRUNC.  Requires both
;; TARGET_AVX512DQ and TARGET_AVX512VL; no rounding subst.
4622(define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
4623  [(set (match_operand:V2DI 0 "register_operand" "=v")
4624	(unspec:V2DI
4625	  [(vec_select:V2SF
4626	     (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4627	     (parallel [(const_int 0) (const_int 1)]))]
4628	  UNSPEC_FIX_NOTRUNC))]
4629  "TARGET_AVX512DQ && TARGET_AVX512VL"
4630  "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4631  [(set_attr "type" "ssecvt")
4632   (set_attr "prefix" "evex")
4633   (set_attr "mode" "TI")])
4634
;; vcvtps2uqq for the VI8_256_512 modes: same shape as the cvtps2qq pattern but
;; using UNSPEC_UNSIGNED_FIX_NOTRUNC.  Requires TARGET_AVX512DQ and
;; <round_mode512bit_condition>.
4635(define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
4636  [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4637	(unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4638		     UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4639  "TARGET_AVX512DQ && <round_mode512bit_condition>"
4640  "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4641  [(set_attr "type" "ssecvt")
4642   (set_attr "prefix" "evex")
4643   (set_attr "mode" "<sseinsnmode>")])
4644
;; 128-bit vcvtps2uqq: elements 0 and 1 of the V4SF operand 1 are selected and
;; converted to V2DI through UNSPEC_UNSIGNED_FIX_NOTRUNC.  Requires both
;; TARGET_AVX512DQ and TARGET_AVX512VL; no rounding subst.
4645(define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
4646  [(set (match_operand:V2DI 0 "register_operand" "=v")
4647	(unspec:V2DI
4648	  [(vec_select:V2SF
4649	     (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4650	     (parallel [(const_int 0) (const_int 1)]))]
4651	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4652  "TARGET_AVX512DQ && TARGET_AVX512VL"
4653  "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4654  [(set_attr "type" "ssecvt")
4655   (set_attr "prefix" "evex")
4656   (set_attr "mode" "TI")])
4657
;; 512-bit V16SF -> V16SI conversion iterated over the any_fix code iterator;
;; <fixsuffix> is inserted both into the pattern name and into the mnemonic
;; (vcvttps2<fixsuffix>dq).  EVEX-only, with mask and SAE-only subst support.
4658(define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
4659  [(set (match_operand:V16SI 0 "register_operand" "=v")
4660	(any_fix:V16SI
4661	  (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4662  "TARGET_AVX512F"
4663  "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4664  [(set_attr "type" "ssecvt")
4665   (set_attr "prefix" "evex")
4666   (set_attr "mode" "XI")])
4667
;; 256-bit vcvttps2dq: V8SF -> V8SI using the RTL fix operator.  Requires
;; TARGET_AVX and, for the masked variant, <mask_avx512vl_condition>; the
;; prefix attribute comes from <mask_prefix>.
4668(define_insn "fix_truncv8sfv8si2<mask_name>"
4669  [(set (match_operand:V8SI 0 "register_operand" "=v")
4670	(fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
4671  "TARGET_AVX && <mask_avx512vl_condition>"
4672  "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4673  [(set_attr "type" "ssecvt")
4674   (set_attr "prefix" "<mask_prefix>")
4675   (set_attr "mode" "OI")])
4676
;; 128-bit cvttps2dq: V4SF -> V4SI using the RTL fix operator.  Requires
;; TARGET_SSE2 and, for the masked variant, <mask_avx512vl_condition>.
;; prefix_rep is left at its default ("*") when TARGET_AVX holds and is 1
;; otherwise.  prefix_data16 is 0: the pattern used to set this attribute
;; twice (a TARGET_AVX-conditional "*"/"0" followed by an unconditional "0");
;; only the unconditional setting is kept so the attribute has a single,
;; unambiguous definition.
4677(define_insn "fix_truncv4sfv4si2<mask_name>"
4678  [(set (match_operand:V4SI 0 "register_operand" "=v")
4679	(fix:V4SI (match_operand:V4SF 1 "vector_operand" "vBm")))]
4680  "TARGET_SSE2 && <mask_avx512vl_condition>"
4681  "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4682  [(set_attr "type" "ssecvt")
4683   (set (attr "prefix_rep")
4684     (if_then_else
4685       (match_test "TARGET_AVX")
4686     (const_string "*")
4687     (const_string "1")))
4693   (set_attr "prefix_data16" "0")
4694   (set_attr "prefix" "<mask_prefix2>")
4695   (set_attr "mode" "TI")])
4696
;; Expander for SF vector -> unsigned SI vector conversion on the VF1 modes.
;; V16SF emits the ufix_truncv16sfv16si2 insn directly.  The other modes go
;; through a signed conversion: ix86_expand_adjust_ufix_to_sfix_si returns an
;; adjusted input (tmp[0]) and fills in tmp[2]; the signed fix_trunc result
;; (tmp[1]) is then XORed with tmp[2] to form the destination.
4697(define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
4698  [(match_operand:<sseintvecmode> 0 "register_operand")
4699   (match_operand:VF1 1 "register_operand")]
4700  "TARGET_SSE2"
4701{
4702  if (<MODE>mode == V16SFmode)
4703    emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
4704					  operands[1]));
4705  else
4706    {
4707      rtx tmp[3];
4708      tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4709      tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
4710      emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
4711      emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
4712    }
4713  DONE;
4714})
4715
4716;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4717;;
4718;; Parallel double-precision floating point conversion operations
4719;;
4720;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4721
;; V2SI -> V2DF conversion.  Alternative 0 takes the source from a vector
;; register or memory and prints (v)cvtdq2pd; alternative 1 takes it from an
;; MMX register (disparaged with "?!") and prints cvtpi2pd, with unit "mmx"
;; and prefix_data16 1.
4722(define_insn "sse2_cvtpi2pd"
4723  [(set (match_operand:V2DF 0 "register_operand" "=v,x")
4724	(float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "vBm,?!y")))]
4725  "TARGET_SSE2"
4726  "@
4727   %vcvtdq2pd\t{%1, %0|%0, %1}
4728   cvtpi2pd\t{%1, %0|%0, %1}"
4729  [(set_attr "type" "ssecvt")
4730   (set_attr "unit" "*,mmx")
4731   (set_attr "prefix_data16" "*,1")
4732   (set_attr "prefix" "maybe_vex,*")
4733   (set_attr "mode" "V2DF")])
4734
;; V2DF -> V2SI conversion through UNSPEC_FIX_NOTRUNC.  Alternative 0 writes a
;; vector register; its output is a C fragment that returns the vcvtpd2dq{x}
;; template when TARGET_AVX holds and the cvtpd2dq template otherwise.
;; Alternative 1 writes an MMX register (disparaged with "?!") and prints
;; cvtpd2pi.
4735(define_insn "sse2_cvtpd2pi"
4736  [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
4737	(unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vBm,xm")]
4738		     UNSPEC_FIX_NOTRUNC))]
4739  "TARGET_SSE2"
4740  "@
4741   * return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvtpd2dq\t{%1, %0|%0, %1}\";
4742   cvtpd2pi\t{%1, %0|%0, %1}"
4743  [(set_attr "type" "ssecvt")
4744   (set_attr "unit" "*,mmx")
4745   (set_attr "amdfam10_decode" "double")
4746   (set_attr "athlon_decode" "vector")
4747   (set_attr "bdver1_decode" "double")
4748   (set_attr "prefix_data16" "*,1")
4749   (set_attr "prefix" "maybe_vex,*")
4750   (set_attr "mode" "TI")])
4751
;; V2DF -> V2SI conversion using the RTL fix operator.  Alternative 0 writes a
;; vector register; its output is a C fragment that returns the vcvttpd2dq{x}
;; template when TARGET_AVX holds and the cvttpd2dq template otherwise.
;; Alternative 1 writes an MMX register (disparaged with "?!") and prints
;; cvttpd2pi.
4752(define_insn "sse2_cvttpd2pi"
4753  [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
4754	(fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vBm,xm")))]
4755  "TARGET_SSE2"
4756  "@
4757   * return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvttpd2dq\t{%1, %0|%0, %1}\";
4758   cvttpd2pi\t{%1, %0|%0, %1}"
4759  [(set_attr "type" "ssecvt")
4760   (set_attr "unit" "*,mmx")
4761   (set_attr "amdfam10_decode" "double")
4762   (set_attr "athlon_decode" "vector")
4763   (set_attr "bdver1_decode" "double")
4764   (set_attr "prefix_data16" "*,1")
4765   (set_attr "prefix" "maybe_vex,*")
4766   (set_attr "mode" "TI")])
4767
;; cvtsi2sd: convert the SI operand 2 to DF and place it in element 0
;; (vec_merge mask 1) of operand 1; element 1 of operand 1 is kept.
;; Alternatives 0 and 1 are the two-operand forms (isa "noavx", register and
;; memory source, operand 1 tied to the destination); alternative 2 is the
;; three-operand vcvtsi2sd form (isa "avx").  This pattern has no rounding
;; subst.
4768(define_insn "sse2_cvtsi2sd"
4769  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4770	(vec_merge:V2DF
4771	  (vec_duplicate:V2DF
4772	    (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
4773	  (match_operand:V2DF 1 "register_operand" "0,0,v")
4774	  (const_int 1)))]
4775  "TARGET_SSE2"
4776  "@
4777   cvtsi2sd\t{%2, %0|%0, %2}
4778   cvtsi2sd\t{%2, %0|%0, %2}
4779   vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
4780  [(set_attr "isa" "noavx,noavx,avx")
4781   (set_attr "type" "sseicvt")
4782   (set_attr "athlon_decode" "double,direct,*")
4783   (set_attr "amdfam10_decode" "vector,double,*")
4784   (set_attr "bdver1_decode" "double,direct,*")
4785   (set_attr "btver2_decode" "double,double,double")
4786   (set_attr "znver1_decode" "double,double,double")
4787   (set_attr "prefix" "orig,orig,maybe_evex")
4788   (set_attr "mode" "DF")])
4789
;; 64-bit source variant of sse2_cvtsi2sd: operand 2 is DImode, the pattern
;; requires TARGET_64BIT, and the three-operand alternative accepts the
;; rounding subst operand.  The two-operand alternatives set prefix_rex to 1;
;; the vcvtsi2sdq alternative sets length_vex to 4.
4790(define_insn "sse2_cvtsi2sdq<round_name>"
4791  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4792	(vec_merge:V2DF
4793	  (vec_duplicate:V2DF
4794	    (float:DF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
4795	  (match_operand:V2DF 1 "register_operand" "0,0,v")
4796	  (const_int 1)))]
4797  "TARGET_SSE2 && TARGET_64BIT"
4798  "@
4799   cvtsi2sdq\t{%2, %0|%0, %2}
4800   cvtsi2sdq\t{%2, %0|%0, %2}
4801   vcvtsi2sdq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4802  [(set_attr "isa" "noavx,noavx,avx")
4803   (set_attr "type" "sseicvt")
4804   (set_attr "athlon_decode" "double,direct,*")
4805   (set_attr "amdfam10_decode" "vector,double,*")
4806   (set_attr "bdver1_decode" "double,direct,*")
4807   (set_attr "length_vex" "*,*,4")
4808   (set_attr "prefix_rex" "1,1,*")
4809   (set_attr "prefix" "orig,orig,maybe_evex")
4810   (set_attr "mode" "DF")])
4811
;; vcvtss2usi, 32-bit result: element 0 of the V4SF operand 1 is converted to
;; SImode through UNSPEC_UNSIGNED_FIX_NOTRUNC.  EVEX-only; enabled under
;; TARGET_AVX512F.
4812(define_insn "avx512f_vcvtss2usi<round_name>"
4813  [(set (match_operand:SI 0 "register_operand" "=r")
4814	(unspec:SI
4815	  [(vec_select:SF
4816	     (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4817	     (parallel [(const_int 0)]))]
4818	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4819  "TARGET_AVX512F"
4820  "vcvtss2usi\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
4821  [(set_attr "type" "sseicvt")
4822   (set_attr "prefix" "evex")
4823   (set_attr "mode" "SI")])
4824
;; vcvtss2usi, 64-bit result: same as the SImode pattern but with a DImode
;; destination and an additional TARGET_64BIT requirement.  The template uses
;; the same mnemonic, without a size suffix.
4825(define_insn "avx512f_vcvtss2usiq<round_name>"
4826  [(set (match_operand:DI 0 "register_operand" "=r")
4827	(unspec:DI
4828	  [(vec_select:SF
4829	     (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4830	     (parallel [(const_int 0)]))]
4831	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4832  "TARGET_AVX512F && TARGET_64BIT"
4833  "vcvtss2usi\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
4834  [(set_attr "type" "sseicvt")
4835   (set_attr "prefix" "evex")
4836   (set_attr "mode" "DI")])
4837
;; vcvttss2usi, 32-bit result: element 0 of the V4SF operand 1 is converted to
;; SImode with the RTL unsigned_fix operator.  Uses the SAE-only subst
;; attributes; EVEX-only, enabled under TARGET_AVX512F.
4838(define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
4839  [(set (match_operand:SI 0 "register_operand" "=r")
4840	(unsigned_fix:SI
4841	  (vec_select:SF
4842	    (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4843	    (parallel [(const_int 0)]))))]
4844  "TARGET_AVX512F"
4845  "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4846  [(set_attr "type" "sseicvt")
4847   (set_attr "prefix" "evex")
4848   (set_attr "mode" "SI")])
4849
;; vcvttss2usi, 64-bit result: same as the SImode pattern but with a DImode
;; destination and an additional TARGET_64BIT requirement.
4850(define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
4851  [(set (match_operand:DI 0 "register_operand" "=r")
4852	(unsigned_fix:DI
4853	  (vec_select:SF
4854	    (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4855	    (parallel [(const_int 0)]))))]
4856  "TARGET_AVX512F && TARGET_64BIT"
4857  "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4858  [(set_attr "type" "sseicvt")
4859   (set_attr "prefix" "evex")
4860   (set_attr "mode" "DI")])
4861
;; vcvtsd2usi, 32-bit result: element 0 of the V2DF operand 1 is converted to
;; SImode through UNSPEC_UNSIGNED_FIX_NOTRUNC.  EVEX-only; enabled under
;; TARGET_AVX512F.
4862(define_insn "avx512f_vcvtsd2usi<round_name>"
4863  [(set (match_operand:SI 0 "register_operand" "=r")
4864	(unspec:SI
4865	  [(vec_select:DF
4866	     (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4867	     (parallel [(const_int 0)]))]
4868	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4869  "TARGET_AVX512F"
4870  "vcvtsd2usi\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4871  [(set_attr "type" "sseicvt")
4872   (set_attr "prefix" "evex")
4873   (set_attr "mode" "SI")])
4874
;; vcvtsd2usi, 64-bit result: same as the SImode pattern but with a DImode
;; destination and an additional TARGET_64BIT requirement.
4875(define_insn "avx512f_vcvtsd2usiq<round_name>"
4876  [(set (match_operand:DI 0 "register_operand" "=r")
4877	(unspec:DI
4878	  [(vec_select:DF
4879	     (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4880	     (parallel [(const_int 0)]))]
4881	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4882  "TARGET_AVX512F && TARGET_64BIT"
4883  "vcvtsd2usi\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4884  [(set_attr "type" "sseicvt")
4885   (set_attr "prefix" "evex")
4886   (set_attr "mode" "DI")])
4887
;; vcvttsd2usi, 32-bit result: element 0 of the V2DF operand 1 is converted to
;; SImode with the RTL unsigned_fix operator.  Uses the SAE-only subst
;; attributes; EVEX-only, enabled under TARGET_AVX512F.
4888(define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
4889  [(set (match_operand:SI 0 "register_operand" "=r")
4890	(unsigned_fix:SI
4891	  (vec_select:DF
4892	    (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4893	    (parallel [(const_int 0)]))))]
4894  "TARGET_AVX512F"
4895  "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4896  [(set_attr "type" "sseicvt")
4897   (set_attr "prefix" "evex")
4898   (set_attr "mode" "SI")])
4899
;; vcvttsd2usi, 64-bit result: same as the SImode pattern but with a DImode
;; destination and an additional TARGET_64BIT requirement.
4900(define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
4901  [(set (match_operand:DI 0 "register_operand" "=r")
4902	(unsigned_fix:DI
4903	  (vec_select:DF
4904	    (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4905	    (parallel [(const_int 0)]))))]
4906  "TARGET_AVX512F && TARGET_64BIT"
4907  "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4908  [(set_attr "type" "sseicvt")
4909   (set_attr "prefix" "evex")
4910   (set_attr "mode" "DI")])
4911
;; cvtsd2si: element 0 of the V2DF operand 1 is converted to SImode through
;; UNSPEC_FIX_NOTRUNC.  Two alternatives: source in a vector register, or
;; whatever <round_constraint2> allows.  prefix_rep is 1.
4912(define_insn "sse2_cvtsd2si<round_name>"
4913  [(set (match_operand:SI 0 "register_operand" "=r,r")
4914	(unspec:SI
4915	  [(vec_select:DF
4916	     (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4917	     (parallel [(const_int 0)]))]
4918	  UNSPEC_FIX_NOTRUNC))]
4919  "TARGET_SSE2"
4920  "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4921  [(set_attr "type" "sseicvt")
4922   (set_attr "athlon_decode" "double,vector")
4923   (set_attr "bdver1_decode" "double,double")
4924   (set_attr "btver2_decode" "double,double")
4925   (set_attr "prefix_rep" "1")
4926   (set_attr "prefix" "maybe_vex")
4927   (set_attr "mode" "SI")])
4928
;; Variant of sse2_cvtsd2si whose source is a plain DFmode operand (vector
;; register or memory) instead of element 0 selected from a V2DF vector.  It
;; prints the same cvtsd2si template.
4929(define_insn "sse2_cvtsd2si_2"
4930  [(set (match_operand:SI 0 "register_operand" "=r,r")
4931	(unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4932		   UNSPEC_FIX_NOTRUNC))]
4933  "TARGET_SSE2"
4934  "%vcvtsd2si\t{%1, %0|%0, %q1}"
4935  [(set_attr "type" "sseicvt")
4936   (set_attr "athlon_decode" "double,vector")
4937   (set_attr "amdfam10_decode" "double,double")
4938   (set_attr "bdver1_decode" "double,double")
4939   (set_attr "prefix_rep" "1")
4940   (set_attr "prefix" "maybe_vex")
4941   (set_attr "mode" "SI")])
4942
;; 64-bit counterpart of the cvtsd2si pattern: element 0 of a V2DF operand
;; is converted (UNSPEC_FIX_NOTRUNC) to a DImode general register.
;; Only available in 64-bit mode; the AT&T mnemonic gets a {q} suffix.
4943(define_insn "sse2_cvtsd2siq<round_name>"
4944  [(set (match_operand:DI 0 "register_operand" "=r,r")
4945	(unspec:DI
4946	  [(vec_select:DF
4947	     (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4948	     (parallel [(const_int 0)]))]
4949	  UNSPEC_FIX_NOTRUNC))]
4950  "TARGET_SSE2 && TARGET_64BIT"
4951  "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4952  [(set_attr "type" "sseicvt")
4953   (set_attr "athlon_decode" "double,vector")
4954   (set_attr "bdver1_decode" "double,double")
4955   (set_attr "prefix_rep" "1")
4956   (set_attr "prefix" "maybe_vex")
4957   (set_attr "mode" "DI")])
4958
;; 64-bit cvtsd2si taking a plain scalar DF operand (register or memory)
;; instead of a vec_select from V2DF.  Conversion is UNSPEC_FIX_NOTRUNC;
;; only available in 64-bit mode.
4959(define_insn "sse2_cvtsd2siq_2"
4960  [(set (match_operand:DI 0 "register_operand" "=r,r")
4961	(unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4962		   UNSPEC_FIX_NOTRUNC))]
4963  "TARGET_SSE2 && TARGET_64BIT"
4964  "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
4965  [(set_attr "type" "sseicvt")
4966   (set_attr "athlon_decode" "double,vector")
4967   (set_attr "amdfam10_decode" "double,double")
4968   (set_attr "bdver1_decode" "double,double")
4969   (set_attr "prefix_rep" "1")
4970   (set_attr "prefix" "maybe_vex")
4971   (set_attr "mode" "DI")])
4972
;; Truncating conversion (`fix') of element 0 of a V2DF operand to a
;; signed 32-bit integer in a general register (cvttsd2si).
;; Alternative 1's constraint and the <round_saeonly_op2> template piece
;; are provided by a subst defined outside this part of the file.
4973(define_insn "sse2_cvttsd2si<round_saeonly_name>"
4974  [(set (match_operand:SI 0 "register_operand" "=r,r")
4975	(fix:SI
4976	  (vec_select:DF
4977	    (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4978	    (parallel [(const_int 0)]))))]
4979  "TARGET_SSE2"
4980  "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4981  [(set_attr "type" "sseicvt")
4982   (set_attr "athlon_decode" "double,vector")
4983   (set_attr "amdfam10_decode" "double,double")
4984   (set_attr "bdver1_decode" "double,double")
4985   (set_attr "btver2_decode" "double,double")
4986   (set_attr "prefix_rep" "1")
4987   (set_attr "prefix" "maybe_vex")
4988   (set_attr "mode" "SI")])
4989
;; 64-bit truncating conversion (`fix') of element 0 of a V2DF operand to
;; a DImode general register (cvttsd2si with {q} suffix in AT&T syntax).
;; Only available in 64-bit mode.
4990(define_insn "sse2_cvttsd2siq<round_saeonly_name>"
4991  [(set (match_operand:DI 0 "register_operand" "=r,r")
4992	(fix:DI
4993	  (vec_select:DF
4994	    (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4995	    (parallel [(const_int 0)]))))]
4996  "TARGET_SSE2 && TARGET_64BIT"
4997  "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4998  [(set_attr "type" "sseicvt")
4999   (set_attr "athlon_decode" "double,vector")
5000   (set_attr "amdfam10_decode" "double,double")
5001   (set_attr "bdver1_decode" "double,double")
5002   (set_attr "prefix_rep" "1")
5003   (set_attr "prefix" "maybe_vex")
5004   (set_attr "mode" "DI")])
5005
5006;; For float<si2dfmode><mode>2 insn pattern
;; Maps a DF vector mode to the SI vector mode with the same element
;; count (V8DF -> V8SI, V4DF -> V4SI).
5007(define_mode_attr si2dfmode
5008  [(V8DF "V8SI") (V4DF "V4SI")])
;; Lower-case spelling of the SI vector mode with the same element count,
;; used when building pattern names (V8DF -> v8si, V4DF -> v4si).
5009(define_mode_attr si2dfmodelower
5010  [(V8DF "v8si") (V4DF "v4si")])
5011
;; Signed integer to double conversion for the VF2_512_256 modes: the
;; <si2dfmode> source (register or memory) is converted element-wise with
;; vcvtdq2pd.  <mask_name>/<mask_operand2>/<mask_mode512bit_condition> are
;; supplied by a masking subst defined outside this part of the file.
5012(define_insn "float<si2dfmodelower><mode>2<mask_name>"
5013  [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
5014	(float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5015  "TARGET_AVX && <mask_mode512bit_condition>"
5016  "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5017  [(set_attr "type" "ssecvt")
5018   (set_attr "prefix" "maybe_vex")
5019   (set_attr "mode" "<MODE>")])
5020
;; 64-bit integer vector to double vector conversion (vcvtqq2pd and its
;; <floatsuffix> variant), iterated over any_float and VF2_AVX512VL.
;; Source has the same-sized integer vector mode <sseintvecmode>.
;; Requires AVX512DQ; masking and rounding pieces come from substs.
5021(define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
5022  [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
5023	(any_float:VF2_AVX512VL
5024	  (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5025  "TARGET_AVX512DQ"
5026  "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5027  [(set_attr "type" "ssecvt")
5028   (set_attr "prefix" "evex")
5029   (set_attr "mode" "<MODE>")])
5030
5031;; For <floatsuffix>float<sselongvecmodelower><mode>2 insn patterns
;; AT&T mnemonic suffix appended to vcvt*qq2ps: none for a V8SF result,
;; "{y}" for a V4SF result.
5032(define_mode_attr qq2pssuff
5033  [(V8SF "") (V4SF "{y}")])
5034
;; Maps an SF vector mode to the DI vector mode with the same element
;; count (V8SF -> V8DI, V4SF -> V4DI).
5035(define_mode_attr sselongvecmode
5036  [(V8SF "V8DI") (V4SF  "V4DI")])
5037
;; Lower-case form of <sselongvecmode>, used when building pattern names.
5038(define_mode_attr sselongvecmodelower
5039  [(V8SF "v8di") (V4SF  "v4di")])
5040
;; Integer mode name used as the value of the "mode" insn attribute:
;; XI/OI for V8SF/V4SF and OI/TI for V8DF/V4DF.
5041(define_mode_attr sseintvecmode3
5042  [(V8SF "XI") (V4SF "OI")
5043   (V8DF "OI") (V4DF "TI")])
5044
;; 64-bit integer vector to single-precision vector conversion
;; (vcvt*qq2ps), iterated over any_float and VF1_128_256VL.  The source
;; has mode <sselongvecmode>, i.e. the same element count with DI
;; elements.  The AT&T mnemonic gets the <qq2pssuff> size suffix.
5045(define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
5046  [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
5047	 (any_float:VF1_128_256VL
5048	   (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5049  "TARGET_AVX512DQ && <round_modev8sf_condition>"
5050  "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5051  [(set_attr "type" "ssecvt")
5052   (set_attr "prefix" "evex")
5053   (set_attr "mode" "<MODE>")])
5054
;; V2DI -> V2SF conversion whose result is placed in the low half of a
;; V4SF register; the upper two elements are explicitly zero (vec_concat
;; with a zero V2SF constant).  Unmasked form, {x} suffix in AT&T syntax.
5055(define_insn "*<floatsuffix>floatv2div2sf2"
5056  [(set (match_operand:V4SF 0 "register_operand" "=v")
5057    (vec_concat:V4SF
5058	    (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5059	    (const_vector:V2SF [(const_int 0) (const_int 0)])))]
5060  "TARGET_AVX512DQ && TARGET_AVX512VL"
5061  "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
5062  [(set_attr "type" "ssecvt")
5063   (set_attr "prefix" "evex")
5064   (set_attr "mode" "V4SF")])
5065
;; Masked V2DI -> V2SF conversion.  Under mask operand 3 each of the two
;; low result elements is either the converted value or the matching
;; element of operand 2 (elements 0 and 1 of a V4SF that is tied to the
;; destination or is constant, per the "0C" constraint).  The upper two
;; elements of the V4SF result are zero.
5066(define_insn "<floatsuffix>floatv2div2sf2_mask"
5067  [(set (match_operand:V4SF 0 "register_operand" "=v")
5068    (vec_concat:V4SF
5069        (vec_merge:V2SF
5070	        (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5071            (vec_select:V2SF
5072                (match_operand:V4SF 2 "vector_move_operand" "0C")
5073                (parallel [(const_int 0) (const_int 1)]))
5074            (match_operand:QI 3 "register_operand" "Yk"))
5075	    (const_vector:V2SF [(const_int 0) (const_int 0)])))]
5076  "TARGET_AVX512DQ && TARGET_AVX512VL"
5077  "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
5078  [(set_attr "type" "ssecvt")
5079   (set_attr "prefix" "evex")
5080   (set_attr "mode" "V4SF")])
5081
;; Zero-masking form of the V2DI -> V2SF conversion: elements not selected
;; by mask operand 2 are merged from a zero constant, so the template
;; always prints the {z} modifier.  Upper half of the V4SF result is zero.
5082(define_insn "*<floatsuffix>floatv2div2sf2_mask_1"
5083  [(set (match_operand:V4SF 0 "register_operand" "=v")
5084    (vec_concat:V4SF
5085	(vec_merge:V2SF
5086		(any_float:V2SF (match_operand:V2DI 1
5087				  "nonimmediate_operand" "vm"))
5088	    (const_vector:V2SF [(const_int 0) (const_int 0)])
5089	    (match_operand:QI 2 "register_operand" "Yk"))
5090	    (const_vector:V2SF [(const_int 0) (const_int 0)])))]
5091  "TARGET_AVX512DQ && TARGET_AVX512VL"
5092  "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
5093  [(set_attr "type" "ssecvt")
5094   (set_attr "prefix" "evex")
5095   (set_attr "mode" "V4SF")])
5096
;; Unsigned 32-bit integer vector to double vector conversion
;; (vcvtudq2pd) for the VF2_512_256VL modes; source mode is <si2dfmode>.
;; Requires AVX512F.
5097(define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
5098  [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
5099	(unsigned_float:VF2_512_256VL
5100	  (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5101   "TARGET_AVX512F"
5102   "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5103   [(set_attr "type" "ssecvt")
5104    (set_attr "prefix" "evex")
5105    (set_attr "mode" "<MODE>")])
5106
;; Unsigned conversion of the two low SI elements of a V4SI operand to
;; V2DF (vcvtudq2pd, 128-bit form).  Requires AVX512VL.
5107(define_insn "ufloatv2siv2df2<mask_name>"
5108  [(set (match_operand:V2DF 0 "register_operand" "=v")
5109	(unsigned_float:V2DF
5110	  (vec_select:V2SI
5111	    (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5112	    (parallel [(const_int 0) (const_int 1)]))))]
5113  "TARGET_AVX512VL"
5114  "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5115  [(set_attr "type" "ssecvt")
5116   (set_attr "prefix" "evex")
5117   (set_attr "mode" "V2DF")])
5118
;; Signed conversion of the low eight SI elements (indices 0-7) of a
;; V16SI operand to V8DF.  The source is printed with the %t modifier in
;; both assembler dialects.
5119(define_insn "avx512f_cvtdq2pd512_2"
5120  [(set (match_operand:V8DF 0 "register_operand" "=v")
5121	(float:V8DF
5122	  (vec_select:V8SI
5123	    (match_operand:V16SI 1 "nonimmediate_operand" "vm")
5124	    (parallel [(const_int 0) (const_int 1)
5125		       (const_int 2) (const_int 3)
5126		       (const_int 4) (const_int 5)
5127		       (const_int 6) (const_int 7)]))))]
5128  "TARGET_AVX512F"
5129  "vcvtdq2pd\t{%t1, %0|%0, %t1}"
5130  [(set_attr "type" "ssecvt")
5131   (set_attr "prefix" "evex")
5132   (set_attr "mode" "V8DF")])
5133
;; Signed conversion of the low four SI elements (indices 0-3) of a V8SI
;; operand to V4DF.  The source is printed with the %x modifier in both
;; assembler dialects.
5134(define_insn "avx_cvtdq2pd256_2"
5135  [(set (match_operand:V4DF 0 "register_operand" "=v")
5136	(float:V4DF
5137	  (vec_select:V4SI
5138	    (match_operand:V8SI 1 "nonimmediate_operand" "vm")
5139	    (parallel [(const_int 0) (const_int 1)
5140		       (const_int 2) (const_int 3)]))))]
5141  "TARGET_AVX"
5142  "vcvtdq2pd\t{%x1, %0|%0, %x1}"
5143  [(set_attr "type" "ssecvt")
5144   (set_attr "prefix" "maybe_evex")
5145   (set_attr "mode" "V4DF")])
5146
;; Signed conversion of the two low SI elements of a V4SI operand to V2DF
;; (cvtdq2pd / vcvtdq2pd).  The masked variant generated through
;; <mask_name> is additionally gated by <mask_avx512vl_condition>.
5147(define_insn "sse2_cvtdq2pd<mask_name>"
5148  [(set (match_operand:V2DF 0 "register_operand" "=v")
5149	(float:V2DF
5150	  (vec_select:V2SI
5151	    (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5152	    (parallel [(const_int 0) (const_int 1)]))))]
5153  "TARGET_SSE2 && <mask_avx512vl_condition>"
5154  "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5155  [(set_attr "type" "ssecvt")
5156   (set_attr "prefix" "maybe_vex")
5157   (set_attr "mode" "V2DF")])
5158
;; V8DF -> V8SI conversion expressed as UNSPEC_FIX_NOTRUNC (vcvtpd2dq,
;; 512-bit source).  Masking and embedded-rounding pieces come from the
;; <mask_name>/<round_name> substs defined outside this part of the file.
5159(define_insn "avx512f_cvtpd2dq512<mask_name><round_name>"
5160  [(set (match_operand:V8SI 0 "register_operand" "=v")
5161	(unspec:V8SI
5162	  [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
5163	  UNSPEC_FIX_NOTRUNC))]
5164  "TARGET_AVX512F"
5165  "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5166  [(set_attr "type" "ssecvt")
5167   (set_attr "prefix" "evex")
5168   (set_attr "mode" "OI")])
5169
;; V4DF -> V4SI conversion expressed as UNSPEC_FIX_NOTRUNC (vcvtpd2dq
;; with the {y} suffix in AT&T syntax).  The "prefix" attribute is taken
;; from <mask_prefix>, so it differs between masked and unmasked forms.
5170(define_insn "avx_cvtpd2dq256<mask_name>"
5171  [(set (match_operand:V4SI 0 "register_operand" "=v")
5172	(unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5173		     UNSPEC_FIX_NOTRUNC))]
5174  "TARGET_AVX && <mask_avx512vl_condition>"
5175  "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5176  [(set_attr "type" "ssecvt")
5177   (set_attr "prefix" "<mask_prefix>")
5178   (set_attr "mode" "OI")])
5179
;; Expander producing a V8SI result whose low half is the
;; UNSPEC_FIX_NOTRUNC conversion of a V4DF operand and whose high half is
;; zero.  Operand 2 is not supplied by the caller; the preparation
;; statement sets it to the V4SI zero constant.
5180(define_expand "avx_cvtpd2dq256_2"
5181  [(set (match_operand:V8SI 0 "register_operand")
5182	(vec_concat:V8SI
5183	  (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
5184		       UNSPEC_FIX_NOTRUNC)
5185	  (match_dup 2)))]
5186  "TARGET_AVX"
5187  "operands[2] = CONST0_RTX (V4SImode);")
5188
;; Insn matching the RTL produced by the avx_cvtpd2dq256_2 expander:
;; converted V4DF in the low half, zero V4SI (operand 2) in the high half.
;; The destination is printed with %x0 although its mode is V8SI.
5189(define_insn "*avx_cvtpd2dq256_2"
5190  [(set (match_operand:V8SI 0 "register_operand" "=v")
5191	(vec_concat:V8SI
5192	  (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5193		       UNSPEC_FIX_NOTRUNC)
5194	  (match_operand:V4SI 2 "const0_operand")))]
5195  "TARGET_AVX"
5196  "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
5197  [(set_attr "type" "ssecvt")
5198   (set_attr "prefix" "vex")
5199   (set_attr "btver2_decode" "vector")
5200   (set_attr "mode" "OI")])
5201
;; V2DF -> V2SI conversion (UNSPEC_FIX_NOTRUNC) placed in the low half of
;; a V4SI result; the high half is zero.  The C output block picks the
;; VEX/EVEX mnemonic (with {x} suffix and optional mask operand) when
;; TARGET_AVX is set and the legacy cvtpd2dq form otherwise.
5202(define_insn "sse2_cvtpd2dq<mask_name>"
5203  [(set (match_operand:V4SI 0 "register_operand" "=v")
5204	(vec_concat:V4SI
5205	  (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
5206		       UNSPEC_FIX_NOTRUNC)
5207	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5208  "TARGET_SSE2 && <mask_avx512vl_condition>"
5209{
5210  if (TARGET_AVX)
5211    return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
5212  else
5213    return "cvtpd2dq\t{%1, %0|%0, %1}";
5214}
5215  [(set_attr "type" "ssecvt")
5216   (set_attr "prefix_rep" "1")
5217   (set_attr "prefix_data16" "0")
5218   (set_attr "prefix" "maybe_vex")
5219   (set_attr "mode" "TI")
5220   (set_attr "amdfam10_decode" "double")
5221   (set_attr "athlon_decode" "vector")
5222   (set_attr "bdver1_decode" "double")])
5223
5224;; For ufix_notrunc* insn patterns
;; AT&T mnemonic suffix appended to vcvtpd2udq: none for a V8DF source,
;; "{y}" for a V4DF source.
5225(define_mode_attr pd2udqsuff
5226  [(V8DF "") (V4DF "{y}")])
5227
;; Double vector -> unsigned 32-bit integer vector conversion, expressed
;; as UNSPEC_UNSIGNED_FIX_NOTRUNC (vcvtpd2udq), for the VF2_512_256VL
;; source modes.  The result mode is <si2dfmode>.  Requires AVX512F.
5228(define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
5229  [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
5230	(unspec:<si2dfmode>
5231	  [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
5232	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5233  "TARGET_AVX512F"
5234  "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5235  [(set_attr "type" "ssecvt")
5236   (set_attr "prefix" "evex")
5237   (set_attr "mode" "<sseinsnmode>")])
5238
;; 128-bit form of vcvtpd2udq: the V2DF source is converted
;; (UNSPEC_UNSIGNED_FIX_NOTRUNC) into the low half of a V4SI result whose
;; high half is zero.  Requires AVX512VL.
5239(define_insn "ufix_notruncv2dfv2si2<mask_name>"
5240  [(set (match_operand:V4SI 0 "register_operand" "=v")
5241	(vec_concat:V4SI
5242	  (unspec:V2SI
5243	    [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
5244	    UNSPEC_UNSIGNED_FIX_NOTRUNC)
5245	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5246  "TARGET_AVX512VL"
5247  "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5248  [(set_attr "type" "ssecvt")
5249   (set_attr "prefix" "evex")
5250   (set_attr "mode" "TI")])
5251
;; Truncating V8DF -> V8SI conversion, iterated over any_fix so that
;; <fixsuffix> selects between vcvttpd2dq and its <fixsuffix>-prefixed
;; counterpart.  Requires AVX512F.
5252(define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
5253  [(set (match_operand:V8SI 0 "register_operand" "=v")
5254	(any_fix:V8SI
5255	  (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5256  "TARGET_AVX512F"
5257  "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5258  [(set_attr "type" "ssecvt")
5259   (set_attr "prefix" "evex")
5260   (set_attr "mode" "OI")])
5261
;; Truncating unsigned V2DF -> V2SI conversion (vcvttpd2udq, {x} suffix)
;; into the low half of a V4SI result; the high half is zero.
;; Requires AVX512VL.
5262(define_insn "ufix_truncv2dfv2si2<mask_name>"
5263  [(set (match_operand:V4SI 0 "register_operand" "=v")
5264	(vec_concat:V4SI
5265	  (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
5266	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5267  "TARGET_AVX512VL"
5268  "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5269  [(set_attr "type" "ssecvt")
5270   (set_attr "prefix" "evex")
5271   (set_attr "mode" "TI")])
5272
;; Truncating signed V4DF -> V4SI conversion (vcvttpd2dq, {y} suffix).
;; Enabled for plain AVX or for AVX512VL together with AVX512F.
5273(define_insn "fix_truncv4dfv4si2<mask_name>"
5274  [(set (match_operand:V4SI 0 "register_operand" "=v")
5275	(fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5276  "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
5277  "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5278  [(set_attr "type" "ssecvt")
5279   (set_attr "prefix" "maybe_evex")
5280   (set_attr "mode" "OI")])
5281
;; Truncating unsigned V4DF -> V4SI conversion (vcvttpd2udq, {y} suffix).
;; Requires both AVX512VL and AVX512F.
5282(define_insn "ufix_truncv4dfv4si2<mask_name>"
5283  [(set (match_operand:V4SI 0 "register_operand" "=v")
5284	(unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5285  "TARGET_AVX512VL && TARGET_AVX512F"
5286  "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5287  [(set_attr "type" "ssecvt")
5288   (set_attr "prefix" "maybe_evex")
5289   (set_attr "mode" "OI")])
5290
;; Truncating double vector -> 64-bit integer vector conversion
;; (vcvttpd2qq and its <fixsuffix> variant), iterated over any_fix and
;; VF2_AVX512VL.  The result has the same-sized integer vector mode
;; <sseintvecmode>.  Requires AVX512DQ.
5291(define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
5292  [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5293	(any_fix:<sseintvecmode>
5294	  (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5295  "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
5296  "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5297  [(set_attr "type" "ssecvt")
5298   (set_attr "prefix" "evex")
5299   (set_attr "mode" "<sseintvecmode2>")])
5300
;; Double vector -> signed 64-bit integer vector conversion expressed as
;; UNSPEC_FIX_NOTRUNC (vcvtpd2qq) for the VF2_AVX512VL modes.
;; Requires AVX512DQ.
5301(define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5302  [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5303	(unspec:<sseintvecmode>
5304	  [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
5305	  UNSPEC_FIX_NOTRUNC))]
5306  "TARGET_AVX512DQ && <round_mode512bit_condition>"
5307  "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5308  [(set_attr "type" "ssecvt")
5309   (set_attr "prefix" "evex")
5310   (set_attr "mode" "<sseintvecmode2>")])
5311
;; Double vector -> unsigned 64-bit integer vector conversion expressed
;; as UNSPEC_UNSIGNED_FIX_NOTRUNC (vcvtpd2uqq) for the VF2_AVX512VL modes.
;; Requires AVX512DQ.
5312(define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5313  [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5314	(unspec:<sseintvecmode>
5315	  [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
5316	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5317  "TARGET_AVX512DQ && <round_mode512bit_condition>"
5318  "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5319  [(set_attr "type" "ssecvt")
5320   (set_attr "prefix" "evex")
5321   (set_attr "mode" "<sseintvecmode2>")])
5322
;; Truncating single-precision vector -> 64-bit integer vector conversion
;; (vcvttps2qq and its <fixsuffix> variant), iterated over any_fix and
;; VF1_128_256VL.  The result mode is <sselongvecmode>.
;; Requires AVX512DQ.
5323(define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
5324  [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
5325	(any_fix:<sselongvecmode>
5326	  (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5327  "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
5328  "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5329  [(set_attr "type" "ssecvt")
5330   (set_attr "prefix" "evex")
5331   (set_attr "mode" "<sseintvecmode3>")])
5332
;; Truncating conversion of the two low SF elements of a V4SF operand to
;; V2DI (vcvttps2qq and its <fixsuffix> variant).
;; Requires AVX512DQ and AVX512VL.
5333(define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>"
5334  [(set (match_operand:V2DI 0 "register_operand" "=v")
5335	(any_fix:V2DI
5336	  (vec_select:V2SF
5337	    (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5338	    (parallel [(const_int 0) (const_int 1)]))))]
5339  "TARGET_AVX512DQ && TARGET_AVX512VL"
5340  "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5341  [(set_attr "type" "ssecvt")
5342   (set_attr "prefix" "evex")
5343   (set_attr "mode" "TI")])
5344
;; Truncating unsigned conversion of a single-precision vector to the
;; same-sized integer vector mode <sseintvecmode> (vcvttps2udq) for the
;; VF1_128_256VL modes.  Requires AVX512VL.
5345(define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
5346  [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5347	(unsigned_fix:<sseintvecmode>
5348	  (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
5349  "TARGET_AVX512VL"
5350  "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5351  [(set_attr "type" "ssecvt")
5352   (set_attr "prefix" "evex")
5353   (set_attr "mode" "<sseintvecmode2>")])
5354
;; Expander producing a V8SI result whose low half is the truncating
;; (`fix') conversion of a V4DF operand and whose high half is zero.
;; Operand 2 is set to the V4SI zero constant by the preparation
;; statement rather than being passed in by the caller.
5355(define_expand "avx_cvttpd2dq256_2"
5356  [(set (match_operand:V8SI 0 "register_operand")
5357	(vec_concat:V8SI
5358	  (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
5359	  (match_dup 2)))]
5360  "TARGET_AVX"
5361  "operands[2] = CONST0_RTX (V4SImode);")
5362
;; Truncating V2DF -> V2SI conversion placed in the low half of a V4SI
;; result; the high half is zero.  The C output block picks the
;; VEX/EVEX mnemonic (with {x} suffix and optional mask operand) when
;; TARGET_AVX is set and the legacy cvttpd2dq form otherwise.
5363(define_insn "sse2_cvttpd2dq<mask_name>"
5364  [(set (match_operand:V4SI 0 "register_operand" "=v")
5365	(vec_concat:V4SI
5366	  (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
5367	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5368  "TARGET_SSE2 && <mask_avx512vl_condition>"
5369{
5370  if (TARGET_AVX)
5371    return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
5372  else
5373    return "cvttpd2dq\t{%1, %0|%0, %1}";
5374}
5375  [(set_attr "type" "ssecvt")
5376   (set_attr "amdfam10_decode" "double")
5377   (set_attr "athlon_decode" "vector")
5378   (set_attr "bdver1_decode" "double")
5379   (set_attr "prefix" "maybe_vex")
5380   (set_attr "mode" "TI")])
5381
;; Scalar double -> single conversion merged into a V4SF vector: the
;; vec_merge mask (const_int 1) takes element 0 from the converted
;; operand 2 and the remaining elements from operand 1.
;; Alternatives 0 and 1 are the legacy two-operand forms (operand 1 tied
;; to the destination, "isa" noavx); alternative 2 is the three-operand
;; AVX form and is the only one that uses the <round_*> pieces.
5382(define_insn "sse2_cvtsd2ss<round_name>"
5383  [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5384	(vec_merge:V4SF
5385	  (vec_duplicate:V4SF
5386	    (float_truncate:V2SF
5387	      (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
5388	  (match_operand:V4SF 1 "register_operand" "0,0,v")
5389	  (const_int 1)))]
5390  "TARGET_SSE2"
5391  "@
5392   cvtsd2ss\t{%2, %0|%0, %2}
5393   cvtsd2ss\t{%2, %0|%0, %q2}
5394   vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
5395  [(set_attr "isa" "noavx,noavx,avx")
5396   (set_attr "type" "ssecvt")
5397   (set_attr "athlon_decode" "vector,double,*")
5398   (set_attr "amdfam10_decode" "vector,double,*")
5399   (set_attr "bdver1_decode" "direct,direct,*")
5400   (set_attr "btver2_decode" "double,double,double")
5401   (set_attr "prefix" "orig,orig,<round_prefix>")
5402   (set_attr "mode" "SF")])
5403
;; Same cvtsd2ss merge as the named pattern, but matching a source that
;; is already a scalar DF operand: it is truncated to SF, duplicated, and
;; element 0 is merged into operand 1.  Unnamed (leading '*'), so it can
;; only be reached through pattern matching.
5404(define_insn "*sse2_vd_cvtsd2ss"
5405  [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5406	(vec_merge:V4SF
5407	  (vec_duplicate:V4SF
5408	    (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
5409	  (match_operand:V4SF 1 "register_operand" "0,0,v")
5410	  (const_int 1)))]
5411  "TARGET_SSE2"
5412  "@
5413   cvtsd2ss\t{%2, %0|%0, %2}
5414   cvtsd2ss\t{%2, %0|%0, %2}
5415   vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
5416  [(set_attr "isa" "noavx,noavx,avx")
5417   (set_attr "type" "ssecvt")
5418   (set_attr "athlon_decode" "vector,double,*")
5419   (set_attr "amdfam10_decode" "vector,double,*")
5420   (set_attr "bdver1_decode" "direct,direct,*")
5421   (set_attr "btver2_decode" "double,double,double")
5422   (set_attr "prefix" "orig,orig,vex")
5423   (set_attr "mode" "SF")])
5424
;; Scalar single -> double conversion merged into a V2DF vector: the two
;; low SF elements of operand 2 are extended to V2DF and the vec_merge
;; mask (const_int 1) keeps only element 0 of that, taking element 1 from
;; operand 1.  Alternatives 0 and 1 are the legacy forms with operand 1
;; tied to the destination; alternative 2 is the three-operand AVX form.
5425(define_insn "sse2_cvtss2sd<round_saeonly_name>"
5426  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5427	(vec_merge:V2DF
5428	  (float_extend:V2DF
5429	    (vec_select:V2SF
5430	      (match_operand:V4SF 2 "<round_saeonly_nimm_scalar_predicate>" "x,m,<round_saeonly_constraint>")
5431	      (parallel [(const_int 0) (const_int 1)])))
5432	  (match_operand:V2DF 1 "register_operand" "0,0,v")
5433	  (const_int 1)))]
5434  "TARGET_SSE2"
5435  "@
5436   cvtss2sd\t{%2, %0|%0, %2}
5437   cvtss2sd\t{%2, %0|%0, %k2}
5438   vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
5439  [(set_attr "isa" "noavx,noavx,avx")
5440   (set_attr "type" "ssecvt")
5441   (set_attr "amdfam10_decode" "vector,double,*")
5442   (set_attr "athlon_decode" "direct,direct,*")
5443   (set_attr "bdver1_decode" "direct,direct,*")
5444   (set_attr "btver2_decode" "double,double,double")
5445   (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
5446   (set_attr "mode" "DF")])
5447
;; Same cvtss2sd merge as the named pattern, but matching a source that
;; is already a scalar SF operand: it is extended to DF, duplicated, and
;; element 0 is merged into operand 1.  Unnamed (leading '*'), so it can
;; only be reached through pattern matching.
5448(define_insn "*sse2_vd_cvtss2sd"
5449  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5450	(vec_merge:V2DF
5451	  (vec_duplicate:V2DF
5452	    (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
5453	  (match_operand:V2DF 1 "register_operand" "0,0,v")
5454	  (const_int 1)))]
5455  "TARGET_SSE2"
5456  "@
5457   cvtss2sd\t{%2, %0|%0, %2}
5458   cvtss2sd\t{%2, %0|%0, %2}
5459   vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
5460  [(set_attr "isa" "noavx,noavx,avx")
5461   (set_attr "type" "ssecvt")
5462   (set_attr "amdfam10_decode" "vector,double,*")
5463   (set_attr "athlon_decode" "direct,direct,*")
5464   (set_attr "bdver1_decode" "direct,direct,*")
5465   (set_attr "btver2_decode" "double,double,double")
5466   (set_attr "prefix" "orig,orig,vex")
5467   (set_attr "mode" "DF")])
5468
;; V8DF -> V8SF narrowing conversion (vcvtpd2ps, 512-bit source).
;; Requires AVX512F.  <mask_codefor>, <mask_name> and <round_name> are
;; subst attributes defined outside this part of the file.
5469(define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
5470  [(set (match_operand:V8SF 0 "register_operand" "=v")
5471	(float_truncate:V8SF
5472	  (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
5473  "TARGET_AVX512F"
5474  "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5475  [(set_attr "type" "ssecvt")
5476   (set_attr "prefix" "evex")
5477   (set_attr "mode" "V8SF")])
5478
;; V4DF -> V4SF narrowing conversion (vcvtpd2ps with the {y} suffix in
;; AT&T syntax).  The masked variant is additionally gated by
;; <mask_avx512vl_condition>.
5479(define_insn "avx_cvtpd2ps256<mask_name>"
5480  [(set (match_operand:V4SF 0 "register_operand" "=v")
5481	(float_truncate:V4SF
5482	  (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5483  "TARGET_AVX && <mask_avx512vl_condition>"
5484  "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5485  [(set_attr "type" "ssecvt")
5486   (set_attr "prefix" "maybe_evex")
5487   (set_attr "btver2_decode" "vector")
5488   (set_attr "mode" "V4SF")])
5489
;; Expander for V2DF -> V2SF narrowing with the result in the low half of
;; a V4SF register.  Operand 2 (the high half) is set to the V2SF zero
;; constant by the preparation statement.
5490(define_expand "sse2_cvtpd2ps"
5491  [(set (match_operand:V4SF 0 "register_operand")
5492	(vec_concat:V4SF
5493	  (float_truncate:V2SF
5494	    (match_operand:V2DF 1 "vector_operand"))
5495	  (match_dup 2)))]
5496  "TARGET_SSE2"
5497  "operands[2] = CONST0_RTX (V2SFmode);")
5498
;; Masked expander for the V2DF -> V2SF narrowing: the zero-extended
;; V4SF conversion result is merged with operand 2 under the QImode mask
;; in operand 3.  Operand 4 (the zero high half) is created by the
;; preparation statement.
5499(define_expand "sse2_cvtpd2ps_mask"
5500  [(set (match_operand:V4SF 0 "register_operand")
5501	(vec_merge:V4SF
5502	  (vec_concat:V4SF
5503	    (float_truncate:V2SF
5504	      (match_operand:V2DF 1 "vector_operand"))
5505	    (match_dup 4))
5506	  (match_operand:V4SF 2 "register_operand")
5507	  (match_operand:QI 3 "register_operand")))]
5508  "TARGET_SSE2"
5509  "operands[4] = CONST0_RTX (V2SFmode);")
5510
;; Insn for the V2DF -> V2SF narrowing with a zero high half (operand 2
;; must satisfy const0_operand).  The AVX template uses <mask_operand3>
;; rather than <mask_operand2>, presumably because operand 2 is already
;; taken by the zero vector.  Without TARGET_AVX the legacy cvtpd2ps
;; mnemonic is emitted.
5511(define_insn "*sse2_cvtpd2ps<mask_name>"
5512  [(set (match_operand:V4SF 0 "register_operand" "=v")
5513	(vec_concat:V4SF
5514	  (float_truncate:V2SF
5515	    (match_operand:V2DF 1 "vector_operand" "vBm"))
5516	  (match_operand:V2SF 2 "const0_operand")))]
5517  "TARGET_SSE2 && <mask_avx512vl_condition>"
5518{
5519  if (TARGET_AVX)
5520    return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
5521  else
5522    return "cvtpd2ps\t{%1, %0|%0, %1}";
5523}
5524  [(set_attr "type" "ssecvt")
5525   (set_attr "amdfam10_decode" "double")
5526   (set_attr "athlon_decode" "vector")
5527   (set_attr "bdver1_decode" "double")
5528   (set_attr "prefix_data16" "1")
5529   (set_attr "prefix" "maybe_vex")
5530   (set_attr "mode" "V4SF")])
5531
5532;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; Maps a DF vector mode to the SF vector mode with the same element
;; count (V8DF -> V8SF, V4DF -> V4SF).
5533(define_mode_attr sf2dfmode
5534  [(V8DF "V8SF") (V4DF "V4SF")])
5535
;; Single -> double widening conversion (vcvtps2pd) for the VF2_512_256
;; result modes; the source has mode <sf2dfmode>.  The condition combines
;; TARGET_AVX with the mask and SAE-rounding subst conditions.
5536(define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
5537  [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
5538	(float_extend:VF2_512_256
5539	  (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5540  "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
5541  "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5542  [(set_attr "type" "ssecvt")
5543   (set_attr "prefix" "maybe_vex")
5544   (set_attr "mode" "<MODE>")])
5545
;; Widening conversion of the low four SF elements (indices 0-3) of a
;; V8SF operand to V4DF.  The source is printed with the %x modifier in
;; both assembler dialects.
5546(define_insn "*avx_cvtps2pd256_2"
5547  [(set (match_operand:V4DF 0 "register_operand" "=v")
5548	(float_extend:V4DF
5549	  (vec_select:V4SF
5550	    (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5551	    (parallel [(const_int 0) (const_int 1)
5552		       (const_int 2) (const_int 3)]))))]
5553  "TARGET_AVX"
5554  "vcvtps2pd\t{%x1, %0|%0, %x1}"
5555  [(set_attr "type" "ssecvt")
5556   (set_attr "prefix" "vex")
5557   (set_attr "mode" "V4DF")])
5558
;; Widening conversion of the low eight SF elements (indices 0-7) of a
;; V16SF operand to V8DF.  The source is printed with the %t modifier in
;; both assembler dialects.  Requires AVX512F.
5559(define_insn "vec_unpacks_lo_v16sf"
5560  [(set (match_operand:V8DF 0 "register_operand" "=v")
5561	(float_extend:V8DF
5562	  (vec_select:V8SF
5563	    (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5564	    (parallel [(const_int 0) (const_int 1)
5565		       (const_int 2) (const_int 3)
5566		       (const_int 4) (const_int 5)
5567		       (const_int 6) (const_int 7)]))))]
5568  "TARGET_AVX512F"
5569  "vcvtps2pd\t{%t1, %0|%0, %t1}"
5570  [(set_attr "type" "ssecvt")
5571   (set_attr "prefix" "evex")
5572   (set_attr "mode" "V8DF")])
5573
;; Vector -> mask register conversion (vpmov<ssemodesuffix>2m) for the
;; VI12_AVX512VL integer vector modes, modelled as UNSPEC_CVTINT2MASK.
;; The destination is a mask register ("Yk").  Requires AVX512BW.
5574(define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5575  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5576	(unspec:<avx512fmaskmode>
5577	 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
5578	 UNSPEC_CVTINT2MASK))]
5579  "TARGET_AVX512BW"
5580  "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5581  [(set_attr "prefix" "evex")
5582   (set_attr "mode" "<sseinsnmode>")])
5583
;; Vector -> mask register conversion (vpmov<ssemodesuffix>2m) for the
;; VI48_AVX512VL integer vector modes, modelled as UNSPEC_CVTINT2MASK.
;; Same shape as the VI12 pattern but gated on AVX512DQ.
5584(define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5585  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5586	(unspec:<avx512fmaskmode>
5587	 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
5588	 UNSPEC_CVTINT2MASK))]
5589  "TARGET_AVX512DQ"
5590  "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5591  [(set_attr "prefix" "evex")
5592   (set_attr "mode" "<sseinsnmode>")])
5593
;; Mask register -> vector expander for the VI12_AVX512VL modes.  The
;; result is a vec_merge of an all-ones vector (operand 2) and a zero
;; vector (operand 3) under the mask in operand 1; both constants are
;; created by the preparation statements.  Requires AVX512BW.
5594(define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5595  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
5596	(vec_merge:VI12_AVX512VL
5597	  (match_dup 2)
5598	  (match_dup 3)
5599	  (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5600  "TARGET_AVX512BW"
5601  {
5602    operands[2] = CONSTM1_RTX (<MODE>mode);
5603    operands[3] = CONST0_RTX (<MODE>mode);
5604  })
5605
;; Insn matching the RTL built by the VI12 cvtmask2 expander: a merge of
;; all-ones and zero vectors selected by mask register operand 1, emitted
;; as vpmovm2<ssemodesuffix>.  Requires AVX512BW.
5606(define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5607  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
5608	(vec_merge:VI12_AVX512VL
5609	  (match_operand:VI12_AVX512VL 2 "vector_all_ones_operand")
5610	  (match_operand:VI12_AVX512VL 3 "const0_operand")
5611	  (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5612  "TARGET_AVX512BW"
5613  "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5614  [(set_attr "prefix" "evex")
5615   (set_attr "mode" "<sseinsnmode>")])
5616
;; Mask register -> vector expander for the VI48_AVX512VL modes.  The
;; result is a vec_merge of an all-ones vector (operand 2) and a zero
;; vector (operand 3) under the mask in operand 1; both constants are
;; created by the preparation statements.  Requires AVX512DQ.
5617(define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5618  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
5619	(vec_merge:VI48_AVX512VL
5620	  (match_dup 2)
5621	  (match_dup 3)
5622	  (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5623  "TARGET_AVX512DQ"
5624  "{
5625    operands[2] = CONSTM1_RTX (<MODE>mode);
5626    operands[3] = CONST0_RTX (<MODE>mode);
5627  }")
5628
;; Insn matching the RTL built by the VI48 cvtmask2 expander: a merge of
;; all-ones and zero vectors selected by mask register operand 1, emitted
;; as vpmovm2<ssemodesuffix>.  Requires AVX512DQ.
5629(define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5630  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
5631	(vec_merge:VI48_AVX512VL
5632	  (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
5633	  (match_operand:VI48_AVX512VL 3 "const0_operand")
5634	  (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5635  "TARGET_AVX512DQ"
5636  "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5637  [(set_attr "prefix" "evex")
5638   (set_attr "mode" "<sseinsnmode>")])
5639
;; Widening conversion of the two low SF elements of a V4SF operand to
;; V2DF (cvtps2pd / vcvtps2pd).  The masked variant generated through
;; <mask_name> is additionally gated by <mask_avx512vl_condition>.
5640(define_insn "sse2_cvtps2pd<mask_name>"
5641  [(set (match_operand:V2DF 0 "register_operand" "=v")
5642	(float_extend:V2DF
5643	  (vec_select:V2SF
5644	    (match_operand:V4SF 1 "vector_operand" "vm")
5645	    (parallel [(const_int 0) (const_int 1)]))))]
5646  "TARGET_SSE2 && <mask_avx512vl_condition>"
5647  "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5648  [(set_attr "type" "ssecvt")
5649   (set_attr "amdfam10_decode" "direct")
5650   (set_attr "athlon_decode" "double")
5651   (set_attr "bdver1_decode" "double")
5652   (set_attr "prefix_data16" "0")
5653   (set_attr "prefix" "maybe_vex")
5654   (set_attr "mode" "V2DF")])
5655
;; Widen the high two SF elements of a V4SF operand to V2DF in two steps.
;; Step 1 fills a fresh V4SF pseudo (operand 2): indices 6 and 7 of the
;; concatenation (operand 2, operand 1) are elements 2 and 3 of operand 1,
;; so they land in positions 0 and 1; positions 2 and 3 keep operand 2's
;; own elements 2 and 3.
;; Step 2 float_extends the two low elements of that temporary.
5656(define_expand "vec_unpacks_hi_v4sf"
5657  [(set (match_dup 2)
5658   (vec_select:V4SF
5659     (vec_concat:V8SF
5660       (match_dup 2)
5661       (match_operand:V4SF 1 "vector_operand"))
5662     (parallel [(const_int 6) (const_int 7)
5663		(const_int 2) (const_int 3)])))
5664  (set (match_operand:V2DF 0 "register_operand")
5665   (float_extend:V2DF
5666     (vec_select:V2SF
5667       (match_dup 2)
5668       (parallel [(const_int 0) (const_int 1)]))))]
5669  "TARGET_SSE2"
5670  "operands[2] = gen_reg_rtx (V4SFmode);")
5671
;; Widen the high four SF elements of a V8SF register to V4DF: elements
;; 4-7 are first extracted into a fresh V4SF pseudo (operand 2), which is
;; then float_extended as a whole.  Requires AVX.
5672(define_expand "vec_unpacks_hi_v8sf"
5673  [(set (match_dup 2)
5674	(vec_select:V4SF
5675	  (match_operand:V8SF 1 "register_operand")
5676	  (parallel [(const_int 4) (const_int 5)
5677		     (const_int 6) (const_int 7)])))
5678   (set (match_operand:V4DF 0 "register_operand")
5679	(float_extend:V4DF
5680	  (match_dup 2)))]
5681  "TARGET_AVX"
5682  "operands[2] = gen_reg_rtx (V4SFmode);")
5683
;; Widen the high eight SF elements of a V16SF register to V8DF: elements
;; 8-15 are first extracted into a fresh V8SF pseudo (operand 2), which is
;; then float_extended as a whole.  Requires AVX512F.
5684(define_expand "vec_unpacks_hi_v16sf"
5685  [(set (match_dup 2)
5686	(vec_select:V8SF
5687	  (match_operand:V16SF 1 "register_operand")
5688	  (parallel [(const_int 8) (const_int 9)
5689		     (const_int 10) (const_int 11)
5690		     (const_int 12) (const_int 13)
5691		     (const_int 14) (const_int 15)])))
5692   (set (match_operand:V8DF 0 "register_operand")
5693	(float_extend:V8DF
5694	  (match_dup 2)))]
5695"TARGET_AVX512F"
5696"operands[2] = gen_reg_rtx (V8SFmode);")
5697
;; Widen the low two SF elements of a V4SF operand to V2DF.  There is no
;; preparation code; the expander just emits the RTL template, which has
;; the same shape as the sse2_cvtps2pd insn pattern.
5698(define_expand "vec_unpacks_lo_v4sf"
5699  [(set (match_operand:V2DF 0 "register_operand")
5700	(float_extend:V2DF
5701	  (vec_select:V2SF
5702	    (match_operand:V4SF 1 "vector_operand")
5703	    (parallel [(const_int 0) (const_int 1)]))))]
5704  "TARGET_SSE2")
5705
;; Widen the low four SF elements of a V8SF operand to V4DF.  There is no
;; preparation code; the emitted RTL has the same shape as the
;; *avx_cvtps2pd256_2 insn pattern.
5706(define_expand "vec_unpacks_lo_v8sf"
5707  [(set (match_operand:V4DF 0 "register_operand")
5708	(float_extend:V4DF
5709	  (vec_select:V4SF
5710	    (match_operand:V8SF 1 "nonimmediate_operand")
5711	    (parallel [(const_int 0) (const_int 1)
5712		       (const_int 2) (const_int 3)]))))]
5713  "TARGET_AVX")
5714
;; Maps an integer vector mode to the floating-point vector mode with
;; half as many elements, each twice as wide (e.g. V8HI -> V4SF,
;; V4SI -> V2DF).  Used as the result mode of the unpack-and-float
;; expanders.
5715(define_mode_attr sseunpackfltmode
5716  [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
5717  (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
5718
;; Expander over the VI2_AVX512F modes: unpack operand 1 with
;; gen_vec_unpacks_hi_<mode> into a temporary of mode <sseunpackmode>,
;; then set operand 0 to the FLOAT conversion of that temporary in mode
;; <sseunpackfltmode>.
5719(define_expand "vec_unpacks_float_hi_<mode>"
5720  [(match_operand:<sseunpackfltmode> 0 "register_operand")
5721   (match_operand:VI2_AVX512F 1 "register_operand")]
5722  "TARGET_SSE2"
5723{
5724  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5725
5726  emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
5727  emit_insn (gen_rtx_SET (operands[0],
5728			  gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5729  DONE;
5730})
5731
;; Expander over the VI2_AVX512F modes: unpack operand 1 with
;; gen_vec_unpacks_lo_<mode> into a temporary of mode <sseunpackmode>,
;; then set operand 0 to the FLOAT conversion of that temporary in mode
;; <sseunpackfltmode>.
5732(define_expand "vec_unpacks_float_lo_<mode>"
5733  [(match_operand:<sseunpackfltmode> 0 "register_operand")
5734   (match_operand:VI2_AVX512F 1 "register_operand")]
5735  "TARGET_SSE2"
5736{
5737  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5738
5739  emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
5740  emit_insn (gen_rtx_SET (operands[0],
5741			  gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5742  DONE;
5743})
5744
;; Expander over the VI2_AVX512F modes: unpack operand 1 with
;; gen_vec_unpacku_hi_<mode> into a temporary of mode <sseunpackmode>,
;; then set operand 0 to the FLOAT conversion of that temporary in mode
;; <sseunpackfltmode>.  Note the conversion itself is the signed FLOAT
;; code applied to the already-unpacked value.
5745(define_expand "vec_unpacku_float_hi_<mode>"
5746  [(match_operand:<sseunpackfltmode> 0 "register_operand")
5747   (match_operand:VI2_AVX512F 1 "register_operand")]
5748  "TARGET_SSE2"
5749{
5750  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5751
5752  emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
5753  emit_insn (gen_rtx_SET (operands[0],
5754			  gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5755  DONE;
5756})
5757
;; Expander over the VI2_AVX512F modes: unpack operand 1 with
;; gen_vec_unpacku_lo_<mode> into a temporary of mode <sseunpackmode>,
;; then set operand 0 to the FLOAT conversion of that temporary in mode
;; <sseunpackfltmode>.  Note the conversion itself is the signed FLOAT
;; code applied to the already-unpacked value.
5758(define_expand "vec_unpacku_float_lo_<mode>"
5759  [(match_operand:<sseunpackfltmode> 0 "register_operand")
5760   (match_operand:VI2_AVX512F 1 "register_operand")]
5761  "TARGET_SSE2"
5762{
5763  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5764
5765  emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
5766  emit_insn (gen_rtx_SET (operands[0],
5767			  gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5768  DONE;
5769})
5770
;; Expander: build a V4SI temporary (operand 2) holding elements
;; 2, 3, 2, 3 of operand 1, then convert elements 0 and 1 of that
;; temporary with FLOAT to V2DF in operand 0.  Enabled under TARGET_SSE2.
5771(define_expand "vec_unpacks_float_hi_v4si"
5772  [(set (match_dup 2)
5773	(vec_select:V4SI
5774	  (match_operand:V4SI 1 "vector_operand")
5775	  (parallel [(const_int 2) (const_int 3)
5776		     (const_int 2) (const_int 3)])))
5777   (set (match_operand:V2DF 0 "register_operand")
5778	(float:V2DF
5779	  (vec_select:V2SI
5780	  (match_dup 2)
5781	    (parallel [(const_int 0) (const_int 1)]))))]
5782  "TARGET_SSE2"
5783  "operands[2] = gen_reg_rtx (V4SImode);")
5784
;; Expander: convert elements 0 and 1 of the V4SI operand 1 with FLOAT to
;; V2DF in operand 0.  Enabled under TARGET_SSE2.
5785(define_expand "vec_unpacks_float_lo_v4si"
5786  [(set (match_operand:V2DF 0 "register_operand")
5787	(float:V2DF
5788	  (vec_select:V2SI
5789	    (match_operand:V4SI 1 "vector_operand")
5790	    (parallel [(const_int 0) (const_int 1)]))))]
5791  "TARGET_SSE2")
5792
;; Expander: copy elements 4..7 of the V8SI operand 1 into a fresh V4SI
;; temporary (operand 2), then convert that temporary with FLOAT to V4DF
;; in operand 0.  Enabled under TARGET_AVX.
5793(define_expand "vec_unpacks_float_hi_v8si"
5794  [(set (match_dup 2)
5795	(vec_select:V4SI
5796	  (match_operand:V8SI 1 "vector_operand")
5797	  (parallel [(const_int 4) (const_int 5)
5798		     (const_int 6) (const_int 7)])))
5799   (set (match_operand:V4DF 0 "register_operand")
5800	(float:V4DF
5801	  (match_dup 2)))]
5802  "TARGET_AVX"
5803  "operands[2] = gen_reg_rtx (V4SImode);")
5804
;; Expander: convert elements 0..3 of the V8SI operand 1 with FLOAT to
;; V4DF in operand 0.  Enabled under TARGET_AVX.
5805(define_expand "vec_unpacks_float_lo_v8si"
5806  [(set (match_operand:V4DF 0 "register_operand")
5807	(float:V4DF
5808	  (vec_select:V4SI
5809	    (match_operand:V8SI 1 "nonimmediate_operand")
5810	    (parallel [(const_int 0) (const_int 1)
5811		       (const_int 2) (const_int 3)]))))]
5812  "TARGET_AVX")
5813
;; Expander: copy elements 8..15 of the V16SI operand 1 into a fresh V8SI
;; temporary (operand 2), then convert that temporary with FLOAT to V8DF
;; in operand 0.  Enabled under TARGET_AVX512F.
5814(define_expand "vec_unpacks_float_hi_v16si"
5815  [(set (match_dup 2)
5816	(vec_select:V8SI
5817	  (match_operand:V16SI 1 "nonimmediate_operand")
5818	  (parallel [(const_int 8) (const_int 9)
5819		     (const_int 10) (const_int 11)
5820		     (const_int 12) (const_int 13)
5821		     (const_int 14) (const_int 15)])))
5822   (set (match_operand:V8DF 0 "register_operand")
5823	(float:V8DF
5824	  (match_dup 2)))]
5825  "TARGET_AVX512F"
5826  "operands[2] = gen_reg_rtx (V8SImode);")
5827
;; Expander: convert elements 0..7 of the V16SI operand 1 with FLOAT to
;; V8DF in operand 0.  Enabled under TARGET_AVX512F.
5828(define_expand "vec_unpacks_float_lo_v16si"
5829  [(set (match_operand:V8DF 0 "register_operand")
5830	(float:V8DF
5831	  (vec_select:V8SI
5832	    (match_operand:V16SI 1 "nonimmediate_operand")
5833	    (parallel [(const_int 0) (const_int 1)
5834		       (const_int 2) (const_int 3)
5835		       (const_int 4) (const_int 5)
5836		       (const_int 6) (const_int 7)]))))]
5837  "TARGET_AVX512F")
5838
;; Expander for converting elements 2 and 3 of the V4SI operand 1 to V2DF
;; with an unsigned fix-up.  Sequence emitted by the template:
;;   op5 = elements 2,3,2,3 of operand 1		(V4SI temporary)
;;   op6 = FLOAT of elements 0,1 of op5		(signed conversion)
;;   op7 = (op6 < op3), op3 being the zero vector
;;   op8 = op7 AND op4, op4 being a vector built from the constant x
;;   op0 = op6 + op8
;; i.e. the constant is added only in lanes whose signed conversion
;; compared below zero.
5839(define_expand "vec_unpacku_float_hi_v4si"
5840  [(set (match_dup 5)
5841	(vec_select:V4SI
5842	  (match_operand:V4SI 1 "vector_operand")
5843	  (parallel [(const_int 2) (const_int 3)
5844		     (const_int 2) (const_int 3)])))
5845   (set (match_dup 6)
5846	(float:V2DF
5847	  (vec_select:V2SI
5848	  (match_dup 5)
5849	    (parallel [(const_int 0) (const_int 1)]))))
5850   (set (match_dup 7)
5851	(lt:V2DF (match_dup 6) (match_dup 3)))
5852   (set (match_dup 8)
5853	(and:V2DF (match_dup 7) (match_dup 4)))
5854   (set (match_operand:V2DF 0 "register_operand")
5855	(plus:V2DF (match_dup 6) (match_dup 8)))]
5856  "TARGET_SSE2"
5857{
5858  REAL_VALUE_TYPE TWO32r;
5859  rtx x;
5860  int i;
5861
  /* x is the DFmode constant produced by scaling dconst1 by an exponent
     of 32 -- presumably 2**32; confirm dconst1 is 1.0.  */
5862  real_ldexp (&TWO32r, &dconst1, 32);
5863  x = const_double_from_real_value (TWO32r, DFmode);
5864
5865  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5866  operands[4] = force_reg (V2DFmode,
5867			   ix86_build_const_vector (V2DFmode, 1, x));
5868
5869  operands[5] = gen_reg_rtx (V4SImode);
5870
  /* Operands 6..8 are the V2DF temporaries used by the template.  */
5871  for (i = 6; i < 9; i++)
5872    operands[i] = gen_reg_rtx (V2DFmode);
5873})
5874
;; Expander for converting elements 0 and 1 of the V4SI operand 1 to V2DF
;; with an unsigned fix-up.  Sequence emitted by the template:
;;   op5 = FLOAT of elements 0,1 of operand 1	(signed conversion)
;;   op6 = (op5 < op3), op3 being the zero vector
;;   op7 = op6 AND op4, op4 being a vector built from the constant x
;;   op0 = op5 + op7
5875(define_expand "vec_unpacku_float_lo_v4si"
5876  [(set (match_dup 5)
5877	(float:V2DF
5878	  (vec_select:V2SI
5879	    (match_operand:V4SI 1 "vector_operand")
5880	    (parallel [(const_int 0) (const_int 1)]))))
5881   (set (match_dup 6)
5882	(lt:V2DF (match_dup 5) (match_dup 3)))
5883   (set (match_dup 7)
5884	(and:V2DF (match_dup 6) (match_dup 4)))
5885   (set (match_operand:V2DF 0 "register_operand")
5886	(plus:V2DF (match_dup 5) (match_dup 7)))]
5887  "TARGET_SSE2"
5888{
5889  REAL_VALUE_TYPE TWO32r;
5890  rtx x;
5891  int i;
5892
  /* x is the DFmode constant produced by scaling dconst1 by an exponent
     of 32 -- presumably 2**32; confirm dconst1 is 1.0.  */
5893  real_ldexp (&TWO32r, &dconst1, 32);
5894  x = const_double_from_real_value (TWO32r, DFmode);
5895
5896  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5897  operands[4] = force_reg (V2DFmode,
5898			   ix86_build_const_vector (V2DFmode, 1, x));
5899
  /* Operands 5..7 are the V2DF temporaries used by the template.  */
5900  for (i = 5; i < 8; i++)
5901    operands[i] = gen_reg_rtx (V2DFmode);
5902})
5903
;; Expander (all insns emitted from C): extract a V4SI from operand 1 with
;; gen_vec_extract_hi_v8si, convert it to V4DF with gen_floatv4siv4df2,
;; then apply the unsigned fix-up: compare the result LT against a zero
;; vector, AND that with the constant vector, and add the product of that
;; AND back onto the converted value, storing into operand 0.
5904(define_expand "vec_unpacku_float_hi_v8si"
5905  [(match_operand:V4DF 0 "register_operand")
5906   (match_operand:V8SI 1 "register_operand")]
5907  "TARGET_AVX"
5908{
5909  REAL_VALUE_TYPE TWO32r;
5910  rtx x, tmp[6];
5911  int i;
5912
  /* x is the DFmode constant produced by scaling dconst1 by an exponent
     of 32 -- presumably 2**32; confirm dconst1 is 1.0.  */
5913  real_ldexp (&TWO32r, &dconst1, 32);
5914  x = const_double_from_real_value (TWO32r, DFmode);
5915
  /* tmp[0] = zero vector, tmp[1] = constant vector, tmp[2..4] = V4DF
     temporaries, tmp[5] = the extracted V4SI.  */
5916  tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5917  tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5918  tmp[5] = gen_reg_rtx (V4SImode);
5919
5920  for (i = 2; i < 5; i++)
5921    tmp[i] = gen_reg_rtx (V4DFmode);
5922  emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
5923  emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
5924  emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5925  emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5926  emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
5927  DONE;
5928})
5929
;; Expander (all insns emitted from C): extract a V8SI from operand 1 with
;; gen_vec_extract_hi_v16si, convert it to V8DF with gen_floatv8siv8df2,
;; compute a QImode mask k = (converted < zero vector), then emit a masked
;; add of the constant vector under k and move the result to operand 0.
5930(define_expand "vec_unpacku_float_hi_v16si"
5931  [(match_operand:V8DF 0 "register_operand")
5932   (match_operand:V16SI 1 "register_operand")]
5933  "TARGET_AVX512F"
5934{
5935  REAL_VALUE_TYPE TWO32r;
5936  rtx k, x, tmp[4];
5937
  /* x is the DFmode constant produced by scaling dconst1 by an exponent
     of 32 -- presumably 2**32; confirm dconst1 is 1.0.  */
5938  real_ldexp (&TWO32r, &dconst1, 32);
5939  x = const_double_from_real_value (TWO32r, DFmode);
5940
5941  tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5942  tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5943  tmp[2] = gen_reg_rtx (V8DFmode);
5944  tmp[3] = gen_reg_rtx (V8SImode);
5945  k = gen_reg_rtx (QImode);
5946
5947  emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
5948  emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
5949  emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
  /* NOTE(review): presumably tmp[2] += tmp[1] in the lanes selected by k,
     keeping the old tmp[2] elsewhere -- confirm against addv8df3_mask.  */
5950  emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5951  emit_move_insn (operands[0], tmp[2]);
5952  DONE;
5953})
5954
;; Expander (all insns emitted from C): convert operand 1 to V4DF with
;; gen_avx_cvtdq2pd256_2, then apply the unsigned fix-up: compare the
;; result LT against a zero vector, AND that with the constant vector, and
;; add the product of that AND back onto the converted value, storing into
;; operand 0.
5955(define_expand "vec_unpacku_float_lo_v8si"
5956  [(match_operand:V4DF 0 "register_operand")
5957   (match_operand:V8SI 1 "nonimmediate_operand")]
5958  "TARGET_AVX"
5959{
5960  REAL_VALUE_TYPE TWO32r;
5961  rtx x, tmp[5];
5962  int i;
5963
  /* x is the DFmode constant produced by scaling dconst1 by an exponent
     of 32 -- presumably 2**32; confirm dconst1 is 1.0.  */
5964  real_ldexp (&TWO32r, &dconst1, 32);
5965  x = const_double_from_real_value (TWO32r, DFmode);
5966
  /* tmp[0] = zero vector, tmp[1] = constant vector, tmp[2..4] = V4DF
     temporaries.  */
5967  tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5968  tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5969
5970  for (i = 2; i < 5; i++)
5971    tmp[i] = gen_reg_rtx (V4DFmode);
5972  emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
5973  emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5974  emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5975  emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
5976  DONE;
5977})
5978
;; Expander (all insns emitted from C): convert operand 1 to V8DF with
;; gen_avx512f_cvtdq2pd512_2, compute a QImode mask
;; k = (converted < zero vector), then emit a masked add of the constant
;; vector under k and move the result to operand 0.
5979(define_expand "vec_unpacku_float_lo_v16si"
5980  [(match_operand:V8DF 0 "register_operand")
5981   (match_operand:V16SI 1 "nonimmediate_operand")]
5982  "TARGET_AVX512F"
5983{
5984  REAL_VALUE_TYPE TWO32r;
5985  rtx k, x, tmp[3];
5986
  /* x is the DFmode constant produced by scaling dconst1 by an exponent
     of 32 -- presumably 2**32; confirm dconst1 is 1.0.  */
5987  real_ldexp (&TWO32r, &dconst1, 32);
5988  x = const_double_from_real_value (TWO32r, DFmode);
5989
5990  tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5991  tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5992  tmp[2] = gen_reg_rtx (V8DFmode);
5993  k = gen_reg_rtx (QImode);
5994
5995  emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
5996  emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
  /* NOTE(review): presumably tmp[2] += tmp[1] in the lanes selected by k,
     keeping the old tmp[2] elsewhere -- confirm against addv8df3_mask.  */
5997  emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5998  emit_move_insn (operands[0], tmp[2]);
5999  DONE;
6000})
6001
;; Expander over the VF2_512_256 modes: float_truncate operand 1 and
;; operand 2 separately into two fresh <sf2dfmode> temporaries (operands 3
;; and 4), then vec_concat the temporaries, operand 3 first, into the
;; <ssePSmode> operand 0.
6002(define_expand "vec_pack_trunc_<mode>"
6003  [(set (match_dup 3)
6004	(float_truncate:<sf2dfmode>
6005	  (match_operand:VF2_512_256 1 "nonimmediate_operand")))
6006   (set (match_dup 4)
6007	(float_truncate:<sf2dfmode>
6008	  (match_operand:VF2_512_256 2 "nonimmediate_operand")))
6009   (set (match_operand:<ssePSmode> 0 "register_operand")
6010	(vec_concat:<ssePSmode>
6011	  (match_dup 3)
6012	  (match_dup 4)))]
6013  "TARGET_AVX"
6014{
6015  operands[3] = gen_reg_rtx (<sf2dfmode>mode);
6016  operands[4] = gen_reg_rtx (<sf2dfmode>mode);
6017})
6018
;; Expander: pack two V2DF operands into the V4SF operand 0.  Two
;; strategies are chosen at expand time:
;;  - with AVX, when 128-bit AVX is not preferred and the insn is being
;;    optimized for speed: concatenate both inputs into one V4DF and
;;    convert it with a single gen_avx_cvtpd2ps256;
;;  - otherwise: convert each input with gen_sse2_cvtpd2ps into its own
;;    V4SF temporary and combine the two with gen_sse_movlhps.
6019(define_expand "vec_pack_trunc_v2df"
6020  [(match_operand:V4SF 0 "register_operand")
6021   (match_operand:V2DF 1 "vector_operand")
6022   (match_operand:V2DF 2 "vector_operand")]
6023  "TARGET_SSE2"
6024{
6025  rtx tmp0, tmp1;
6026
6027  if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6028    {
6029      tmp0 = gen_reg_rtx (V4DFmode);
      /* Operand 1 is forced into a register before the concat.  */
6030      tmp1 = force_reg (V2DFmode, operands[1]);
6031
6032      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6033      emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
6034    }
6035  else
6036    {
6037      tmp0 = gen_reg_rtx (V4SFmode);
6038      tmp1 = gen_reg_rtx (V4SFmode);
6039
6040      emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
6041      emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
6042      emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
6043    }
6044  DONE;
6045})
6046
;; Expander: convert each V8DF input to V8SI with gen_fix_truncv8dfv8si2
;; and concatenate the two results (operand 1's first) into the V16SI
;; operand 0 with gen_avx_vec_concatv16si.
6047(define_expand "vec_pack_sfix_trunc_v8df"
6048  [(match_operand:V16SI 0 "register_operand")
6049   (match_operand:V8DF 1 "nonimmediate_operand")
6050   (match_operand:V8DF 2 "nonimmediate_operand")]
6051  "TARGET_AVX512F"
6052{
6053  rtx r1, r2;
6054
6055  r1 = gen_reg_rtx (V8SImode);
6056  r2 = gen_reg_rtx (V8SImode);
6057
6058  emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
6059  emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
6060  emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6061  DONE;
6062})
6063
;; Expander: convert each V4DF input to V4SI with gen_fix_truncv4dfv4si2
;; and concatenate the two results (operand 1's first) into the V8SI
;; operand 0 with gen_avx_vec_concatv8si.
6064(define_expand "vec_pack_sfix_trunc_v4df"
6065  [(match_operand:V8SI 0 "register_operand")
6066   (match_operand:V4DF 1 "nonimmediate_operand")
6067   (match_operand:V4DF 2 "nonimmediate_operand")]
6068  "TARGET_AVX"
6069{
6070  rtx r1, r2;
6071
6072  r1 = gen_reg_rtx (V4SImode);
6073  r2 = gen_reg_rtx (V4SImode);
6074
6075  emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
6076  emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
6077  emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
6078  DONE;
6079})
6080
;; Expander: pack two V2DF operands into the V4SI operand 0 using the
;; truncating conversions.  Two strategies are chosen at expand time:
;;  - with AVX, when 128-bit AVX is not preferred and the insn is being
;;    optimized for speed: concatenate both inputs into one V4DF and
;;    convert it with gen_fix_truncv4dfv4si2;
;;  - otherwise: convert each input with gen_sse2_cvttpd2dq, then combine
;;    the two V4SI temporaries, viewed as V2DI, with
;;    gen_vec_interleave_lowv2di and move the result, viewed as V4SI,
;;    into operand 0.
6081(define_expand "vec_pack_sfix_trunc_v2df"
6082  [(match_operand:V4SI 0 "register_operand")
6083   (match_operand:V2DF 1 "vector_operand")
6084   (match_operand:V2DF 2 "vector_operand")]
6085  "TARGET_SSE2"
6086{
6087  rtx tmp0, tmp1, tmp2;
6088
6089  if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6090    {
6091      tmp0 = gen_reg_rtx (V4DFmode);
6092      tmp1 = force_reg (V2DFmode, operands[1]);
6093
6094      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6095      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
6096    }
6097  else
6098    {
6099      tmp0 = gen_reg_rtx (V4SImode);
6100      tmp1 = gen_reg_rtx (V4SImode);
6101      tmp2 = gen_reg_rtx (V2DImode);
6102
6103      emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
6104      emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
6105      emit_insn (gen_vec_interleave_lowv2di (tmp2,
6106					     gen_lowpart (V2DImode, tmp0),
6107					     gen_lowpart (V2DImode, tmp1)));
6108      emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
6109    }
6110  DONE;
6111})
6112
;; Maps a DF vector mode to the SI vector mode with twice as many elements
;; (V8DF -> V16SI, V4DF -> V8SI, V2DF -> V4SI).  It is the destination
;; mode of vec_pack_ufix_trunc_<mode>.
6113(define_mode_attr ssepackfltmode
6114  [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
6115
;; Expander over the VF2 modes: pack two DF vectors into one
;; <ssepackfltmode> integer vector using unsigned truncating conversion.
;;  - V8DF: convert each input with gen_ufix_truncv8dfv8si2 and
;;    concatenate the two V8SI results.
;;  - other modes: adjust each input with
;;    ix86_expand_adjust_ufix_to_sfix_si (which also hands back a second
;;    value through its pointer argument), pack the adjusted inputs with
;;    the signed gen_vec_pack_sfix_trunc_<mode>, combine the two
;;    handed-back values with ix86_expand_vec_extract_even_odd, and XOR
;;    that into the packed result.
6116(define_expand "vec_pack_ufix_trunc_<mode>"
6117  [(match_operand:<ssepackfltmode> 0 "register_operand")
6118   (match_operand:VF2 1 "register_operand")
6119   (match_operand:VF2 2 "register_operand")]
6120  "TARGET_SSE2"
6121{
6122  if (<MODE>mode == V8DFmode)
6123    {
6124      rtx r1, r2;
6125
6126      r1 = gen_reg_rtx (V8SImode);
6127      r2 = gen_reg_rtx (V8SImode);
6128
6129      emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
6130      emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
6131      emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6132    }
6133  else
6134    {
      /* tmp[0]/tmp[1]: adjusted inputs; tmp[2]/tmp[3]: values written by
	 the helper through its second argument; tmp[4]: signed pack;
	 tmp[5]: even/odd extraction of tmp[2], tmp[3]; tmp[6]: XOR.  */
6135      rtx tmp[7];
6136      tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
6137      tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
6138      tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
6139      emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
6140      if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
6141	{
6142	  tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
6143	  ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
6144	}
6145      else
6146	{
	  /* Without AVX2 the V8SI case performs the extraction on V8SF
	     views of the operands and views the result back as V8SI.  */
6147	  tmp[5] = gen_reg_rtx (V8SFmode);
6148	  ix86_expand_vec_extract_even_odd (tmp[5],
6149					    gen_lowpart (V8SFmode, tmp[2]),
6150					    gen_lowpart (V8SFmode, tmp[3]), 0);
6151	  tmp[5] = gen_lowpart (V8SImode, tmp[5]);
6152	}
6153      tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
6154				    operands[0], 0, OPTAB_DIRECT);
      /* expand_simple_binop may return a register other than the
	 requested target; copy in that case.  */
6155      if (tmp[6] != operands[0])
6156	emit_move_insn (operands[0], tmp[6]);
6157    }
6158
6159  DONE;
6160})
6161
;; Expander: convert each V8DF input to V8SI with gen_avx512f_cvtpd2dq512
;; and concatenate the two results (operand 1's first) into the V16SI
;; operand 0 with gen_avx_vec_concatv16si.
6162(define_expand "avx512f_vec_pack_sfix_v8df"
6163  [(match_operand:V16SI 0 "register_operand")
6164   (match_operand:V8DF 1 "nonimmediate_operand")
6165   (match_operand:V8DF 2 "nonimmediate_operand")]
6166  "TARGET_AVX512F"
6167{
6168  rtx r1, r2;
6169
6170  r1 = gen_reg_rtx (V8SImode);
6171  r2 = gen_reg_rtx (V8SImode);
6172
6173  emit_insn (gen_avx512f_cvtpd2dq512 (r1, operands[1]));
6174  emit_insn (gen_avx512f_cvtpd2dq512 (r2, operands[2]));
6175  emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6176  DONE;
6177})
6178
;; Expander: convert each V4DF input to V4SI with gen_avx_cvtpd2dq256 and
;; concatenate the two results (operand 1's first) into the V8SI
;; operand 0 with gen_avx_vec_concatv8si.
6179(define_expand "vec_pack_sfix_v4df"
6180  [(match_operand:V8SI 0 "register_operand")
6181   (match_operand:V4DF 1 "nonimmediate_operand")
6182   (match_operand:V4DF 2 "nonimmediate_operand")]
6183  "TARGET_AVX"
6184{
6185  rtx r1, r2;
6186
6187  r1 = gen_reg_rtx (V4SImode);
6188  r2 = gen_reg_rtx (V4SImode);
6189
6190  emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
6191  emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
6192  emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
6193  DONE;
6194})
6195
;; Expander: pack two V2DF operands into the V4SI operand 0 using the
;; cvtpd2dq conversions.  Two strategies are chosen at expand time:
;;  - with AVX, when 128-bit AVX is not preferred and the insn is being
;;    optimized for speed: concatenate both inputs into one V4DF and
;;    convert it with gen_avx_cvtpd2dq256;
;;  - otherwise: convert each input with gen_sse2_cvtpd2dq, then combine
;;    the two V4SI temporaries, viewed as V2DI, with
;;    gen_vec_interleave_lowv2di and move the result, viewed as V4SI,
;;    into operand 0.
6196(define_expand "vec_pack_sfix_v2df"
6197  [(match_operand:V4SI 0 "register_operand")
6198   (match_operand:V2DF 1 "vector_operand")
6199   (match_operand:V2DF 2 "vector_operand")]
6200  "TARGET_SSE2"
6201{
6202  rtx tmp0, tmp1, tmp2;
6203
6204  if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6205    {
6206      tmp0 = gen_reg_rtx (V4DFmode);
6207      tmp1 = force_reg (V2DFmode, operands[1]);
6208
6209      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6210      emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
6211    }
6212  else
6213    {
6214      tmp0 = gen_reg_rtx (V4SImode);
6215      tmp1 = gen_reg_rtx (V4SImode);
6216      tmp2 = gen_reg_rtx (V2DImode);
6217
6218      emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
6219      emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
6220      emit_insn (gen_vec_interleave_lowv2di (tmp2,
6221					     gen_lowpart (V2DImode, tmp0),
6222					     gen_lowpart (V2DImode, tmp1)));
6223      emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
6224    }
6225  DONE;
6226})
6227
6228;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6229;;
6230;; Parallel single-precision floating point element swizzling
6231;;
6232;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6233
;; Expander front end for the sse_movhlps insn.  The operands are first
;; passed through ix86_fixup_binary_operands, which returns the
;; destination to use; the insn is emitted on that destination and, if it
;; differs from operand 0, the result is copied to operand 0 afterwards.
;; The RTL template documents the operation: result =
;; { op2[2], op2[3], op1[2], op1[3] }.
6234(define_expand "sse_movhlps_exp"
6235  [(set (match_operand:V4SF 0 "nonimmediate_operand")
6236	(vec_select:V4SF
6237	  (vec_concat:V8SF
6238	    (match_operand:V4SF 1 "nonimmediate_operand")
6239	    (match_operand:V4SF 2 "nonimmediate_operand"))
6240	  (parallel [(const_int 6)
6241		     (const_int 7)
6242		     (const_int 2)
6243		     (const_int 3)])))]
6244  "TARGET_SSE"
6245{
6246  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6247
6248  emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
6249
6250  /* Fix up the destination if needed.  */
6251  if (dst != operands[0])
6252    emit_move_insn (operands[0], dst);
6253
6254  DONE;
6255})
6256
;; Insn: result = { op2[2], op2[3], op1[2], op1[3] } (indices 6,7 address
;; elements 2,3 of operand 2 in the concatenation).  Operands 1 and 2 must
;; not both be memory.  Five alternatives:
;;   0/1: register-register movhlps / vmovhlps (non-AVX / AVX);
;;   2/3: operand 2 in offsettable memory, loaded via %H2 with
;;	  movlps / vmovlps (non-AVX / AVX);
;;   4:   memory destination tied to operand 1, written with %vmovhps.
6257(define_insn "sse_movhlps"
6258  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,v,x,v,m")
6259	(vec_select:V4SF
6260	  (vec_concat:V8SF
6261	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6262	    (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
6263	  (parallel [(const_int 6)
6264		     (const_int 7)
6265		     (const_int 2)
6266		     (const_int 3)])))]
6267  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6268  "@
6269   movhlps\t{%2, %0|%0, %2}
6270   vmovhlps\t{%2, %1, %0|%0, %1, %2}
6271   movlps\t{%H2, %0|%0, %H2}
6272   vmovlps\t{%H2, %1, %0|%0, %1, %H2}
6273   %vmovhps\t{%2, %0|%q0, %2}"
6274  [(set_attr "isa" "noavx,avx,noavx,avx,*")
6275   (set_attr "type" "ssemov")
6276   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6277   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6278
;; Expander front end for the sse_movlhps insn.  The operands are first
;; passed through ix86_fixup_binary_operands, which returns the
;; destination to use; the insn is emitted on that destination and, if it
;; differs from operand 0, the result is copied to operand 0 afterwards.
;; The RTL template documents the operation: result =
;; { op1[0], op1[1], op2[0], op2[1] }.
6279(define_expand "sse_movlhps_exp"
6280  [(set (match_operand:V4SF 0 "nonimmediate_operand")
6281	(vec_select:V4SF
6282	  (vec_concat:V8SF
6283	    (match_operand:V4SF 1 "nonimmediate_operand")
6284	    (match_operand:V4SF 2 "nonimmediate_operand"))
6285	  (parallel [(const_int 0)
6286		     (const_int 1)
6287		     (const_int 4)
6288		     (const_int 5)])))]
6289  "TARGET_SSE"
6290{
6291  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6292
6293  emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
6294
6295  /* Fix up the destination if needed.  */
6296  if (dst != operands[0])
6297    emit_move_insn (operands[0], dst);
6298
6299  DONE;
6300})
6301
;; Insn: result = { op1[0], op1[1], op2[0], op2[1] } (indices 4,5 address
;; elements 0,1 of operand 2 in the concatenation).  The operands must
;; satisfy ix86_binary_operator_ok.  Five alternatives:
;;   0/1: register-register movlhps / vmovlhps (non-AVX / AVX);
;;   2/3: movhps / vmovhps with operand 2 in memory or a register
;;	  respectively (non-AVX / AVX);
;;   4:   offsettable memory destination tied to operand 1, written at %H0
;;	  with %vmovlps.
6302(define_insn "sse_movlhps"
6303  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,v,x,v,o")
6304	(vec_select:V4SF
6305	  (vec_concat:V8SF
6306	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6307	    (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,v,v"))
6308	  (parallel [(const_int 0)
6309		     (const_int 1)
6310		     (const_int 4)
6311		     (const_int 5)])))]
6312  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
6313  "@
6314   movlhps\t{%2, %0|%0, %2}
6315   vmovlhps\t{%2, %1, %0|%0, %1, %2}
6316   movhps\t{%2, %0|%0, %q2}
6317   vmovhps\t{%2, %1, %0|%0, %1, %q2}
6318   %vmovlps\t{%2, %H0|%H0, %2}"
6319  [(set_attr "isa" "noavx,avx,noavx,avx,*")
6320   (set_attr "type" "ssemov")
6321   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6322   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6323
;; Insn (vunpckhps, V16SF): within each group of four elements, interleave
;; elements 2 and 3 of operand 1 with elements 2 and 3 of operand 2
;; (indices 16..31 address operand 2 in the concatenation).  The
;; <mask_name> subst supplies the masked variant.
6324(define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
6325  [(set (match_operand:V16SF 0 "register_operand" "=v")
6326	(vec_select:V16SF
6327	  (vec_concat:V32SF
6328	    (match_operand:V16SF 1 "register_operand" "v")
6329	    (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6330	  (parallel [(const_int 2) (const_int 18)
6331		     (const_int 3) (const_int 19)
6332		     (const_int 6) (const_int 22)
6333		     (const_int 7) (const_int 23)
6334		     (const_int 10) (const_int 26)
6335		     (const_int 11) (const_int 27)
6336		     (const_int 14) (const_int 30)
6337		     (const_int 15) (const_int 31)])))]
6338  "TARGET_AVX512F"
6339  "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6340  [(set_attr "type" "sselog")
6341   (set_attr "prefix" "evex")
6342   (set_attr "mode" "V16SF")])
6343
6344;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Insn (vunpckhps, V8SF): within each group of four elements, interleave
;; elements 2 and 3 of operand 1 with elements 2 and 3 of operand 2
;; (indices 8..15 address operand 2 in the concatenation).
6345(define_insn "avx_unpckhps256<mask_name>"
6346  [(set (match_operand:V8SF 0 "register_operand" "=v")
6347	(vec_select:V8SF
6348	  (vec_concat:V16SF
6349	    (match_operand:V8SF 1 "register_operand" "v")
6350	    (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6351	  (parallel [(const_int 2) (const_int 10)
6352		     (const_int 3) (const_int 11)
6353		     (const_int 6) (const_int 14)
6354		     (const_int 7) (const_int 15)])))]
6355  "TARGET_AVX && <mask_avx512vl_condition>"
6356  "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6357  [(set_attr "type" "sselog")
6358   (set_attr "prefix" "vex")
6359   (set_attr "mode" "V8SF")])
6360
;; Expander: full-width interleave of elements 4..7 of operands 1 and 2.
;; Three steps, using two V8SF temporaries:
;;   op3 = per-group interleave of elements 0,1 of each 4-element group
;;   op4 = per-group interleave of elements 2,3 of each 4-element group
;;   op0 = { op3[4..7], op4[4..7] }
;; giving { op1[4], op2[4], op1[5], op2[5], op1[6], op2[6], op1[7], op2[7] }.
6361(define_expand "vec_interleave_highv8sf"
6362  [(set (match_dup 3)
6363	(vec_select:V8SF
6364	  (vec_concat:V16SF
6365	    (match_operand:V8SF 1 "register_operand")
6366	    (match_operand:V8SF 2 "nonimmediate_operand"))
6367	  (parallel [(const_int 0) (const_int 8)
6368		     (const_int 1) (const_int 9)
6369		     (const_int 4) (const_int 12)
6370		     (const_int 5) (const_int 13)])))
6371   (set (match_dup 4)
6372	(vec_select:V8SF
6373	  (vec_concat:V16SF
6374	    (match_dup 1)
6375	    (match_dup 2))
6376	  (parallel [(const_int 2) (const_int 10)
6377		     (const_int 3) (const_int 11)
6378		     (const_int 6) (const_int 14)
6379		     (const_int 7) (const_int 15)])))
6380   (set (match_operand:V8SF 0 "register_operand")
6381	(vec_select:V8SF
6382	  (vec_concat:V16SF
6383	    (match_dup 3)
6384	    (match_dup 4))
6385	  (parallel [(const_int 4) (const_int 5)
6386		     (const_int 6) (const_int 7)
6387		     (const_int 12) (const_int 13)
6388		     (const_int 14) (const_int 15)])))]
6389 "TARGET_AVX"
6390{
6391  operands[3] = gen_reg_rtx (V8SFmode);
6392  operands[4] = gen_reg_rtx (V8SFmode);
6393})
6394
;; Insn: result = { op1[2], op2[2], op1[3], op2[3] }.  Alternative 0 is the
;; non-AVX two-operand unpckhps with the destination tied to operand 1;
;; alternative 1 is the AVX three-operand vunpckhps, which also carries
;; the optional mask operand from the <mask_name> subst.
6395(define_insn "vec_interleave_highv4sf<mask_name>"
6396  [(set (match_operand:V4SF 0 "register_operand" "=x,v")
6397	(vec_select:V4SF
6398	  (vec_concat:V8SF
6399	    (match_operand:V4SF 1 "register_operand" "0,v")
6400	    (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
6401	  (parallel [(const_int 2) (const_int 6)
6402		     (const_int 3) (const_int 7)])))]
6403  "TARGET_SSE && <mask_avx512vl_condition>"
6404  "@
6405   unpckhps\t{%2, %0|%0, %2}
6406   vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6407  [(set_attr "isa" "noavx,avx")
6408   (set_attr "type" "sselog")
6409   (set_attr "prefix" "orig,vex")
6410   (set_attr "mode" "V4SF")])
6411
;; Insn (vunpcklps, V16SF): within each group of four elements, interleave
;; elements 0 and 1 of operand 1 with elements 0 and 1 of operand 2
;; (indices 16..31 address operand 2 in the concatenation).  The
;; <mask_name> subst supplies the masked variant.
6412(define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
6413  [(set (match_operand:V16SF 0 "register_operand" "=v")
6414	(vec_select:V16SF
6415	  (vec_concat:V32SF
6416	    (match_operand:V16SF 1 "register_operand" "v")
6417	    (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6418	  (parallel [(const_int 0) (const_int 16)
6419		     (const_int 1) (const_int 17)
6420		     (const_int 4) (const_int 20)
6421		     (const_int 5) (const_int 21)
6422		     (const_int 8) (const_int 24)
6423		     (const_int 9) (const_int 25)
6424		     (const_int 12) (const_int 28)
6425		     (const_int 13) (const_int 29)])))]
6426  "TARGET_AVX512F"
6427  "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6428  [(set_attr "type" "sselog")
6429   (set_attr "prefix" "evex")
6430   (set_attr "mode" "V16SF")])
6431
6432;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Insn (vunpcklps, V8SF): within each group of four elements, interleave
;; elements 0 and 1 of operand 1 with elements 0 and 1 of operand 2
;; (indices 8..15 address operand 2 in the concatenation).
6433(define_insn "avx_unpcklps256<mask_name>"
6434  [(set (match_operand:V8SF 0 "register_operand" "=v")
6435	(vec_select:V8SF
6436	  (vec_concat:V16SF
6437	    (match_operand:V8SF 1 "register_operand" "v")
6438	    (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6439	  (parallel [(const_int 0) (const_int 8)
6440		     (const_int 1) (const_int 9)
6441		     (const_int 4) (const_int 12)
6442		     (const_int 5) (const_int 13)])))]
6443  "TARGET_AVX && <mask_avx512vl_condition>"
6444  "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6445  [(set_attr "type" "sselog")
6446   (set_attr "prefix" "vex")
6447   (set_attr "mode" "V8SF")])
6448
;; Insn: explicitly masked V4SF vunpcklps.  The interleave
;; { op1[0], op2[0], op1[1], op2[1] } is vec_merged with operand 3 under
;; the QImode mask register operand 4.  Operand 3 is either tied to the
;; destination or matches constraint C (constraints "0C").  Enabled under
;; TARGET_AVX512VL.
6449(define_insn "unpcklps128_mask"
6450  [(set (match_operand:V4SF 0 "register_operand" "=v")
6451	(vec_merge:V4SF
6452	  (vec_select:V4SF
6453	    (vec_concat:V8SF
6454	      (match_operand:V4SF 1 "register_operand" "v")
6455	      (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6456	    (parallel [(const_int 0) (const_int 4)
6457		      (const_int 1) (const_int 5)]))
6458	  (match_operand:V4SF 3 "vector_move_operand" "0C")
6459	  (match_operand:QI 4 "register_operand" "Yk")))]
6460  "TARGET_AVX512VL"
6461  "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
6462  [(set_attr "type" "sselog")
6463   (set_attr "prefix" "evex")
6464   (set_attr "mode" "V4SF")])
6465
;; Expander: full-width interleave of elements 0..3 of operands 1 and 2.
;; Three steps, using two V8SF temporaries:
;;   op3 = per-group interleave of elements 0,1 of each 4-element group
;;   op4 = per-group interleave of elements 2,3 of each 4-element group
;;   op0 = { op3[0..3], op4[0..3] }
;; giving { op1[0], op2[0], op1[1], op2[1], op1[2], op2[2], op1[3], op2[3] }.
6466(define_expand "vec_interleave_lowv8sf"
6467  [(set (match_dup 3)
6468	(vec_select:V8SF
6469	  (vec_concat:V16SF
6470	    (match_operand:V8SF 1 "register_operand")
6471	    (match_operand:V8SF 2 "nonimmediate_operand"))
6472	  (parallel [(const_int 0) (const_int 8)
6473		     (const_int 1) (const_int 9)
6474		     (const_int 4) (const_int 12)
6475		     (const_int 5) (const_int 13)])))
6476   (set (match_dup 4)
6477	(vec_select:V8SF
6478	  (vec_concat:V16SF
6479	    (match_dup 1)
6480	    (match_dup 2))
6481	  (parallel [(const_int 2) (const_int 10)
6482		     (const_int 3) (const_int 11)
6483		     (const_int 6) (const_int 14)
6484		     (const_int 7) (const_int 15)])))
6485   (set (match_operand:V8SF 0 "register_operand")
6486	(vec_select:V8SF
6487	  (vec_concat:V16SF
6488	    (match_dup 3)
6489	    (match_dup 4))
6490	  (parallel [(const_int 0) (const_int 1)
6491		     (const_int 2) (const_int 3)
6492		     (const_int 8) (const_int 9)
6493		     (const_int 10) (const_int 11)])))]
6494 "TARGET_AVX"
6495{
6496  operands[3] = gen_reg_rtx (V8SFmode);
6497  operands[4] = gen_reg_rtx (V8SFmode);
6498})
6499
;; Insn: result = { op1[0], op2[0], op1[1], op2[1] }.  Alternative 0 is the
;; non-AVX two-operand unpcklps with the destination tied to operand 1;
;; alternative 1 is the AVX three-operand vunpcklps.  Unlike the "high"
;; V4SF pattern, this one has no <mask_name> variant.
6500(define_insn "vec_interleave_lowv4sf"
6501  [(set (match_operand:V4SF 0 "register_operand" "=x,v")
6502	(vec_select:V4SF
6503	  (vec_concat:V8SF
6504	    (match_operand:V4SF 1 "register_operand" "0,v")
6505	    (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
6506	  (parallel [(const_int 0) (const_int 4)
6507		     (const_int 1) (const_int 5)])))]
6508  "TARGET_SSE"
6509  "@
6510   unpcklps\t{%2, %0|%0, %2}
6511   vunpcklps\t{%2, %1, %0|%0, %1, %2}"
6512  [(set_attr "isa" "noavx,avx")
6513   (set_attr "type" "sselog")
6514   (set_attr "prefix" "orig,maybe_evex")
6515   (set_attr "mode" "V4SF")])
6516
6517;; These are modeled with the same vec_concat as the others so that we
6518;; capture users of shufps that can use the new instructions
;; Insn (vmovshdup, V8SF): operand 1 is concatenated with itself and the
;; odd-indexed elements are each selected twice, giving
;; { op1[1], op1[1], op1[3], op1[3], op1[5], op1[5], op1[7], op1[7] }.
6519(define_insn "avx_movshdup256<mask_name>"
6520  [(set (match_operand:V8SF 0 "register_operand" "=v")
6521	(vec_select:V8SF
6522	  (vec_concat:V16SF
6523	    (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6524	    (match_dup 1))
6525	  (parallel [(const_int 1) (const_int 1)
6526		     (const_int 3) (const_int 3)
6527		     (const_int 5) (const_int 5)
6528		     (const_int 7) (const_int 7)])))]
6529  "TARGET_AVX && <mask_avx512vl_condition>"
6530  "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6531  [(set_attr "type" "sse")
6532   (set_attr "prefix" "vex")
6533   (set_attr "mode" "V8SF")])
6534
;; Insn (movshdup, V4SF): operand 1 is concatenated with itself, so index 7
;; addresses element 3 of the same operand; the result is
;; { op1[1], op1[1], op1[3], op1[3] }.
6535(define_insn "sse3_movshdup<mask_name>"
6536  [(set (match_operand:V4SF 0 "register_operand" "=v")
6537	(vec_select:V4SF
6538	  (vec_concat:V8SF
6539	    (match_operand:V4SF 1 "vector_operand" "vBm")
6540	    (match_dup 1))
6541	  (parallel [(const_int 1)
6542		     (const_int 1)
6543		     (const_int 7)
6544		     (const_int 7)])))]
6545  "TARGET_SSE3 && <mask_avx512vl_condition>"
6546  "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6547  [(set_attr "type" "sse")
6548   (set_attr "prefix_rep" "1")
6549   (set_attr "prefix" "maybe_vex")
6550   (set_attr "mode" "V4SF")])
6551
;; Insn (vmovshdup, V16SF): operand 1 is concatenated with itself and each
;; odd-indexed element 1, 3, ..., 15 is selected twice in a row.
6552(define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
6553  [(set (match_operand:V16SF 0 "register_operand" "=v")
6554	(vec_select:V16SF
6555	  (vec_concat:V32SF
6556	    (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6557	    (match_dup 1))
6558	  (parallel [(const_int 1) (const_int 1)
6559		     (const_int 3) (const_int 3)
6560		     (const_int 5) (const_int 5)
6561		     (const_int 7) (const_int 7)
6562		     (const_int 9) (const_int 9)
6563		     (const_int 11) (const_int 11)
6564		     (const_int 13) (const_int 13)
6565		     (const_int 15) (const_int 15)])))]
6566  "TARGET_AVX512F"
6567  "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6568  [(set_attr "type" "sse")
6569   (set_attr "prefix" "evex")
6570   (set_attr "mode" "V16SF")])
6571
;; Insn (vmovsldup, V8SF): operand 1 is concatenated with itself and the
;; even-indexed elements are each selected twice, giving
;; { op1[0], op1[0], op1[2], op1[2], op1[4], op1[4], op1[6], op1[6] }.
6572(define_insn "avx_movsldup256<mask_name>"
6573  [(set (match_operand:V8SF 0 "register_operand" "=v")
6574	(vec_select:V8SF
6575	  (vec_concat:V16SF
6576	    (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6577	    (match_dup 1))
6578	  (parallel [(const_int 0) (const_int 0)
6579		     (const_int 2) (const_int 2)
6580		     (const_int 4) (const_int 4)
6581		     (const_int 6) (const_int 6)])))]
6582  "TARGET_AVX && <mask_avx512vl_condition>"
6583  "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6584  [(set_attr "type" "sse")
6585   (set_attr "prefix" "vex")
6586   (set_attr "mode" "V8SF")])
6587
;; Insn (movsldup, V4SF): operand 1 is concatenated with itself, so index 6
;; addresses element 2 of the same operand; the result is
;; { op1[0], op1[0], op1[2], op1[2] }.
6588(define_insn "sse3_movsldup<mask_name>"
6589  [(set (match_operand:V4SF 0 "register_operand" "=v")
6590	(vec_select:V4SF
6591	  (vec_concat:V8SF
6592	    (match_operand:V4SF 1 "vector_operand" "vBm")
6593	    (match_dup 1))
6594	  (parallel [(const_int 0)
6595		     (const_int 0)
6596		     (const_int 6)
6597		     (const_int 6)])))]
6598  "TARGET_SSE3 && <mask_avx512vl_condition>"
6599  "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6600  [(set_attr "type" "sse")
6601   (set_attr "prefix_rep" "1")
6602   (set_attr "prefix" "maybe_vex")
6603   (set_attr "mode" "V4SF")])
6604
;; Insn (vmovsldup, V16SF): operand 1 is concatenated with itself and each
;; even-indexed element 0, 2, ..., 14 is selected twice in a row.
6605(define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
6606  [(set (match_operand:V16SF 0 "register_operand" "=v")
6607	(vec_select:V16SF
6608	  (vec_concat:V32SF
6609	    (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6610	    (match_dup 1))
6611	  (parallel [(const_int 0) (const_int 0)
6612		     (const_int 2) (const_int 2)
6613		     (const_int 4) (const_int 4)
6614		     (const_int 6) (const_int 6)
6615		     (const_int 8) (const_int 8)
6616		     (const_int 10) (const_int 10)
6617		     (const_int 12) (const_int 12)
6618		     (const_int 14) (const_int 14)])))]
6619  "TARGET_AVX512F"
6620  "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6621  [(set_attr "type" "sse")
6622   (set_attr "prefix" "evex")
6623   (set_attr "mode" "V16SF")])
6624
;; Expand a 256-bit shufps from its 8-bit immediate (operand 3).  Each 2-bit
;; field of the immediate is decoded into explicit selector constants for
;; avx_shufps256_1: the two low fields are used unbiased and biased by 4,
;; the two high fields are biased by 8 and by 12, so every field supplies
;; one selector for the low half and one for the high half of the result.
6625(define_expand "avx_shufps256<mask_expand4_name>"
6626  [(match_operand:V8SF 0 "register_operand")
6627   (match_operand:V8SF 1 "register_operand")
6628   (match_operand:V8SF 2 "nonimmediate_operand")
6629   (match_operand:SI 3 "const_int_operand")]
6630  "TARGET_AVX"
6631{
6632  int mask = INTVAL (operands[3]);
6633  emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
6634						     operands[1],
6635						     operands[2],
6636						     GEN_INT ((mask >> 0) & 3),
6637						     GEN_INT ((mask >> 2) & 3),
6638						     GEN_INT (((mask >> 4) & 3) + 8),
6639						     GEN_INT (((mask >> 6) & 3) + 8),
6640						     GEN_INT (((mask >> 0) & 3) + 4),
6641						     GEN_INT (((mask >> 2) & 3) + 4),
6642						     GEN_INT (((mask >> 4) & 3) + 12),
6643						     GEN_INT (((mask >> 6) & 3) + 12)
6644						     <mask_expand4_args>));
6645  DONE;
6646})
6647
6648;; Each 2-bit field of the immediate selects 2 elements (one in each half of the result).
;; 256-bit shufps with explicit selectors.  The insn condition accepts the
;; pattern only when the high-half selectors (operands 7-10) equal the
;; low-half selectors (operands 3-6) plus 4, i.e. both halves perform the
;; same shuffle.  The output code packs operands 3-6 into the 8-bit
;; immediate (2 bits each, removing the bias of 8 from operands 5 and 6)
;; and overwrites operands[3] with it before returning the template.
6649(define_insn "avx_shufps256_1<mask_name>"
6650  [(set (match_operand:V8SF 0 "register_operand" "=v")
6651	(vec_select:V8SF
6652	  (vec_concat:V16SF
6653	    (match_operand:V8SF 1 "register_operand" "v")
6654	    (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6655	  (parallel [(match_operand 3  "const_0_to_3_operand"  )
6656		     (match_operand 4  "const_0_to_3_operand"  )
6657		     (match_operand 5  "const_8_to_11_operand" )
6658		     (match_operand 6  "const_8_to_11_operand" )
6659		     (match_operand 7  "const_4_to_7_operand"  )
6660		     (match_operand 8  "const_4_to_7_operand"  )
6661		     (match_operand 9  "const_12_to_15_operand")
6662		     (match_operand 10 "const_12_to_15_operand")])))]
6663  "TARGET_AVX
6664   && <mask_avx512vl_condition>
6665   && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6666       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6667       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6668       && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
6669{
6670  int mask;
6671  mask = INTVAL (operands[3]);
6672  mask |= INTVAL (operands[4]) << 2;
6673  mask |= (INTVAL (operands[5]) - 8) << 4;
6674  mask |= (INTVAL (operands[6]) - 8) << 6;
6675  operands[3] = GEN_INT (mask);
6676
6677  return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
6678}
6679  [(set_attr "type" "sseshuf")
6680   (set_attr "length_immediate" "1")
6681   (set_attr "prefix" "<mask_prefix>")
6682   (set_attr "mode" "V8SF")])
6683
;; Expand a 128-bit shufps from its 8-bit immediate (operand 3): the two low
;; 2-bit fields become selectors in 0-3, the two high fields are biased by 4
;; to give selectors in 4-7, and everything is handed to sse_shufps_v4sf.
6684(define_expand "sse_shufps<mask_expand4_name>"
6685  [(match_operand:V4SF 0 "register_operand")
6686   (match_operand:V4SF 1 "register_operand")
6687   (match_operand:V4SF 2 "vector_operand")
6688   (match_operand:SI 3 "const_int_operand")]
6689  "TARGET_SSE"
6690{
6691  int mask = INTVAL (operands[3]);
6692  emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
6693						     operands[1],
6694						     operands[2],
6695						     GEN_INT ((mask >> 0) & 3),
6696						     GEN_INT ((mask >> 2) & 3),
6697						     GEN_INT (((mask >> 4) & 3) + 4),
6698						     GEN_INT (((mask >> 6) & 3) + 4)
6699						     <mask_expand4_args>));
6700  DONE;
6701})
6702
;; Masked V4SF shufps (requires TARGET_AVX512VL): the shuffle result is
;; vec_merge'd with operand 7 under the QImode mask register operand 8.
;; The output code rebuilds the 8-bit immediate from selectors 3-6 (removing
;; the bias of 4 from operands 5 and 6) and stores it in operands[3].
6703(define_insn "sse_shufps_v4sf_mask"
6704  [(set (match_operand:V4SF 0 "register_operand" "=v")
6705    (vec_merge:V4SF
6706	  (vec_select:V4SF
6707	    (vec_concat:V8SF
6708	      (match_operand:V4SF 1 "register_operand" "v")
6709	      (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6710	    (parallel [(match_operand 3 "const_0_to_3_operand")
6711	               (match_operand 4 "const_0_to_3_operand")
6712	               (match_operand 5 "const_4_to_7_operand")
6713	               (match_operand 6 "const_4_to_7_operand")]))
6714      (match_operand:V4SF 7 "vector_move_operand" "0C")
6715      (match_operand:QI 8 "register_operand" "Yk")))]
6716  "TARGET_AVX512VL"
6717{
6718  int mask = 0;
6719  mask |= INTVAL (operands[3]) << 0;
6720  mask |= INTVAL (operands[4]) << 2;
6721  mask |= (INTVAL (operands[5]) - 4) << 4;
6722  mask |= (INTVAL (operands[6]) - 4) << 6;
6723  operands[3] = GEN_INT (mask);
6724
6725  return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
6726}
6727  [(set_attr "type" "sseshuf")
6728   (set_attr "length_immediate" "1")
6729   (set_attr "prefix" "evex")
6730   (set_attr "mode" "V4SF")])
6731
;; Unmasked 128-bit shufps for the VI4F_128 modes.  Alternative 0 is the
;; two-operand non-AVX form (operand 1 tied to the destination);
;; alternative 1 is the three-operand AVX form.  operands[3] is overwritten
;; with the rebuilt 8-bit immediate before the template is chosen.
6732(define_insn "sse_shufps_<mode>"
6733  [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
6734	(vec_select:VI4F_128
6735	  (vec_concat:<ssedoublevecmode>
6736	    (match_operand:VI4F_128 1 "register_operand" "0,v")
6737	    (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
6738	  (parallel [(match_operand 3 "const_0_to_3_operand")
6739		     (match_operand 4 "const_0_to_3_operand")
6740		     (match_operand 5 "const_4_to_7_operand")
6741		     (match_operand 6 "const_4_to_7_operand")])))]
6742  "TARGET_SSE"
6743{
6744  int mask = 0;
6745  mask |= INTVAL (operands[3]) << 0;
6746  mask |= INTVAL (operands[4]) << 2;
6747  mask |= (INTVAL (operands[5]) - 4) << 4;
6748  mask |= (INTVAL (operands[6]) - 4) << 6;
6749  operands[3] = GEN_INT (mask);
6750
6751  switch (which_alternative)
6752    {
6753    case 0:
6754      return "shufps\t{%3, %2, %0|%0, %2, %3}";
6755    case 1:
6756      return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6757    default:
6758      gcc_unreachable ();
6759    }
6760}
6761  [(set_attr "isa" "noavx,avx")
6762   (set_attr "type" "sseshuf")
6763   (set_attr "length_immediate" "1")
6764   (set_attr "prefix" "orig,maybe_evex")
6765   (set_attr "mode" "V4SF")])
6766
;; Extract elements 2 and 3 of a V4SF into a V2SF destination.
;; Alternatives: register -> memory (movhps), register -> register
;; (movhlps), offsettable memory -> register (movlps on the %H1 form of the
;; source).  The insn condition rejects memory-to-memory.
6767(define_insn "sse_storehps"
6768  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
6769	(vec_select:V2SF
6770	  (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
6771	  (parallel [(const_int 2) (const_int 3)])))]
6772  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6773  "@
6774   %vmovhps\t{%1, %0|%q0, %1}
6775   %vmovhlps\t{%1, %d0|%d0, %1}
6776   %vmovlps\t{%H1, %d0|%d0, %H1}"
6777  [(set_attr "type" "ssemov")
6778   (set_attr "prefix" "maybe_vex")
6779   (set_attr "mode" "V2SF,V4SF,V2SF")])
6780
;; Expander wrapper around sse_loadhps.  The operands are first passed
;; through ix86_fixup_binary_operands, which returns the destination to use;
;; if that is not operands[0], the result is copied to operands[0] afterwards.
6781(define_expand "sse_loadhps_exp"
6782  [(set (match_operand:V4SF 0 "nonimmediate_operand")
6783	(vec_concat:V4SF
6784	  (vec_select:V2SF
6785	    (match_operand:V4SF 1 "nonimmediate_operand")
6786	    (parallel [(const_int 0) (const_int 1)]))
6787	  (match_operand:V2SF 2 "nonimmediate_operand")))]
6788  "TARGET_SSE"
6789{
6790  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6791
6792  emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
6793
6794  /* Fix up the destination if needed.  */
6795  if (dst != operands[0])
6796    emit_move_insn (operands[0], dst);
6797
6798  DONE;
6799})
6800
;; Build a V4SF from elements 0-1 of operand 1 (low part) and the V2SF
;; operand 2 (high part).  Alternatives 0-3 have a register destination
;; (movhps / movlhps and their AVX forms); alternative 4 has an offsettable
;; memory destination tied to operand 1 and stores operand 2 through the
;; %H0 form of the destination.
6801(define_insn "sse_loadhps"
6802  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,v,x,v,o")
6803	(vec_concat:V4SF
6804	  (vec_select:V2SF
6805	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6806	    (parallel [(const_int 0) (const_int 1)]))
6807	  (match_operand:V2SF 2 "nonimmediate_operand"   " m,m,x,v,v")))]
6808  "TARGET_SSE"
6809  "@
6810   movhps\t{%2, %0|%0, %q2}
6811   vmovhps\t{%2, %1, %0|%0, %1, %q2}
6812   movlhps\t{%2, %0|%0, %2}
6813   vmovlhps\t{%2, %1, %0|%0, %1, %2}
6814   %vmovlps\t{%2, %H0|%H0, %2}"
6815  [(set_attr "isa" "noavx,avx,noavx,avx,*")
6816   (set_attr "type" "ssemov")
6817   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6818   (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
6819
;; Extract elements 0 and 1 of a V4SF into a V2SF destination.
;; Alternatives: register -> memory (movlps), register -> register (movaps),
;; memory -> register (movlps).  The insn condition rejects memory-to-memory.
6820(define_insn "sse_storelps"
6821  [(set (match_operand:V2SF 0 "nonimmediate_operand"   "=m,v,v")
6822	(vec_select:V2SF
6823	  (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
6824	  (parallel [(const_int 0) (const_int 1)])))]
6825  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6826  "@
6827   %vmovlps\t{%1, %0|%q0, %1}
6828   %vmovaps\t{%1, %0|%0, %1}
6829   %vmovlps\t{%1, %d0|%d0, %q1}"
6830  [(set_attr "type" "ssemov")
6831   (set_attr "prefix" "maybe_vex")
6832   (set_attr "mode" "V2SF,V4SF,V2SF")])
6833
;; Expander wrapper around sse_loadlps.  The operands are first passed
;; through ix86_fixup_binary_operands, which returns the destination to use;
;; if that is not operands[0], the result is copied to operands[0] afterwards.
6834(define_expand "sse_loadlps_exp"
6835  [(set (match_operand:V4SF 0 "nonimmediate_operand")
6836	(vec_concat:V4SF
6837	  (match_operand:V2SF 2 "nonimmediate_operand")
6838	  (vec_select:V2SF
6839	    (match_operand:V4SF 1 "nonimmediate_operand")
6840	    (parallel [(const_int 2) (const_int 3)]))))]
6841  "TARGET_SSE"
6842{
6843  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6844
6845  emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
6846
6847  /* Fix up the destination if needed.  */
6848  if (dst != operands[0])
6849    emit_move_insn (operands[0], dst);
6850
6851  DONE;
6852})
6853
;; Build a V4SF from the V2SF operand 2 (low part) and elements 2-3 of
;; operand 1 (high part).  Alternatives 0-1 are register-only and use
;; shufps / vshufps with immediate 0xe4; alternatives 2-3 take operand 2 from
;; memory with movlps / vmovlps; alternative 4 stores register operand 2 into
;; a memory destination that is tied to operand 1.
6854(define_insn "sse_loadlps"
6855  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,v,x,v,m")
6856	(vec_concat:V4SF
6857	  (match_operand:V2SF 2 "nonimmediate_operand"   " 0,v,m,m,v")
6858	  (vec_select:V2SF
6859	    (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
6860	    (parallel [(const_int 2) (const_int 3)]))))]
6861  "TARGET_SSE"
6862  "@
6863   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
6864   vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
6865   movlps\t{%2, %0|%0, %q2}
6866   vmovlps\t{%2, %1, %0|%0, %1, %q2}
6867   %vmovlps\t{%2, %0|%q0, %2}"
6868  [(set_attr "isa" "noavx,avx,noavx,avx,*")
6869   (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
6870   (set (attr "length_immediate")
6871     (if_then_else (eq_attr "alternative" "0,1")
6872		   (const_string "1")
6873		   (const_string "*")))
6874   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6875   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6876
;; Register-only movss: vec_merge with mask 1 takes element 0 from operand 2
;; and the remaining elements from operand 1.  Alternative 0 is the non-AVX
;; form with operand 1 tied to the destination; alternative 1 is vmovss.
6877(define_insn "sse_movss"
6878  [(set (match_operand:V4SF 0 "register_operand"   "=x,v")
6879	(vec_merge:V4SF
6880	  (match_operand:V4SF 2 "register_operand" " x,v")
6881	  (match_operand:V4SF 1 "register_operand" " 0,v")
6882	  (const_int 1)))]
6883  "TARGET_SSE"
6884  "@
6885   movss\t{%2, %0|%0, %2}
6886   vmovss\t{%2, %1, %0|%0, %1, %2}"
6887  [(set_attr "isa" "noavx,avx")
6888   (set_attr "type" "ssemov")
6889   (set_attr "prefix" "orig,maybe_evex")
6890   (set_attr "mode" "SF")])
6891
;; Broadcast element 0 of a V4SF register to every element of a
;; VF1_128_256 destination with vbroadcastss (requires TARGET_AVX2).
6892(define_insn "avx2_vec_dup<mode>"
6893  [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
6894	(vec_duplicate:VF1_128_256
6895	  (vec_select:SF
6896	    (match_operand:V4SF 1 "register_operand" "v")
6897	    (parallel [(const_int 0)]))))]
6898  "TARGET_AVX2"
6899  "vbroadcastss\t{%1, %0|%0, %1}"
6900  [(set_attr "type" "sselog1")
6901    (set_attr "prefix" "maybe_evex")
6902    (set_attr "mode" "<MODE>")])
6903
;; Broadcast element 0 of a V8SF register to every element of a V8SF
;; destination with vbroadcastss; the source is printed with the %x1 modifier.
6904(define_insn "avx2_vec_dupv8sf_1"
6905  [(set (match_operand:V8SF 0 "register_operand" "=v")
6906	(vec_duplicate:V8SF
6907	  (vec_select:SF
6908	    (match_operand:V8SF 1 "register_operand" "v")
6909	    (parallel [(const_int 0)]))))]
6910  "TARGET_AVX2"
6911  "vbroadcastss\t{%x1, %0|%0, %x1}"
6912  [(set_attr "type" "sselog1")
6913    (set_attr "prefix" "maybe_evex")
6914    (set_attr "mode" "V8SF")])
6915
;; Broadcast element 0 of a VF_512 register to every element of a
;; destination of the same mode (requires TARGET_AVX512F); the source is
;; printed with the %x1 modifier.
6916(define_insn "avx512f_vec_dup<mode>_1"
6917  [(set (match_operand:VF_512 0 "register_operand" "=v")
6918	(vec_duplicate:VF_512
6919	  (vec_select:<ssescalarmode>
6920	    (match_operand:VF_512 1 "register_operand" "v")
6921	    (parallel [(const_int 0)]))))]
6922  "TARGET_AVX512F"
6923  "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
6924  [(set_attr "type" "sselog1")
6925    (set_attr "prefix" "evex")
6926    (set_attr "mode" "<MODE>")])
6927
6928;; Although insertps takes register source, we prefer
6929;; unpcklps with register source since it is shorter.
;; Concatenate two SF values into a V2SF (SSE4.1 variant).
;;   Alternatives 0-2: unpcklps / vunpcklps, operand 2 in a register.
;;   Alternatives 3-5: insertps / vinsertps with immediate 0x10, operand 2
;;                     in memory.
;;   Alternative 6:    operand 2 is the constant C; operand 1 is loaded
;;                     from memory with movss.
;;   Alternatives 7-8: MMX register destination (punpckldq, movd).
;; The insn condition rejects the case where both inputs are in memory.
6930(define_insn "*vec_concatv2sf_sse4_1"
6931  [(set (match_operand:V2SF 0 "register_operand"
6932	  "=Yr,*x, v,Yr,*x,v,v,*y ,*y")
6933	(vec_concat:V2SF
6934	  (match_operand:SF 1 "nonimmediate_operand"
6935	  "  0, 0,Yv, 0,0, v,m, 0 , m")
6936	  (match_operand:SF 2 "vector_move_operand"
6937	  " Yr,*x,Yv, m,m, m,C,*ym, C")))]
6938  "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6939  "@
6940   unpcklps\t{%2, %0|%0, %2}
6941   unpcklps\t{%2, %0|%0, %2}
6942   vunpcklps\t{%2, %1, %0|%0, %1, %2}
6943   insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6944   insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6945   vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
6946   %vmovss\t{%1, %0|%0, %1}
6947   punpckldq\t{%2, %0|%0, %2}
6948   movd\t{%1, %0|%0, %1}"
6949  [(set (attr "isa")
6950     (cond [(eq_attr "alternative" "0,1,3,4")
6951	      (const_string "noavx")
6952	    (eq_attr "alternative" "2,5")
6953	      (const_string "avx")
6954	   ]
6955	   (const_string "*")))
6956   (set (attr "type")
6957     (cond [(eq_attr "alternative" "6")
6958	      (const_string "ssemov")
6959	    (eq_attr "alternative" "7")
6960	      (const_string "mmxcvt")
6961	    (eq_attr "alternative" "8")
6962	      (const_string "mmxmov")
6963	   ]
6964	   (const_string "sselog")))
6965   (set (attr "prefix_data16")
6966     (if_then_else (eq_attr "alternative" "3,4")
6967		   (const_string "1")
6968		   (const_string "*")))
6969   (set (attr "prefix_extra")
6970     (if_then_else (eq_attr "alternative" "3,4,5")
6971		   (const_string "1")
6972		   (const_string "*")))
6973   (set (attr "length_immediate")
6974     (if_then_else (eq_attr "alternative" "3,4,5")
6975		   (const_string "1")
6976		   (const_string "*")))
6977   (set (attr "prefix")
6978     (cond [(eq_attr "alternative" "2,5")
6979	      (const_string "maybe_evex")
6980	    (eq_attr "alternative" "6")
6981	      (const_string "maybe_vex")
6982	   ]
6983	   (const_string "orig")))
6984   (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
6985
6986;; ??? In theory we can match memory for the MMX alternative, but allowing
6987;; vector_operand for operand 2 and *not* allowing memory for the SSE
6988;; alternatives pretty much forces the MMX alternative to be chosen.
;; Concatenate two SF values into a V2SF (plain SSE variant).  Alternatives
;; 0-1 use SSE registers (unpcklps, or movss from memory when operand 2 is
;; the constant C); alternatives 2-3 use MMX registers (punpckldq, movd).
6989(define_insn "*vec_concatv2sf_sse"
6990  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
6991	(vec_concat:V2SF
6992	  (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
6993	  (match_operand:SF 2 "reg_or_0_operand"     " x,C,*y, C")))]
6994  "TARGET_SSE"
6995  "@
6996   unpcklps\t{%2, %0|%0, %2}
6997   movss\t{%1, %0|%0, %1}
6998   punpckldq\t{%2, %0|%0, %2}
6999   movd\t{%1, %0|%0, %1}"
7000  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7001   (set_attr "mode" "V4SF,SF,DI,DI")])
7002
;; Concatenate two V2SF values into a V4SF; operand 1 is always a register.
;; With operand 2 in a register the template is movlhps / vmovlhps; with
;; operand 2 in memory it is movhps / vmovhps.
7003(define_insn "*vec_concatv4sf"
7004  [(set (match_operand:V4SF 0 "register_operand"       "=x,v,x,v")
7005	(vec_concat:V4SF
7006	  (match_operand:V2SF 1 "register_operand"     " 0,v,0,v")
7007	  (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
7008  "TARGET_SSE"
7009  "@
7010   movlhps\t{%2, %0|%0, %2}
7011   vmovlhps\t{%2, %1, %0|%0, %1, %2}
7012   movhps\t{%2, %0|%0, %q2}
7013   vmovhps\t{%2, %1, %0|%0, %1, %q2}"
7014  [(set_attr "isa" "noavx,avx,noavx,avx")
7015   (set_attr "type" "ssemov")
7016   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
7017   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
7018
7019;; Avoid combining registers from different units in a single alternative,
7020;; see comment above inline_secondary_memory_needed function in i386.c
;; Set element 0 of a 128-bit VI4F_128 vector from scalar operand 2
;; (vec_merge mask 1); the remaining elements come from operand 1.
;;   Alternatives 0-5:   operand 1 is the constant C.
;;   Alternatives 6-10:  operand 1 is a register (tied to the destination
;;                       except in the AVX alternatives 7 and 10).
;;   Alternatives 11-13: memory destination tied to operand 1; the template
;;                       is "#", i.e. no instruction is printed directly.
7021(define_insn "vec_set<mode>_0"
7022  [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
7023	  "=Yr,*x,v,v,Yi,x,x,v,Yr ,*x ,x  ,m ,m   ,m")
7024	(vec_merge:VI4F_128
7025	  (vec_duplicate:VI4F_128
7026	    (match_operand:<ssescalarmode> 2 "general_operand"
7027	  " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
7028	  (match_operand:VI4F_128 1 "vector_move_operand"
7029	  " C , C,C,C,C ,C,0,v,0  ,0  ,x  ,0 ,0   ,0")
7030	  (const_int 1)))]
7031  "TARGET_SSE"
7032  "@
7033   insertps\t{$0xe, %2, %0|%0, %2, 0xe}
7034   insertps\t{$0xe, %2, %0|%0, %2, 0xe}
7035   vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
7036   %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
7037   %vmovd\t{%2, %0|%0, %2}
7038   movss\t{%2, %0|%0, %2}
7039   movss\t{%2, %0|%0, %2}
7040   vmovss\t{%2, %1, %0|%0, %1, %2}
7041   pinsrd\t{$0, %2, %0|%0, %2, 0}
7042   pinsrd\t{$0, %2, %0|%0, %2, 0}
7043   vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
7044   #
7045   #
7046   #"
7047  [(set (attr "isa")
7048     (cond [(eq_attr "alternative" "0,1,8,9")
7049	      (const_string "sse4_noavx")
7050	    (eq_attr "alternative" "2,7,10")
7051	      (const_string "avx")
7052	    (eq_attr "alternative" "3,4")
7053	      (const_string "sse2")
7054	    (eq_attr "alternative" "5,6")
7055	      (const_string "noavx")
7056	   ]
7057	   (const_string "*")))
7058   (set (attr "type")
7059     (cond [(eq_attr "alternative" "0,1,2,8,9,10")
7060	      (const_string "sselog")
7061	    (eq_attr "alternative" "12")
7062	      (const_string "imov")
7063	    (eq_attr "alternative" "13")
7064	      (const_string "fmov")
7065	   ]
7066	   (const_string "ssemov")))
7067   (set (attr "prefix_extra")
7068     (if_then_else (eq_attr "alternative" "8,9,10")
7069		   (const_string "1")
7070		   (const_string "*")))
7071   (set (attr "length_immediate")
7072     (if_then_else (eq_attr "alternative" "8,9,10")
7073		   (const_string "1")
7074		   (const_string "*")))
7075   (set (attr "prefix")
7076     (cond [(eq_attr "alternative" "0,1,5,6,8,9")
7077	      (const_string "orig")
7078	    (eq_attr "alternative" "2")
7079	      (const_string "maybe_evex")
7080	    (eq_attr "alternative" "3,4")
7081	      (const_string "maybe_vex")
7082	    (eq_attr "alternative" "7,10")
7083	      (const_string "vex")
7084	   ]
7085	   (const_string "*")))
7086   (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")])
7087
7088;; A subset is vec_setv4sf.
;; Insert SF operand 2 into one element of V4SF operand 1 with insertps.
;; Operand 3 is the vec_merge mask; the insn condition requires
;; exact_log2 of it, taken as unsigned, to be less than the number of V4SF
;; elements.  The output code replaces operands[3] with
;; exact_log2 (mask) << 4, which is then printed as the immediate.
7089(define_insn "*vec_setv4sf_sse4_1"
7090  [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
7091	(vec_merge:V4SF
7092	  (vec_duplicate:V4SF
7093	    (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm"))
7094	  (match_operand:V4SF 1 "register_operand" "0,0,v")
7095	  (match_operand:SI 3 "const_int_operand")))]
7096  "TARGET_SSE4_1
7097   && ((unsigned) exact_log2 (INTVAL (operands[3]))
7098       < GET_MODE_NUNITS (V4SFmode))"
7099{
7100  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
7101  switch (which_alternative)
7102    {
7103    case 0:
7104    case 1:
7105      return "insertps\t{%3, %2, %0|%0, %2, %3}";
7106    case 2:
7107      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7108    default:
7109      gcc_unreachable ();
7110    }
7111}
7112  [(set_attr "isa" "noavx,noavx,avx")
7113   (set_attr "type" "sselog")
7114   (set_attr "prefix_data16" "1,1,*")
7115   (set_attr "prefix_extra" "1")
7116   (set_attr "length_immediate" "1")
7117   (set_attr "prefix" "orig,orig,maybe_evex")
7118   (set_attr "mode" "V4SF")])
7119
7120;; All of vinsertps, vmovss, vmovd clear also the higher bits.
;; 256/512-bit variant (VI4F_256_512): operand 1 must be zero
;; (const0_operand), so the pattern places scalar operand 2 in element 0 of
;; an otherwise zero vector.  The destination is printed with the %x0
;; modifier in every alternative.
7121(define_insn "vec_set<mode>_0"
7122  [(set (match_operand:VI4F_256_512 0 "register_operand" "=v,v,Yi")
7123	(vec_merge:VI4F_256_512
7124	  (vec_duplicate:VI4F_256_512
7125	    (match_operand:<ssescalarmode> 2 "general_operand" "v,m,r"))
7126	  (match_operand:VI4F_256_512 1 "const0_operand" "C,C,C")
7127	  (const_int 1)))]
7128  "TARGET_AVX"
7129  "@
7130   vinsertps\t{$0xe, %2, %2, %x0|%x0, %2, %2, 0xe}
7131   vmov<ssescalarmodesuffix>\t{%x2, %x0|%x0, %2}
7132   vmovd\t{%2, %x0|%x0, %2}"
7133  [(set (attr "type")
7134     (if_then_else (eq_attr "alternative" "0")
7135		   (const_string "sselog")
7136		   (const_string "ssemov")))
7137   (set_attr "prefix" "maybe_evex")
7138   (set_attr "mode" "SF,<ssescalarmode>,SI")])
7139
;; insertps with an explicit immediate (operand 3, range 0-255), represented
;; as UNSPEC_INSERTPS.  When operand 2 is in memory, bits 6-7 of the
;; immediate (count_s) are folded into the address: they are cleared from
;; the immediate and the memory reference is changed to the SFmode value at
;; byte offset count_s * 4.
7140(define_insn "sse4_1_insertps"
7141  [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
7142	(unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
7143		      (match_operand:V4SF 1 "register_operand" "0,0,v")
7144		      (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
7145		     UNSPEC_INSERTPS))]
7146  "TARGET_SSE4_1"
7147{
7148  if (MEM_P (operands[2]))
7149    {
7150      unsigned count_s = INTVAL (operands[3]) >> 6;
7151      if (count_s)
7152	operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
7153      operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
7154    }
7155  switch (which_alternative)
7156    {
7157    case 0:
7158    case 1:
7159      return "insertps\t{%3, %2, %0|%0, %2, %3}";
7160    case 2:
7161      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7162    default:
7163      gcc_unreachable ();
7164    }
7165}
7166  [(set_attr "isa" "noavx,noavx,avx")
7167   (set_attr "type" "sselog")
7168   (set_attr "prefix_data16" "1,1,*")
7169   (set_attr "prefix_extra" "1")
7170   (set_attr "length_immediate" "1")
7171   (set_attr "prefix" "orig,orig,maybe_evex")
7172   (set_attr "mode" "V4SF")])
7173
;; After reload, setting element 0 of an in-memory VI4F_128 vector whose
;; other elements are unchanged (match_dup 0) becomes a plain scalar store:
;; the destination is re-addressed in the scalar mode at offset 0.
7174(define_split
7175  [(set (match_operand:VI4F_128 0 "memory_operand")
7176	(vec_merge:VI4F_128
7177	  (vec_duplicate:VI4F_128
7178	    (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
7179	  (match_dup 0)
7180	  (const_int 1)))]
7181  "TARGET_SSE && reload_completed"
7182  [(set (match_dup 0) (match_dup 1))]
7183  "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
7184
;; Expander for all modes in iterator V.  It forwards the vector (operand 0),
;; the scalar (operand 1) and the integer value of operand 2 to
;; ix86_expand_vector_set, which emits the actual instructions.
7185(define_expand "vec_set<mode>"
7186  [(match_operand:V 0 "register_operand")
7187   (match_operand:<ssescalarmode> 1 "register_operand")
7188   (match_operand 2 "const_int_operand")]
7189  "TARGET_SSE"
7190{
7191  ix86_expand_vector_set (false, operands[0], operands[1],
7192			  INTVAL (operands[2]));
7193  DONE;
7194})
7195
;; Extract element 0 of a V4SF.  No instruction is printed directly ("#");
;; after reload the insn is split into a plain move from the SFmode low
;; part of operand 1.  Memory-to-memory is rejected by the insn condition.
7196(define_insn_and_split "*vec_extractv4sf_0"
7197  [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
7198	(vec_select:SF
7199	  (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
7200	  (parallel [(const_int 0)])))]
7201  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7202  "#"
7203  "&& reload_completed"
7204  [(set (match_dup 0) (match_dup 1))]
7205  "operands[1] = gen_lowpart (SFmode, operands[1]);")
7206
;; Extract element operand 2 (0-3) of a V4SF register.  Alternatives 0-2
;; print extractps / vextractps.  Alternatives 3-4 print "#" and are split
;; after reload when the destination is an SSE register: indices 1 and 3 are
;; done with sse_shufps_v4sf using the index for every selector, index 2 with
;; vec_interleave_highv4sf; index 0 is treated as unreachable here.
7207(define_insn_and_split "*sse4_1_extractps"
7208  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,rm,Yv,Yv")
7209	(vec_select:SF
7210	  (match_operand:V4SF 1 "register_operand" "Yr,*x,v,0,v")
7211	  (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n,n")])))]
7212  "TARGET_SSE4_1"
7213  "@
7214   extractps\t{%2, %1, %0|%0, %1, %2}
7215   extractps\t{%2, %1, %0|%0, %1, %2}
7216   vextractps\t{%2, %1, %0|%0, %1, %2}
7217   #
7218   #"
7219  "&& reload_completed && SSE_REG_P (operands[0])"
7220  [(const_int 0)]
7221{
7222  rtx dest = lowpart_subreg (V4SFmode, operands[0], SFmode);
7223  switch (INTVAL (operands[2]))
7224    {
7225    case 1:
7226    case 3:
7227      emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
7228				      operands[2], operands[2],
7229				      GEN_INT (INTVAL (operands[2]) + 4),
7230				      GEN_INT (INTVAL (operands[2]) + 4)));
7231      break;
7232    case 2:
7233      emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
7234      break;
7235    default:
7236      /* 0 should be handled by the *vec_extractv4sf_0 pattern above.  */
7237      gcc_unreachable ();
7238    }
7239  DONE;
7240}
7241  [(set_attr "isa" "noavx,noavx,avx,noavx,avx")
7242   (set_attr "type" "sselog,sselog,sselog,*,*")
7243   (set_attr "prefix_data16" "1,1,1,*,*")
7244   (set_attr "prefix_extra" "1,1,1,*,*")
7245   (set_attr "length_immediate" "1,1,1,*,*")
7246   (set_attr "prefix" "orig,orig,maybe_evex,*,*")
7247   (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
7248
;; Extract element operand 2 (0-3) from a V4SF in offsettable memory.  After
;; reload this is split into a scalar SFmode move from the same address plus
;; 4 * index bytes.
7249(define_insn_and_split "*vec_extractv4sf_mem"
7250  [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
7251	(vec_select:SF
7252	  (match_operand:V4SF 1 "memory_operand" "o,o,o")
7253	  (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
7254  "TARGET_SSE"
7255  "#"
7256  "&& reload_completed"
7257  [(set (match_dup 0) (match_dup 1))]
7258{
7259  operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
7260})
7261
;; ISA prefix used in the name of the quarter-extract _mask expander below,
;; per 512-bit mode.
7262(define_mode_attr extract_type
7263  [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
7264
;; Matching name suffix for each mode (32x4 or 64x2).
7265(define_mode_attr extract_suf
7266  [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
7267
;; 512-bit modes iterated over by the quarter-extract expander; V8DF and V8DI
;; are enabled only with TARGET_AVX512DQ.
7268(define_mode_iterator AVX512_VEC
7269  [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
7270
;; Masked extraction of one quarter (operand 2, 0-3) of a 512-bit vector.
;; Operands 3 and 4 are passed through as the last two arguments of the
;; *_1_mask generators.  The quarter index is turned into explicit element
;; indices (4 consecutive for the 16-element modes, 2 for the others).
;; If the destination is memory and differs from operand 3, the result is
;; computed into a fresh register and moved to the destination afterwards.
7271(define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
7272  [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
7273   (match_operand:AVX512_VEC 1 "register_operand")
7274   (match_operand:SI 2 "const_0_to_3_operand")
7275   (match_operand:<ssequartermode> 3 "nonimmediate_operand")
7276   (match_operand:QI 4 "register_operand")]
7277  "TARGET_AVX512F"
7278{
7279  int mask;
7280  mask = INTVAL (operands[2]);
7281  rtx dest = operands[0];
7282
7283  if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
7284    dest = gen_reg_rtx (<ssequartermode>mode);
7285
7286  if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
7287    emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
7288        operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
7289	GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
7290	operands[4]));
7291  else
7292    emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
7293        operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
7294	operands[4]));
7295  if (dest != operands[0])
7296    emit_move_insn (operands[0], dest);
7297  DONE;
7298})
7299
;; Masked-store form of vextract*64x2: the destination and the merge source
;; (operand 4) must be the same memory, which the insn condition checks with
;; rtx_equal_p.  Operands 2 and 3 must be an even index and its successor;
;; the immediate printed is operand 2 >> 1.
7300(define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
7301  [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
7302	(vec_merge:<ssequartermode>
7303	  (vec_select:<ssequartermode>
7304	    (match_operand:V8FI 1 "register_operand" "v")
7305	    (parallel [(match_operand 2  "const_0_to_7_operand")
7306	      (match_operand 3  "const_0_to_7_operand")]))
7307	  (match_operand:<ssequartermode> 4 "memory_operand" "0")
7308	  (match_operand:QI 5 "register_operand" "Yk")))]
7309  "TARGET_AVX512DQ
7310   && INTVAL (operands[2]) % 2 == 0
7311   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
7312   && rtx_equal_p (operands[4], operands[0])"
7313{
7314  operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
7315  return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
7316}
7317  [(set_attr "type" "sselog")
7318   (set_attr "prefix_extra" "1")
7319   (set_attr "length_immediate" "1")
7320   (set_attr "memory" "store")
7321   (set_attr "prefix" "evex")
7322   (set_attr "mode" "<sseinsnmode>")])
7323
;; Masked-store form of vextract*32x4: the destination and the merge source
;; (operand 6) must be the same memory, which the insn condition checks with
;; rtx_equal_p.  Operands 2-5 must be four consecutive indices starting at a
;; multiple of 4; the immediate printed is operand 2 >> 2.
7324(define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
7325  [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
7326	(vec_merge:<ssequartermode>
7327	  (vec_select:<ssequartermode>
7328	    (match_operand:V16FI 1 "register_operand" "v")
7329	    (parallel [(match_operand 2  "const_0_to_15_operand")
7330	      (match_operand 3  "const_0_to_15_operand")
7331	      (match_operand 4  "const_0_to_15_operand")
7332	      (match_operand 5  "const_0_to_15_operand")]))
7333	  (match_operand:<ssequartermode> 6 "memory_operand" "0")
7334	  (match_operand:QI 7 "register_operand" "Yk")))]
7335  "TARGET_AVX512F
7336   && INTVAL (operands[2]) % 4 == 0
7337   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
7338   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
7339   && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
7340   && rtx_equal_p (operands[6], operands[0])"
7341{
7342  operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
7343  return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
7344}
7345  [(set_attr "type" "sselog")
7346   (set_attr "prefix_extra" "1")
7347   (set_attr "length_immediate" "1")
7348   (set_attr "memory" "store")
7349   (set_attr "prefix" "evex")
7350   (set_attr "mode" "<sseinsnmode>")])
7351
;; Extract two consecutive elements of a V8FI register, starting at an even
;; index (operand 2, with operand 3 == operand 2 + 1).  The immediate
;; printed is operand 2 >> 1.
7352(define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
7353  [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7354	(vec_select:<ssequartermode>
7355	  (match_operand:V8FI 1 "register_operand" "v")
7356	  (parallel [(match_operand 2  "const_0_to_7_operand")
7357            (match_operand 3  "const_0_to_7_operand")])))]
7358  "TARGET_AVX512DQ
7359   && INTVAL (operands[2]) % 2 == 0
7360   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
7361{
7362  operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
7363  return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
7364}
7365  [(set_attr "type" "sselog1")
7366   (set_attr "prefix_extra" "1")
7367   (set_attr "length_immediate" "1")
7368   (set_attr "prefix" "evex")
7369   (set_attr "mode" "<sseinsnmode>")])
7370
;; After reload, extracting elements 0-1 of a V8FI register becomes a plain
;; move.  Normally the source is narrowed to its low part.  Without
;; TARGET_AVX512VL, when the destination is a register and the source
;; satisfies EXT_REX_SSE_REG_P, the destination is widened to the full mode
;; instead.  The split condition excludes a non-register destination in that
;; situation.
7371(define_split
7372  [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
7373	(vec_select:<ssequartermode>
7374	  (match_operand:V8FI 1 "register_operand")
7375	  (parallel [(const_int 0) (const_int 1)])))]
7376  "TARGET_AVX512DQ
7377   && reload_completed
7378   && (TARGET_AVX512VL
7379       || REG_P (operands[0])
7380       || !EXT_REX_SSE_REG_P (operands[1]))"
7381  [(set (match_dup 0) (match_dup 1))]
7382{
7383  if (!TARGET_AVX512VL
7384      && REG_P (operands[0])
7385      && EXT_REX_SSE_REG_P (operands[1]))
7386    operands[0]
7387      = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
7388  else
7389    operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
7390})
7391
;; Extract four consecutive elements of a V16FI register, starting at a
;; multiple of 4 (operand 2, with operands 3-5 following consecutively).
;; The immediate printed is operand 2 >> 2.
7392(define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
7393  [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7394	(vec_select:<ssequartermode>
7395	  (match_operand:V16FI 1 "register_operand" "v")
7396	  (parallel [(match_operand 2  "const_0_to_15_operand")
7397            (match_operand 3  "const_0_to_15_operand")
7398            (match_operand 4  "const_0_to_15_operand")
7399            (match_operand 5  "const_0_to_15_operand")])))]
7400  "TARGET_AVX512F
7401   && INTVAL (operands[2]) % 4 == 0
7402   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
7403   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
7404   && INTVAL (operands[4]) == INTVAL (operands[5]) - 1"
7405{
7406  operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
7407  return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
7408}
7409  [(set_attr "type" "sselog1")
7410   (set_attr "prefix_extra" "1")
7411   (set_attr "length_immediate" "1")
7412   (set_attr "prefix" "evex")
7413   (set_attr "mode" "<sseinsnmode>")])
7414
;; After reload, extracting elements 0-3 of a V16FI register becomes a plain
;; move.  Normally the source is narrowed to its low part.  Without
;; TARGET_AVX512VL, when the destination is a register and the source
;; satisfies EXT_REX_SSE_REG_P, the destination is widened to the full mode
;; instead.  The split condition excludes a non-register destination in that
;; situation.
7415(define_split
7416  [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
7417	(vec_select:<ssequartermode>
7418	  (match_operand:V16FI 1 "register_operand")
7419	  (parallel [(const_int 0) (const_int 1)
7420		     (const_int 2) (const_int 3)])))]
7421  "TARGET_AVX512F
7422   && reload_completed
7423   && (TARGET_AVX512VL
7424       || REG_P (operands[0])
7425       || !EXT_REX_SSE_REG_P (operands[1]))"
7426  [(set (match_dup 0) (match_dup 1))]
7427{
7428  if (!TARGET_AVX512VL
7429      && REG_P (operands[0])
7430      && EXT_REX_SSE_REG_P (operands[1]))
7431    operands[0]
7432      = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
7433  else
7434    operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
7435})
7436
;; ISA prefix used in the name of the half-extract _mask expander below,
;; per 512-bit mode.
7437(define_mode_attr extract_type_2
7438  [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
7439
;; Matching name suffix for each mode (32x8 or 64x4).
7440(define_mode_attr extract_suf_2
7441  [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
7442
;; 512-bit modes iterated over by the half-extract expander; V16SF and V16SI
;; are enabled only with TARGET_AVX512DQ.
7443(define_mode_iterator AVX512_VEC_2
7444  [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
7445
;; Masked extraction of one half of a 512-bit vector.  Operand 2 selects the
;; generator: 0 -> vec_extract_lo_<mode>_mask, 1 -> vec_extract_hi_<mode>_mask.
;; Operands 3 and 4 are passed through to that generator.  If the destination
;; is memory and differs from operand 3, the result is computed into a fresh
;; register and moved to the destination afterwards.
7446(define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
7447  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7448   (match_operand:AVX512_VEC_2 1 "register_operand")
7449   (match_operand:SI 2 "const_0_to_1_operand")
7450   (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
7451   (match_operand:QI 4 "register_operand")]
7452  "TARGET_AVX512F"
7453{
7454  rtx (*insn)(rtx, rtx, rtx, rtx);
7455  rtx dest = operands[0];
7456
7457  if (MEM_P (dest) && !rtx_equal_p (dest, operands[3]))
7458    dest = gen_reg_rtx (<ssehalfvecmode>mode);
7459
7460  switch (INTVAL (operands[2]))
7461    {
7462    case 0:
7463      insn = gen_vec_extract_lo_<mode>_mask;
7464      break;
7465    case 1:
7466      insn = gen_vec_extract_hi_<mode>_mask;
7467      break;
7468    default:
7469      gcc_unreachable ();
7470    }
7471
7472  emit_insn (insn (dest, operands[1], operands[3], operands[4]));
7473  if (dest != operands[0])
7474    emit_move_insn (operands[0], dest);
7475  DONE;
7476})
7477
;; Post-reload split for the low half (elements 0-3) of a V8FI operand:
;; the vec_select becomes a plain move of the source's low part
;; (gen_lowpart).  It applies when the operands are not both memory and
;; either AVX512VL is enabled or the destination is a register and the
;; source does not satisfy EXT_REX_SSE_REG_P.
7478(define_split
7479  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7480	(vec_select:<ssehalfvecmode>
7481	  (match_operand:V8FI 1 "nonimmediate_operand")
7482	  (parallel [(const_int 0) (const_int 1)
7483            (const_int 2) (const_int 3)])))]
7484  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7485   && reload_completed
7486   && (TARGET_AVX512VL
7487       || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
7488  [(set (match_dup 0) (match_dup 1))]
7489  "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7490
;; Masked store of the low half (elements 0-3) of a V8FI register to
;; memory, emitted as vextract*64x4 with immediate 0.  Operand 2 is tied
;; to the destination ("0" constraint plus the rtx_equal_p test), so the
;; vec_merge takes unselected lanes from the destination itself.
;; Operand 3 is the QImode mask register.
7491(define_insn "vec_extract_lo_<mode>_maskm"
7492  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7493	(vec_merge:<ssehalfvecmode>
7494	  (vec_select:<ssehalfvecmode>
7495	    (match_operand:V8FI 1 "register_operand" "v")
7496	    (parallel [(const_int 0) (const_int 1)
7497	      (const_int 2) (const_int 3)]))
7498	  (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7499	  (match_operand:QI 3 "register_operand" "Yk")))]
7500  "TARGET_AVX512F
7501   && rtx_equal_p (operands[2], operands[0])"
7502  "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
7503  [(set_attr "type" "sselog1")
7504   (set_attr "prefix_extra" "1")
7505   (set_attr "length_immediate" "1")
7506   (set_attr "prefix" "evex")
7507   (set_attr "mode" "<sseinsnmode>")])
7508
;; Low half (elements 0-3) of a V8FI operand, optionally masked via
;; <mask_name>.  The vextract*64x4 instruction (immediate 0) is emitted
;; when <mask_applied> holds, or when AVX512VL is off and the source is
;; not memory; every other case returns "#", i.e. the insn has to be
;; split before output.
7509(define_insn "vec_extract_lo_<mode><mask_name>"
7510  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>,v")
7511	(vec_select:<ssehalfvecmode>
7512	  (match_operand:V8FI 1 "<store_mask_predicate>" "v,v,<store_mask_constraint>")
7513	  (parallel [(const_int 0) (const_int 1)
7514            (const_int 2) (const_int 3)])))]
7515  "TARGET_AVX512F
7516   && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7517{
7518  if (<mask_applied> || (!TARGET_AVX512VL && !MEM_P (operands[1])))
7519    return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7520  else
7521    return "#";
7522}
7523  [(set_attr "type" "sselog1")
7524   (set_attr "prefix_extra" "1")
7525   (set_attr "length_immediate" "1")
7526   (set_attr "memory" "none,store,load")
7527   (set_attr "prefix" "evex")
7528   (set_attr "mode" "<sseinsnmode>")])
7529
;; Masked store of the high half (elements 4-7) of a V8FI register to
;; memory, emitted as vextract*64x4 with immediate 1.  Operand 2 is tied
;; to the destination ("0" constraint plus the rtx_equal_p test), so the
;; vec_merge takes unselected lanes from the destination itself.
;; Operand 3 is the QImode mask register.
;; The "type" attribute is "sselog1", the same classification every
;; other vextract pattern in this group carries.
7530(define_insn "vec_extract_hi_<mode>_maskm"
7531  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7532	(vec_merge:<ssehalfvecmode>
7533	  (vec_select:<ssehalfvecmode>
7534	    (match_operand:V8FI 1 "register_operand" "v")
7535	    (parallel [(const_int 4) (const_int 5)
7536	      (const_int 6) (const_int 7)]))
7537	  (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7538	  (match_operand:QI 3 "register_operand" "Yk")))]
7539  "TARGET_AVX512F
7540   && rtx_equal_p (operands[2], operands[0])"
7541  "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7542  [(set_attr "type" "sselog1")
7543   (set_attr "prefix_extra" "1")
7544   (set_attr "length_immediate" "1")
7545   (set_attr "memory" "store")
7546   (set_attr "prefix" "evex")
7547   (set_attr "mode" "<sseinsnmode>")])
7548
;; High half (elements 4-7) of a V8FI register, optionally masked via
;; <mask_name>; always emitted as vextract*64x4 with immediate 1.
7549(define_insn "vec_extract_hi_<mode><mask_name>"
7550  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7551	(vec_select:<ssehalfvecmode>
7552	  (match_operand:V8FI 1 "register_operand" "v")
7553	  (parallel [(const_int 4) (const_int 5)
7554            (const_int 6) (const_int 7)])))]
7555  "TARGET_AVX512F"
7556  "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
7557  [(set_attr "type" "sselog1")
7558   (set_attr "prefix_extra" "1")
7559   (set_attr "length_immediate" "1")
7560   (set_attr "prefix" "evex")
7561   (set_attr "mode" "<sseinsnmode>")])
7562
;; Masked store of the high half (elements 8-15) of a V16FI register to
;; memory, emitted as vextract*32x8 with immediate 1 and enabled only
;; under TARGET_AVX512DQ.  Operand 2 is tied to the destination, so the
;; vec_merge takes unselected lanes from the destination itself.
7563(define_insn "vec_extract_hi_<mode>_maskm"
7564   [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7565	(vec_merge:<ssehalfvecmode>
7566	  (vec_select:<ssehalfvecmode>
7567	    (match_operand:V16FI 1 "register_operand" "v")
7568	    (parallel [(const_int 8) (const_int 9)
7569	      (const_int 10) (const_int 11)
7570	      (const_int 12) (const_int 13)
7571	      (const_int 14) (const_int 15)]))
7572	  (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7573	  (match_operand:QI 3 "register_operand" "Yk")))]
7574  "TARGET_AVX512DQ
7575   && rtx_equal_p (operands[2], operands[0])"
7576  "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7577  [(set_attr "type" "sselog1")
7578   (set_attr "prefix_extra" "1")
7579   (set_attr "length_immediate" "1")
7580   (set_attr "prefix" "evex")
7581   (set_attr "mode" "<sseinsnmode>")])
7582
;; High half (elements 8-15) of a V16FI register.  Alternative 0
;; (isa avx512dq) uses vextract*32x8 and carries <mask_operand2>;
;; alternative 1 (isa noavx512dq) emits the unmasked vextracti64x4 form
;; with a "vm" destination.  Both use immediate 1.
7583(define_insn "vec_extract_hi_<mode><mask_name>"
7584  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
7585	(vec_select:<ssehalfvecmode>
7586	  (match_operand:V16FI 1 "register_operand" "v,v")
7587	  (parallel [(const_int 8) (const_int 9)
7588            (const_int 10) (const_int 11)
7589	    (const_int 12) (const_int 13)
7590	    (const_int 14) (const_int 15)])))]
7591  "TARGET_AVX512F && <mask_avx512dq_condition>"
7592  "@
7593   vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
7594   vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7595  [(set_attr "type" "sselog1")
7596   (set_attr "prefix_extra" "1")
7597   (set_attr "isa" "avx512dq,noavx512dq")
7598   (set_attr "length_immediate" "1")
7599   (set_attr "prefix" "evex")
7600   (set_attr "mode" "<sseinsnmode>")])
7601
;; Expander for the masked 128-bit extract from a 256-bit VI48F_256
;; register.  Operand 2 selects the half (0 = low, 1 = high), operand 3
;; is the merge source and operand 4 the QImode mask.  When the
;; destination is memory and the selected insn cannot store to it
;; directly (the conditions are spelled out in the body), the result is
;; produced in a fresh register and then moved to the destination.
7602(define_expand "avx512vl_vextractf128<mode>"
7603  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7604   (match_operand:VI48F_256 1 "register_operand")
7605   (match_operand:SI 2 "const_0_to_1_operand")
7606   (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
7607   (match_operand:QI 4 "register_operand")]
7608  "TARGET_AVX512DQ && TARGET_AVX512VL"
7609{
7610  rtx (*insn)(rtx, rtx, rtx, rtx);
7611  rtx dest = operands[0];
7612
7613  if (MEM_P (dest)
7614      && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
7615	  /* For V8S[IF]mode there are maskm insns with =m and 0
7616	     constraints.  */
7617	  ? !rtx_equal_p (dest, operands[3])
7618	  /* For V4D[IF]mode, hi insns don't allow memory, and
7619	     lo insns have =m and 0C constraints.  */
7620	  : (operands[2] != const0_rtx
7621	     || (!rtx_equal_p (dest, operands[3])
7622		 && GET_CODE (operands[3]) != CONST_VECTOR))))
7623    dest = gen_reg_rtx (<ssehalfvecmode>mode);
7624  switch (INTVAL (operands[2]))
7625    {
7626    case 0:
7627      insn = gen_vec_extract_lo_<mode>_mask;
7628      break;
7629    case 1:
7630      insn = gen_vec_extract_hi_<mode>_mask;
7631      break;
7632    default:
7633      gcc_unreachable ();
7634    }
7635
7636  emit_insn (insn (dest, operands[1], operands[3], operands[4]));
7637  if (dest != operands[0])
7638    emit_move_insn (operands[0], dest);
7639  DONE;
7640})
7641
;; Expander for the unmasked 128-bit extract from a 256-bit V_256
;; register: operand 2 (0 or 1) chooses between vec_extract_lo_<mode>
;; and vec_extract_hi_<mode>, which is then emitted on operands 0 and 1.
7642(define_expand "avx_vextractf128<mode>"
7643  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7644   (match_operand:V_256 1 "register_operand")
7645   (match_operand:SI 2 "const_0_to_1_operand")]
7646  "TARGET_AVX"
7647{
7648  /* Operand 2 is 0 for the low half and 1 for the high half.  */
7649  rtx (*gen_half) (rtx, rtx);
7650
7651  if (INTVAL (operands[2]) == 0)
7652    gen_half = gen_vec_extract_lo_<mode>;
7653  else if (INTVAL (operands[2]) == 1)
7654    gen_half = gen_vec_extract_hi_<mode>;
7655  else
7656    gcc_unreachable ();
7657
7658  emit_insn (gen_half (operands[0], operands[1]));
7659  DONE;
7660})
7665
;; Low half (elements 0-7) of a V16FI operand, optionally masked.
;; vextract*32x8 (immediate 0) is emitted when <mask_applied> holds, or
;; when AVX512VL is off, the destination is not a register and the
;; source satisfies EXT_REX_SSE_REG_P; every other case returns "#",
;; i.e. the insn has to be split before output.
7666(define_insn "vec_extract_lo_<mode><mask_name>"
7667  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,v,m")
7668	(vec_select:<ssehalfvecmode>
7669	  (match_operand:V16FI 1 "<store_mask_predicate>"
7670				 "v,<store_mask_constraint>,v")
7671	  (parallel [(const_int 0) (const_int 1)
7672                     (const_int 2) (const_int 3)
7673                     (const_int 4) (const_int 5)
7674                     (const_int 6) (const_int 7)])))]
7675  "TARGET_AVX512F
7676   && <mask_mode512bit_condition>
7677   && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7678{
7679  if (<mask_applied>
7680      || (!TARGET_AVX512VL
7681	  && !REG_P (operands[0])
7682	  && EXT_REX_SSE_REG_P (operands[1])))
7683    return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7684  else
7685    return "#";
7686}
7687  [(set_attr "type" "sselog1")
7688   (set_attr "prefix_extra" "1")
7689   (set_attr "length_immediate" "1")
7690   (set_attr "memory" "none,load,store")
7691   (set_attr "prefix" "evex")
7692   (set_attr "mode" "<sseinsnmode>")])
7693
;; Post-reload split for the low half (elements 0-7) of a V16FI operand
;; when the operands are not both memory: the vec_select becomes a plain
;; move.  If AVX512VL is off, the destination is a register and the
;; source satisfies EXT_REX_SSE_REG_P, the destination is re-expressed
;; in <MODE>mode via lowpart_subreg; otherwise the source is narrowed
;; with gen_lowpart.
7694(define_split
7695  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7696	(vec_select:<ssehalfvecmode>
7697	  (match_operand:V16FI 1 "nonimmediate_operand")
7698	  (parallel [(const_int 0) (const_int 1)
7699            (const_int 2) (const_int 3)
7700	    (const_int 4) (const_int 5)
7701	    (const_int 6) (const_int 7)])))]
7702  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7703   && reload_completed
7704   && (TARGET_AVX512VL
7705       || REG_P (operands[0])
7706       || !EXT_REX_SSE_REG_P (operands[1]))"
7707  [(set (match_dup 0) (match_dup 1))]
7708{
7709  if (!TARGET_AVX512VL
7710      && REG_P (operands[0])
7711      && EXT_REX_SSE_REG_P (operands[1]))
7712    operands[0]
7713      = lowpart_subreg (<MODE>mode, operands[0], <ssehalfvecmode>mode);
7714  else
7715    operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
7716})
7717
;; Low half (elements 0-1) of a VI8F_256 operand, optionally masked.
;; With a mask applied the insn is emitted as vextract*64x2 with
;; immediate 0; without one the template is "#", i.e. the insn has to
;; be split before output.
;; The mask decoration is taken from <mask_operand2>, like the other
;; <mask_name> extract patterns, instead of a hard-coded %{%3%} that
;; would drop anything else the attribute prints (presumably the %N2
;; modifier that vec_extract_hi_<mode>_mask spells out explicitly --
;; confirm against the mask_operand2 definition).
7718(define_insn "vec_extract_lo_<mode><mask_name>"
7719  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,v,m")
7720	(vec_select:<ssehalfvecmode>
7721	  (match_operand:VI8F_256 1 "<store_mask_predicate>"
7722				    "v,<store_mask_constraint>,v")
7723	  (parallel [(const_int 0) (const_int 1)])))]
7724  "TARGET_AVX
7725   && <mask_avx512vl_condition> && <mask_avx512dq_condition>
7726   && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7727{
7728  if (<mask_applied>)
7729    return "vextract<shuffletype>64x2\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7730  else
7731    return "#";
7732}
7733   [(set_attr "type" "sselog1")
7734    (set_attr "prefix_extra" "1")
7735    (set_attr "length_immediate" "1")
7736    (set_attr "memory" "none,load,store")
7737    (set_attr "prefix" "evex")
7738    (set_attr "mode" "XI")])
7739
;; Post-reload split for the low half (elements 0-1) of a VI8F_256
;; operand when the operands are not both memory: the vec_select
;; becomes a plain move of the source's low part (gen_lowpart).
7740(define_split
7741  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7742	(vec_select:<ssehalfvecmode>
7743	  (match_operand:VI8F_256 1 "nonimmediate_operand")
7744	  (parallel [(const_int 0) (const_int 1)])))]
7745  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7746   && reload_completed"
7747  [(set (match_dup 0) (match_dup 1))]
7748  "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7749
;; High half (elements 2-3) of a VI8F_256 register, optionally masked.
;; The mnemonic is chosen at output time: vextract*64x2 (with
;; <mask_operand2>) under AVX512VL and AVX512DQ, unmasked vextract*32x4
;; under AVX512VL alone, and vextract<i128> otherwise.  All forms use
;; immediate 1.
7750(define_insn "vec_extract_hi_<mode><mask_name>"
7751  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
7752	(vec_select:<ssehalfvecmode>
7753	  (match_operand:VI8F_256 1 "register_operand" "v,v")
7754	  (parallel [(const_int 2) (const_int 3)])))]
7755  "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
7756{
7757  if (TARGET_AVX512VL)
7758  {
7759    if (TARGET_AVX512DQ)
7760      return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
7761    else
7762      return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
7763  }
7764  else
7765    return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
7766}
7767  [(set_attr "type" "sselog1")
7768   (set_attr "prefix_extra" "1")
7769   (set_attr "length_immediate" "1")
7770   (set_attr "prefix" "vex")
7771   (set_attr "mode" "<sseinsnmode>")])
7772
;; Post-reload split for the low half (elements 0-3) of a VI4F_256
;; operand when the operands are not both memory: the vec_select
;; becomes a plain move of the source's low part (gen_lowpart).
7773(define_split
7774  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7775	(vec_select:<ssehalfvecmode>
7776	  (match_operand:VI4F_256 1 "nonimmediate_operand")
7777	  (parallel [(const_int 0) (const_int 1)
7778		     (const_int 2) (const_int 3)])))]
7779  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7780   && reload_completed"
7781  [(set (match_dup 0) (match_dup 1))]
7782  "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7783
;; Low half (elements 0-3) of a VI4F_256 operand, optionally masked.
;; With a mask applied the insn is emitted as vextract*32x4 with
;; immediate 0 and <mask_operand2>; without one the template is "#",
;; i.e. the insn has to be split before output.
7784(define_insn "vec_extract_lo_<mode><mask_name>"
7785  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>"
7786					  "=<store_mask_constraint>,v")
7787	(vec_select:<ssehalfvecmode>
7788	  (match_operand:VI4F_256 1 "<store_mask_predicate>"
7789				    "v,<store_mask_constraint>")
7790	  (parallel [(const_int 0) (const_int 1)
7791		     (const_int 2) (const_int 3)])))]
7792  "TARGET_AVX
7793   && <mask_avx512vl_condition> && <mask_avx512dq_condition>
7794   && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7795{
7796  if (<mask_applied>)
7797    return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7798  else
7799    return "#";
7800}
7801  [(set_attr "type" "sselog1")
7802   (set_attr "prefix_extra" "1")
7803   (set_attr "length_immediate" "1")
7804   (set_attr "prefix" "evex")
7805   (set_attr "mode" "<sseinsnmode>")])
7806
;; Masked store of the low half (elements 0-3) of a VI4F_256 register
;; to memory, emitted as vextract*32x4 with immediate 0.  Operand 2 is
;; tied to the destination ("0" constraint plus the rtx_equal_p test),
;; so the vec_merge takes unselected lanes from the destination itself.
;; Operand 3 is the QImode mask register.
7807(define_insn "vec_extract_lo_<mode>_maskm"
7808  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7809	(vec_merge:<ssehalfvecmode>
7810	  (vec_select:<ssehalfvecmode>
7811	    (match_operand:VI4F_256 1 "register_operand" "v")
7812	    (parallel [(const_int 0) (const_int 1)
7813		      (const_int 2) (const_int 3)]))
7814	  (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7815	  (match_operand:QI 3 "register_operand" "Yk")))]
7816  "TARGET_AVX512VL && TARGET_AVX512F
7817   && rtx_equal_p (operands[2], operands[0])"
7818  "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
7819  [(set_attr "type" "sselog1")
7820   (set_attr "prefix_extra" "1")
7821   (set_attr "length_immediate" "1")
7822   (set_attr "prefix" "evex")
7823   (set_attr "mode" "<sseinsnmode>")])
7824
;; Masked store of the high half (elements 4-7) of a VI4F_256 register
;; to memory, emitted as vextract*32x4 with immediate 1.  Operand 2 is
;; tied to the destination ("0" constraint plus the rtx_equal_p test),
;; so the vec_merge takes unselected lanes from the destination itself.
;; Operand 3 is the mask register ("Yk") and therefore has QImode, the
;; same mode vec_extract_lo_<mode>_maskm uses and the mode of the mask
;; that avx512vl_vextractf128<mode> passes in; a vector mode here would
;; keep the pattern from ever matching that operand.
7825(define_insn "vec_extract_hi_<mode>_maskm"
7826  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7827	(vec_merge:<ssehalfvecmode>
7828	  (vec_select:<ssehalfvecmode>
7829	    (match_operand:VI4F_256 1 "register_operand" "v")
7830	    (parallel [(const_int 4) (const_int 5)
7831		      (const_int 6) (const_int 7)]))
7832	  (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7833	  (match_operand:QI 3 "register_operand" "Yk")))]
7834  "TARGET_AVX512F && TARGET_AVX512VL
7835   && rtx_equal_p (operands[2], operands[0])"
7836  "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7837  [(set_attr "type" "sselog1")
7838   (set_attr "length_immediate" "1")
7839   (set_attr "prefix" "evex")
7840   (set_attr "mode" "<sseinsnmode>")])
7841
;; Masked extract of the high half (elements 4-7) of a VI4F_256
;; register into a register, emitted as vextract*32x4 with immediate 1.
;; Operand 2 (the merge source) is either tied to the destination or
;; satisfies constraint "C"; operand 3 is the mask register in
;; <avx512fmaskmode>.
7842(define_insn "vec_extract_hi_<mode>_mask"
7843  [(set (match_operand:<ssehalfvecmode> 0 "register_operand" "=v")
7844	(vec_merge:<ssehalfvecmode>
7845	  (vec_select:<ssehalfvecmode>
7846	    (match_operand:VI4F_256 1 "register_operand" "v")
7847	    (parallel [(const_int 4) (const_int 5)
7848		       (const_int 6) (const_int 7)]))
7849	  (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "0C")
7850	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
7851  "TARGET_AVX512VL"
7852  "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
7853  [(set_attr "type" "sselog1")
7854   (set_attr "length_immediate" "1")
7855   (set_attr "prefix" "evex")
7856   (set_attr "mode" "<sseinsnmode>")])
7857
;; Unmasked extract of the high half (elements 4-7) of a VI4F_256
;; register.  Alternative 0 uses vextract<i128> with "x" registers
;; (prefix vex); alternative 1 is enabled for isa avx512vl and uses
;; vextract*32x4 with "v" registers (prefix evex).
7858(define_insn "vec_extract_hi_<mode>"
7859  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
7860	(vec_select:<ssehalfvecmode>
7861	  (match_operand:VI4F_256 1 "register_operand" "x, v")
7862	  (parallel [(const_int 4) (const_int 5)
7863		     (const_int 6) (const_int 7)])))]
7864  "TARGET_AVX"
7865  "@
7866    vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
7867    vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7868  [(set_attr "isa" "*, avx512vl")
7869   (set_attr "prefix" "vex, evex")
7870   (set_attr "type" "sselog1")
7871   (set_attr "length_immediate" "1")
7872   (set_attr "mode" "<sseinsnmode>")])
7873
;; Low half (elements 0-15) of a V32HI operand.  When AVX512VL is off,
;; the destination is not a register and the source satisfies
;; EXT_REX_SSE_REG_P, the insn is emitted as vextracti64x4 with
;; immediate 0.  Every other case returns "#" and is split after reload
;; into a plain move; the preparation statements either widen the
;; destination with lowpart_subreg or narrow the source with
;; gen_lowpart.
7874(define_insn_and_split "vec_extract_lo_v32hi"
7875  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,v,m")
7876	(vec_select:V16HI
7877	  (match_operand:V32HI 1 "nonimmediate_operand" "v,m,v")
7878	  (parallel [(const_int 0) (const_int 1)
7879		     (const_int 2) (const_int 3)
7880		     (const_int 4) (const_int 5)
7881		     (const_int 6) (const_int 7)
7882		     (const_int 8) (const_int 9)
7883		     (const_int 10) (const_int 11)
7884		     (const_int 12) (const_int 13)
7885		     (const_int 14) (const_int 15)])))]
7886  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7887{
7888  if (TARGET_AVX512VL
7889      || REG_P (operands[0])
7890      || !EXT_REX_SSE_REG_P (operands[1]))
7891    return "#";
7892  else
7893    return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
7894}
7895  "&& reload_completed
7896   && (TARGET_AVX512VL
7897       || REG_P (operands[0])
7898       || !EXT_REX_SSE_REG_P (operands[1]))"
7899  [(set (match_dup 0) (match_dup 1))]
7900{
7901  if (!TARGET_AVX512VL
7902      && REG_P (operands[0])
7903      && EXT_REX_SSE_REG_P (operands[1]))
7904    operands[0] = lowpart_subreg (V32HImode, operands[0], V16HImode);
7905  else
7906    operands[1] = gen_lowpart (V16HImode, operands[1]);
7907}
7908  [(set_attr "type" "sselog1")
7909   (set_attr "prefix_extra" "1")
7910   (set_attr "length_immediate" "1")
7911   (set_attr "memory" "none,load,store")
7912   (set_attr "prefix" "evex")
7913   (set_attr "mode" "XI")])
7914
;; High half (elements 16-31) of a V32HI register, stored to a register
;; or memory; always emitted as vextracti64x4 with immediate 1.
7915(define_insn "vec_extract_hi_v32hi"
7916  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
7917	(vec_select:V16HI
7918	  (match_operand:V32HI 1 "register_operand" "v")
7919	  (parallel [(const_int 16) (const_int 17)
7920		     (const_int 18) (const_int 19)
7921		     (const_int 20) (const_int 21)
7922		     (const_int 22) (const_int 23)
7923		     (const_int 24) (const_int 25)
7924		     (const_int 26) (const_int 27)
7925		     (const_int 28) (const_int 29)
7926		     (const_int 30) (const_int 31)])))]
7927  "TARGET_AVX512F"
7928  "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7929  [(set_attr "type" "sselog1")
7930   (set_attr "prefix_extra" "1")
7931   (set_attr "length_immediate" "1")
7932   (set_attr "prefix" "evex")
7933   (set_attr "mode" "XI")])
7934
;; Low half (elements 0-7) of a V16HI operand.  The output template is
;; always "#"; after reload the insn is split into a plain move whose
;; source is the V8HImode low part of operand 1 (gen_lowpart).
7935(define_insn_and_split "vec_extract_lo_v16hi"
7936  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
7937	(vec_select:V8HI
7938	  (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
7939	  (parallel [(const_int 0) (const_int 1)
7940		     (const_int 2) (const_int 3)
7941		     (const_int 4) (const_int 5)
7942		     (const_int 6) (const_int 7)])))]
7943  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7944  "#"
7945  "&& reload_completed"
7946  [(set (match_dup 0) (match_dup 1))]
7947  "operands[1] = gen_lowpart (V8HImode, operands[1]);")
7948
;; High half (elements 8-15) of a V16HI register.  Three alternatives,
;; all with immediate 1: vextract%~128 on "x" registers (prefix vex),
;; vextracti32x4 on operand 1 as-is (isa avx512dq), and vextracti32x4
;; with operand 1 printed through the %g modifier (isa avx512f).
7949(define_insn "vec_extract_hi_v16hi"
7950  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm,vm,vm")
7951	(vec_select:V8HI
7952	  (match_operand:V16HI 1 "register_operand" "x,v,v")
7953	  (parallel [(const_int 8) (const_int 9)
7954		     (const_int 10) (const_int 11)
7955		     (const_int 12) (const_int 13)
7956		     (const_int 14) (const_int 15)])))]
7957  "TARGET_AVX"
7958  "@
7959   vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
7960   vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
7961   vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
7962  [(set_attr "type" "sselog1")
7963   (set_attr "prefix_extra" "1")
7964   (set_attr "length_immediate" "1")
7965   (set_attr "isa" "*,avx512dq,avx512f")
7966   (set_attr "prefix" "vex,evex,evex")
7967   (set_attr "mode" "OI")])
7968
;; Low half (elements 0-31) of a V64QI operand.  When AVX512VL is off,
;; the destination is not a register and the source satisfies
;; EXT_REX_SSE_REG_P, the insn is emitted as vextracti64x4 with
;; immediate 0.  Every other case returns "#" and is split after reload
;; into a plain move; the preparation statements either widen the
;; destination with lowpart_subreg or narrow the source with
;; gen_lowpart.
7969(define_insn_and_split "vec_extract_lo_v64qi"
7970  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,v,m")
7971	(vec_select:V32QI
7972	  (match_operand:V64QI 1 "nonimmediate_operand" "v,m,v")
7973	  (parallel [(const_int 0) (const_int 1)
7974		     (const_int 2) (const_int 3)
7975		     (const_int 4) (const_int 5)
7976		     (const_int 6) (const_int 7)
7977		     (const_int 8) (const_int 9)
7978		     (const_int 10) (const_int 11)
7979		     (const_int 12) (const_int 13)
7980		     (const_int 14) (const_int 15)
7981		     (const_int 16) (const_int 17)
7982		     (const_int 18) (const_int 19)
7983		     (const_int 20) (const_int 21)
7984		     (const_int 22) (const_int 23)
7985		     (const_int 24) (const_int 25)
7986		     (const_int 26) (const_int 27)
7987		     (const_int 28) (const_int 29)
7988		     (const_int 30) (const_int 31)])))]
7989  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7990{
7991  if (TARGET_AVX512VL
7992      || REG_P (operands[0])
7993      || !EXT_REX_SSE_REG_P (operands[1]))
7994    return "#";
7995  else
7996    return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
7997}
7998  "&& reload_completed
7999   && (TARGET_AVX512VL
8000       || REG_P (operands[0])
8001       || !EXT_REX_SSE_REG_P (operands[1]))"
8002  [(set (match_dup 0) (match_dup 1))]
8003{
8004  if (!TARGET_AVX512VL
8005      && REG_P (operands[0])
8006      && EXT_REX_SSE_REG_P (operands[1]))
8007    operands[0] = lowpart_subreg (V64QImode, operands[0], V32QImode);
8008  else
8009    operands[1] = gen_lowpart (V32QImode, operands[1]);
8010}
8011  [(set_attr "type" "sselog1")
8012   (set_attr "prefix_extra" "1")
8013   (set_attr "length_immediate" "1")
8014   (set_attr "memory" "none,load,store")
8015   (set_attr "prefix" "evex")
8016   (set_attr "mode" "XI")])
8017
;; High half (elements 32-63) of a V64QI register, stored to a register
;; or memory; always emitted as vextracti64x4 with immediate 1.
8018(define_insn "vec_extract_hi_v64qi"
8019  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=vm")
8020	(vec_select:V32QI
8021	  (match_operand:V64QI 1 "register_operand" "v")
8022	  (parallel [(const_int 32) (const_int 33)
8023		     (const_int 34) (const_int 35)
8024		     (const_int 36) (const_int 37)
8025		     (const_int 38) (const_int 39)
8026		     (const_int 40) (const_int 41)
8027		     (const_int 42) (const_int 43)
8028		     (const_int 44) (const_int 45)
8029		     (const_int 46) (const_int 47)
8030		     (const_int 48) (const_int 49)
8031		     (const_int 50) (const_int 51)
8032		     (const_int 52) (const_int 53)
8033		     (const_int 54) (const_int 55)
8034		     (const_int 56) (const_int 57)
8035		     (const_int 58) (const_int 59)
8036		     (const_int 60) (const_int 61)
8037		     (const_int 62) (const_int 63)])))]
8038  "TARGET_AVX512F"
8039  "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8040  [(set_attr "type" "sselog1")
8041   (set_attr "prefix_extra" "1")
8042   (set_attr "length_immediate" "1")
8043   (set_attr "prefix" "evex")
8044   (set_attr "mode" "XI")])
8045
;; Low half (elements 0-15) of a V32QI operand.  The output template is
;; always "#"; after reload the insn is split into a plain move whose
;; source is the V16QImode low part of operand 1 (gen_lowpart).
8046(define_insn_and_split "vec_extract_lo_v32qi"
8047  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
8048	(vec_select:V16QI
8049	  (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
8050	  (parallel [(const_int 0) (const_int 1)
8051		     (const_int 2) (const_int 3)
8052		     (const_int 4) (const_int 5)
8053		     (const_int 6) (const_int 7)
8054		     (const_int 8) (const_int 9)
8055		     (const_int 10) (const_int 11)
8056		     (const_int 12) (const_int 13)
8057		     (const_int 14) (const_int 15)])))]
8058  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8059  "#"
8060  "&& reload_completed"
8061  [(set (match_dup 0) (match_dup 1))]
8062  "operands[1] = gen_lowpart (V16QImode, operands[1]);")
8063
;; High half (elements 16-31) of a V32QI register.  Three alternatives,
;; all with immediate 1: vextract%~128 on "x" registers (prefix vex),
;; vextracti32x4 on operand 1 as-is (isa avx512dq), and vextracti32x4
;; with operand 1 printed through the %g modifier (isa avx512f).
8064(define_insn "vec_extract_hi_v32qi"
8065  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=xm,vm,vm")
8066	(vec_select:V16QI
8067	  (match_operand:V32QI 1 "register_operand" "x,v,v")
8068	  (parallel [(const_int 16) (const_int 17)
8069		     (const_int 18) (const_int 19)
8070		     (const_int 20) (const_int 21)
8071		     (const_int 22) (const_int 23)
8072		     (const_int 24) (const_int 25)
8073		     (const_int 26) (const_int 27)
8074		     (const_int 28) (const_int 29)
8075		     (const_int 30) (const_int 31)])))]
8076  "TARGET_AVX"
8077  "@
8078   vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
8079   vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
8080   vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
8081  [(set_attr "type" "sselog1")
8082   (set_attr "prefix_extra" "1")
8083   (set_attr "length_immediate" "1")
8084   (set_attr "isa" "*,avx512dq,avx512f")
8085   (set_attr "prefix" "vex,evex,evex")
8086   (set_attr "mode" "OI")])
8087
8088;; Modes handled by vec_extract patterns.
;; The 128-bit modes are listed unconditionally; 256-bit modes require
;; TARGET_AVX; 512-bit modes require TARGET_AVX512F, except V64QI and
;; V32HI, which require TARGET_AVX512BW.
8089(define_mode_iterator VEC_EXTRACT_MODE
8090  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
8091   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
8092   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
8093   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
8094   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
8095   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
8096   (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
8097
;; Expander extracting one scalar element of a VEC_EXTRACT_MODE register
;; into a register.  Operand 2 is the constant element index; all the
;; work is delegated to ix86_expand_vector_extract, called with `false'
;; as its first argument.
8098(define_expand "vec_extract<mode><ssescalarmodelower>"
8099  [(match_operand:<ssescalarmode> 0 "register_operand")
8100   (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
8101   (match_operand 2 "const_int_operand")]
8102  "TARGET_SSE"
8103{
8104  ix86_expand_vector_extract (false, operands[0], operands[1],
8105			      INTVAL (operands[2]));
8106  DONE;
8107})
8108
;; Expander extracting one half of a V_512 register.  A nonzero
;; operand 2 selects vec_extract_hi_<mode>, zero selects
;; vec_extract_lo_<mode>; the chosen insn is emitted on operands 0 and 1.
8109(define_expand "vec_extract<mode><ssehalfvecmodelower>"
8110  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8111   (match_operand:V_512 1 "register_operand")
8112   (match_operand 2 "const_0_to_1_operand")]
8113  "TARGET_AVX512F"
8114{
8115  rtx (*gen_half) (rtx, rtx)
8116    = (INTVAL (operands[2]) != 0
8117       ? gen_vec_extract_hi_<mode> : gen_vec_extract_lo_<mode>);
8118  emit_insn (gen_half (operands[0], operands[1]));
8119  DONE;
8120})
8121
8122;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8123;;
8124;; Parallel double-precision floating point element swizzling
8125;;
8126;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8127
;; vunpckhpd on 512-bit vectors, optionally masked: from the V16DF
;; concatenation of operands 1 and 2 it selects elements
;; 1,9,3,11,5,13,7,15, i.e. the odd-numbered element of each operand
;; pairwise.
8128(define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
8129  [(set (match_operand:V8DF 0 "register_operand" "=v")
8130	(vec_select:V8DF
8131	  (vec_concat:V16DF
8132	    (match_operand:V8DF 1 "register_operand" "v")
8133	    (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
8134	  (parallel [(const_int 1) (const_int 9)
8135		     (const_int 3) (const_int 11)
8136		     (const_int 5) (const_int 13)
8137		     (const_int 7) (const_int 15)])))]
8138  "TARGET_AVX512F"
8139  "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8140  [(set_attr "type" "sselog")
8141   (set_attr "prefix" "evex")
8142   (set_attr "mode" "V8DF")])
8143
8144;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhpd on 256-bit vectors, optionally masked: selects elements
;; 1,5,3,7 of the V8DF concatenation of operands 1 and 2.
8145(define_insn "avx_unpckhpd256<mask_name>"
8146  [(set (match_operand:V4DF 0 "register_operand" "=v")
8147	(vec_select:V4DF
8148	  (vec_concat:V8DF
8149	    (match_operand:V4DF 1 "register_operand" "v")
8150	    (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
8151	  (parallel [(const_int 1) (const_int 5)
8152		     (const_int 3) (const_int 7)])))]
8153  "TARGET_AVX && <mask_avx512vl_condition>"
8154  "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8155  [(set_attr "type" "sselog")
8156   (set_attr "prefix" "vex")
8157   (set_attr "mode" "V4DF")])
8158
;; Interleave the high halves of two V4DF vectors in three steps:
;; operand 3 receives elements 0,4,2,6 of the concatenation of operands
;; 1 and 2, operand 4 receives elements 1,5,3,7 of the same
;; concatenation, and operand 0 takes elements 2,3,6,7 of the
;; concatenation of those two temporaries.  Both temporaries are fresh
;; V4DF pseudo registers created in the preparation statements.
8159(define_expand "vec_interleave_highv4df"
8160  [(set (match_dup 3)
8161	(vec_select:V4DF
8162	  (vec_concat:V8DF
8163	    (match_operand:V4DF 1 "register_operand")
8164	    (match_operand:V4DF 2 "nonimmediate_operand"))
8165	  (parallel [(const_int 0) (const_int 4)
8166		     (const_int 2) (const_int 6)])))
8167   (set (match_dup 4)
8168	(vec_select:V4DF
8169	  (vec_concat:V8DF
8170	    (match_dup 1)
8171	    (match_dup 2))
8172	  (parallel [(const_int 1) (const_int 5)
8173		     (const_int 3) (const_int 7)])))
8174   (set (match_operand:V4DF 0 "register_operand")
8175	(vec_select:V4DF
8176	  (vec_concat:V8DF
8177	    (match_dup 3)
8178	    (match_dup 4))
8179	  (parallel [(const_int 2) (const_int 3)
8180		     (const_int 6) (const_int 7)])))]
8181 "TARGET_AVX"
8182{
8183  operands[3] = gen_reg_rtx (V4DFmode);
8184  operands[4] = gen_reg_rtx (V4DFmode);
8185})
8186
8187
;; Masked vunpckhpd on 128-bit vectors: selects elements 1 and 3 of the
;; V4DF concatenation of operands 1 and 2 and merges the result with
;; operand 3 (tied to the destination or satisfying constraint "C")
;; under the QImode mask in operand 4.
8188(define_insn "avx512vl_unpckhpd128_mask"
8189  [(set (match_operand:V2DF 0 "register_operand" "=v")
8190	(vec_merge:V2DF
8191	  (vec_select:V2DF
8192	    (vec_concat:V4DF
8193	      (match_operand:V2DF 1 "register_operand" "v")
8194	      (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8195	    (parallel [(const_int 1) (const_int 3)]))
8196	  (match_operand:V2DF 3 "vector_move_operand" "0C")
8197	  (match_operand:QI 4 "register_operand" "Yk")))]
8198  "TARGET_AVX512VL"
8199  "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
8200  [(set_attr "type" "sselog")
8201   (set_attr "prefix" "evex")
8202   (set_attr "mode" "V2DF")])
8203
;; Expander selecting elements 1 and 3 of the V4DF concatenation of two
;; V2DF operands.  If ix86_vec_interleave_v2df_operator_ok rejects the
;; operands as given, operand 2 is forced into a register first.
8204(define_expand "vec_interleave_highv2df"
8205  [(set (match_operand:V2DF 0 "register_operand")
8206	(vec_select:V2DF
8207	  (vec_concat:V4DF
8208	    (match_operand:V2DF 1 "nonimmediate_operand")
8209	    (match_operand:V2DF 2 "nonimmediate_operand"))
8210	  (parallel [(const_int 1)
8211		     (const_int 3)])))]
8212  "TARGET_SSE2"
8213{
8214  if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
8215    operands[2] = force_reg (V2DFmode, operands[2]);
8216})
8217
;; Insn for the V2DF high interleave (elements 1 and 3 of the V4DF
;; concatenation).  Six alternatives, in order: unpckhpd (noavx),
;; vunpckhpd (avx), %vmovddup of the high part of a memory operand 1
;; when operand 2 is tied to it (sse3), movlpd (noavx), vmovlpd (avx),
;; and %vmovhpd storing to a memory destination.  prefix_data16 is set
;; to "1" for alternatives 3 and 5 only.
8218(define_insn "*vec_interleave_highv2df"
8219  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,v,v,x,v,m")
8220	(vec_select:V2DF
8221	  (vec_concat:V4DF
8222	    (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v")
8223	    (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0"))
8224	  (parallel [(const_int 1)
8225		     (const_int 3)])))]
8226  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
8227  "@
8228   unpckhpd\t{%2, %0|%0, %2}
8229   vunpckhpd\t{%2, %1, %0|%0, %1, %2}
8230   %vmovddup\t{%H1, %0|%0, %H1}
8231   movlpd\t{%H1, %0|%0, %H1}
8232   vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
8233   %vmovhpd\t{%1, %0|%q0, %1}"
8234  [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
8235   (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
8236   (set (attr "prefix_data16")
8237     (if_then_else (eq_attr "alternative" "3,5")
8238		   (const_string "1")
8239		   (const_string "*")))
8240   (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
8241   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
8242
;; Expander for the 512-bit movddup form, optionally masked: operand 1
;; is concatenated with itself and elements 0,8,2,10,4,12,6,14 are
;; selected, so each even-numbered element of operand 1 appears twice.
8243(define_expand "avx512f_movddup512<mask_name>"
8244  [(set (match_operand:V8DF 0 "register_operand")
8245	(vec_select:V8DF
8246	  (vec_concat:V16DF
8247	    (match_operand:V8DF 1 "nonimmediate_operand")
8248	    (match_dup 1))
8249	  (parallel [(const_int 0) (const_int 8)
8250		     (const_int 2) (const_int 10)
8251		     (const_int 4) (const_int 12)
8252		     (const_int 6) (const_int 14)])))]
8253  "TARGET_AVX512F")
8254
;; Expander for the 512-bit unpcklpd form, optionally masked: selects
;; elements 0,8,2,10,4,12,6,14 of the V16DF concatenation of operands 1
;; and 2.
8255(define_expand "avx512f_unpcklpd512<mask_name>"
8256  [(set (match_operand:V8DF 0 "register_operand")
8257	(vec_select:V8DF
8258	  (vec_concat:V16DF
8259	    (match_operand:V8DF 1 "register_operand")
8260	    (match_operand:V8DF 2 "nonimmediate_operand"))
8261	  (parallel [(const_int 0) (const_int 8)
8262		     (const_int 2) (const_int 10)
8263		     (const_int 4) (const_int 12)
8264		     (const_int 6) (const_int 14)])))]
8265  "TARGET_AVX512F")
8266
;; Insn for the 512-bit low unpack (elements 0,8,2,10,4,12,6,14 of the
;; V16DF concatenation), optionally masked.  Alternative 0 has operand 2
;; tied to operand 1 and is emitted as vmovddup of operand 1;
;; alternative 1 is the general vunpcklpd of operands 1 and 2.
8267(define_insn "*avx512f_unpcklpd512<mask_name>"
8268  [(set (match_operand:V8DF 0 "register_operand" "=v,v")
8269	(vec_select:V8DF
8270	  (vec_concat:V16DF
8271	    (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
8272	    (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
8273	  (parallel [(const_int 0) (const_int 8)
8274		     (const_int 2) (const_int 10)
8275		     (const_int 4) (const_int 12)
8276		     (const_int 6) (const_int 14)])))]
8277  "TARGET_AVX512F"
8278  "@
8279   vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
8280   vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8281  [(set_attr "type" "sselog")
8282   (set_attr "prefix" "evex")
8283   (set_attr "mode" "V8DF")])
8284
8285;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander for the 256-bit "duplicate even elements" shuffle on V4DF:
;; operand 1 is concatenated with itself and elements 0,4,2,6 are
;; selected, so elements 0 and 2 of operand 1 each appear twice.
;; Enabled for TARGET_AVX, additionally gated by the
;; <mask_avx512vl_condition> subst attribute (defined elsewhere).
8286(define_expand "avx_movddup256<mask_name>"
8287  [(set (match_operand:V4DF 0 "register_operand")
8288	(vec_select:V4DF
8289	  (vec_concat:V8DF
8290	    (match_operand:V4DF 1 "nonimmediate_operand")
8291	    (match_dup 1))
8292	  (parallel [(const_int 0) (const_int 4)
8293		     (const_int 2) (const_int 6)])))]
8294  "TARGET_AVX && <mask_avx512vl_condition>")
8295
;; Expander for the 256-bit low interleave on V4DF: selects elements
;; 0,4,2,6 of the concatenation of operand 1 (register) and operand 2
;; (register or memory), i.e. elements 0 and 2 of each input, alternating
;; operand 1 / operand 2.  No preparation code.
8296(define_expand "avx_unpcklpd256<mask_name>"
8297  [(set (match_operand:V4DF 0 "register_operand")
8298	(vec_select:V4DF
8299	  (vec_concat:V8DF
8300	    (match_operand:V4DF 1 "register_operand")
8301	    (match_operand:V4DF 2 "nonimmediate_operand"))
8302	  (parallel [(const_int 0) (const_int 4)
8303		     (const_int 2) (const_int 6)])))]
8304  "TARGET_AVX && <mask_avx512vl_condition>")
8305
;; Insn for the V4DF selection 0,4,2,6 used by the two expanders above.
;; Note the alternative order differs from the 512-bit insn:
;; Alternative 0: distinct sources (operand 1 in a register, operand 2 in
;;   register or memory); emits vunpcklpd.
;; Alternative 1: operand 1 in memory with operand 2 tied to it
;;   (constraint "1"); emits vmovddup reading operand 1 only.
8306(define_insn "*avx_unpcklpd256<mask_name>"
8307  [(set (match_operand:V4DF 0 "register_operand"         "=v,v")
8308	(vec_select:V4DF
8309	  (vec_concat:V8DF
8310	    (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
8311	    (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
8312	  (parallel [(const_int 0) (const_int 4)
8313		     (const_int 2) (const_int 6)])))]
8314  "TARGET_AVX && <mask_avx512vl_condition>"
8315  "@
8316   vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
8317   vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
8318  [(set_attr "type" "sselog")
8319   (set_attr "prefix" "vex")
8320   (set_attr "mode" "V4DF")])
8321
;; Expander producing a full low interleave of two V4DF inputs as a
;; three-step sequence:
;;   tmp3 = elements 0,4,2,6 of (operand 1 ++ operand 2)
;;   tmp4 = elements 1,5,3,7 of (operand 1 ++ operand 2)
;;   op0  = elements 0,1,4,5 of (tmp3 ++ tmp4)
;; With a = operand 1 and b = operand 2 this yields {a0, b0, a1, b1}.
;; The preparation code allocates the two temporaries (operands 3 and 4)
;; as fresh V4DF pseudo registers.
8322(define_expand "vec_interleave_lowv4df"
8323  [(set (match_dup 3)
8324	(vec_select:V4DF
8325	  (vec_concat:V8DF
8326	    (match_operand:V4DF 1 "register_operand")
8327	    (match_operand:V4DF 2 "nonimmediate_operand"))
8328	  (parallel [(const_int 0) (const_int 4)
8329		     (const_int 2) (const_int 6)])))
8330   (set (match_dup 4)
8331	(vec_select:V4DF
8332	  (vec_concat:V8DF
8333	    (match_dup 1)
8334	    (match_dup 2))
8335	  (parallel [(const_int 1) (const_int 5)
8336		     (const_int 3) (const_int 7)])))
8337   (set (match_operand:V4DF 0 "register_operand")
8338	(vec_select:V4DF
8339	  (vec_concat:V8DF
8340	    (match_dup 3)
8341	    (match_dup 4))
8342	  (parallel [(const_int 0) (const_int 1)
8343		     (const_int 4) (const_int 5)])))]
8344 "TARGET_AVX"
8345{
8346  operands[3] = gen_reg_rtx (V4DFmode);
8347  operands[4] = gen_reg_rtx (V4DFmode);
8348})
8349
;; Masked 128-bit low interleave on V2DF.
;; The shuffle result (elements 0 and 2 of operand 1 ++ operand 2) is
;; vec_merge'd with operand 3 under the QImode mask register operand 4
;; (constraint "Yk").  Operand 3 is either tied to the destination ("0")
;; or satisfies constraint "C".
;; The template prints the mask as {%4} followed by %N3.
;; NOTE(review): %N3 presumably emits the zero-masking suffix when
;; operand 3 is the zero vector — confirm against the operand printer.
8350(define_insn "avx512vl_unpcklpd128_mask"
8351  [(set (match_operand:V2DF 0 "register_operand" "=v")
8352	(vec_merge:V2DF
8353	  (vec_select:V2DF
8354	    (vec_concat:V4DF
8355	      (match_operand:V2DF 1 "register_operand" "v")
8356	      (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8357	    (parallel [(const_int 0) (const_int 2)]))
8358	  (match_operand:V2DF 3 "vector_move_operand" "0C")
8359	  (match_operand:QI 4 "register_operand" "Yk")))]
8360  "TARGET_AVX512VL"
8361  "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
8362  [(set_attr "type" "sselog")
8363   (set_attr "prefix" "evex")
8364   (set_attr "mode" "V2DF")])
8365
;; Expander for the V2DF low interleave: result is elements 0 and 2 of
;; (operand 1 ++ operand 2), i.e. the low element of each input.
;; Both inputs may initially be memory.  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the operand
;; combination, operand 1 is copied into a register so that the insn
;; below (which uses the same predicate in its condition) can match.
8366(define_expand "vec_interleave_lowv2df"
8367  [(set (match_operand:V2DF 0 "register_operand")
8368	(vec_select:V2DF
8369	  (vec_concat:V4DF
8370	    (match_operand:V2DF 1 "nonimmediate_operand")
8371	    (match_operand:V2DF 2 "nonimmediate_operand"))
8372	  (parallel [(const_int 0)
8373		     (const_int 2)])))]
8374  "TARGET_SSE2"
8375{
8376  if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
8377    operands[1] = force_reg (V2DFmode, operands[1]);
8378})
8379
;; Insn for the V2DF low interleave (elements 0 and 2 of op1 ++ op2).
;; Six alternatives, selected by operand placement:
;;   0: op1 tied to dest, op2 in x register     -> unpcklpd   (noavx)
;;   1: all operands in v registers             -> vunpcklpd  (avx)
;;   2: op1 in memory, op2 tied to op1          -> %vmovddup  (sse3)
;;   3: op1 tied to dest, op2 in memory         -> movhpd     (noavx)
;;   4: op1 in v register, op2 in memory        -> vmovhpd    (avx)
;;   5: dest is offsettable memory tied to op1,
;;      op2 in v register                       -> %vmovlpd to %H0
;; prefix_data16 is set to "1" for alternatives 3 and 5 only.
8380(define_insn "*vec_interleave_lowv2df"
8381  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,v,v,x,v,o")
8382	(vec_select:V2DF
8383	  (vec_concat:V4DF
8384	    (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0")
8385	    (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v"))
8386	  (parallel [(const_int 0)
8387		     (const_int 2)])))]
8388  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
8389  "@
8390   unpcklpd\t{%2, %0|%0, %2}
8391   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8392   %vmovddup\t{%1, %0|%0, %q1}
8393   movhpd\t{%2, %0|%0, %q2}
8394   vmovhpd\t{%2, %1, %0|%0, %1, %q2}
8395   %vmovlpd\t{%2, %H0|%H0, %2}"
8396  [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
8397   (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
8398   (set (attr "prefix_data16")
8399     (if_then_else (eq_attr "alternative" "3,5")
8400		   (const_string "1")
8401		   (const_string "*")))
8402   (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
8403   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
8404
;; Split (SSE3, after reload): storing to memory a V2DF whose two
;; elements are both element 0 of register operand 1 (selection 0,2 of
;; operand 1 concatenated with itself).
;; Replaced by two DFmode stores of the low part of operand 1, at byte
;; offsets 0 and 8 of the destination.
8405(define_split
8406  [(set (match_operand:V2DF 0 "memory_operand")
8407	(vec_select:V2DF
8408	  (vec_concat:V4DF
8409	    (match_operand:V2DF 1 "register_operand")
8410	    (match_dup 1))
8411	  (parallel [(const_int 0)
8412		     (const_int 2)])))]
8413  "TARGET_SSE3 && reload_completed"
8414  [(const_int 0)]
8415{
8416  rtx low = gen_lowpart (DFmode, operands[1]);
8417
8418  emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
8419  emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
8420  DONE;
8421})
8422
;; Split (SSE3): a register V2DF built by selecting elements (i, i + 2)
;; from a memory operand concatenated with itself, where i is 0 or 1
;; (operand 2) and operand 3 must equal operand 2 + 2.  Both selected
;; elements are therefore element i of the memory vector.
;; Rewritten as a vec_duplicate of the DFmode value at byte offset i * 8
;; within the original memory operand.
8423(define_split
8424  [(set (match_operand:V2DF 0 "register_operand")
8425	(vec_select:V2DF
8426	  (vec_concat:V4DF
8427	    (match_operand:V2DF 1 "memory_operand")
8428	    (match_dup 1))
8429	  (parallel [(match_operand:SI 2 "const_0_to_1_operand")
8430		     (match_operand:SI 3 "const_int_operand")])))]
8431  "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
8432  [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
8433{
8434  operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
8435})
8436
;; Scalar VSCALEF on the low element of a 128-bit float vector.
;; The UNSPEC_SCALEF result of operands 1 and 2 is vec_merge'd with
;; operand 1 under the constant mask 1, so only element 0 comes from the
;; operation and the remaining elements are copied from operand 1.
;; Operand 2's predicate/constraint come from the round_scalar subst
;; attributes (defined elsewhere).
;; In the Intel-syntax half of the template operand 2 is printed with the
;; %<iptr> size override, like the other scalar patterns in this file
;; (vgetexp, vfixupimm, vrndscale scalar forms), so that a memory operand
;; is printed with the scalar element width rather than the full vector
;; width.
8437(define_insn "avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>"
8438  [(set (match_operand:VF_128 0 "register_operand" "=v")
8439	(vec_merge:VF_128
8440	  (unspec:VF_128
8441	    [(match_operand:VF_128 1 "register_operand" "v")
8442	     (match_operand:VF_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")]
8443	    UNSPEC_SCALEF)
8444	  (match_dup 1)
8445	  (const_int 1)))]
8446  "TARGET_AVX512F"
8447  "vscalef<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
8448  [(set_attr "prefix" "evex")
8449   (set_attr "mode"  "<ssescalarmode>")])
8450
;; Packed VSCALEF for every mode in the VF_AVX512VL iterator.
;; The operation itself is opaque to RTL (UNSPEC_SCALEF of operands 1
;; and 2).  Operand 1 must be a register; operand 2's constraint is
;; supplied by the <round_constraint> subst attribute.
;; Masking and rounding decorations are added by the <mask_name> and
;; <round_name> substs through <mask_operand3> / <round_mask_op3>.
8451(define_insn "<avx512>_scalef<mode><mask_name><round_name>"
8452  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8453	(unspec:VF_AVX512VL
8454	  [(match_operand:VF_AVX512VL 1 "register_operand" "v")
8455	   (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
8456	  UNSPEC_SCALEF))]
8457  "TARGET_AVX512F"
8458  "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
8459  [(set_attr "prefix" "evex")
8460   (set_attr "mode"  "<MODE>")])
8461
;; Zero-masking entry point for VPTERNLOG.
;; Takes destination, three vector sources, an 8-bit immediate
;; (operand 4) and a mask register (operand 5), and forwards them to the
;; generated <avx512>_vternlog<mode>_maskz_1 pattern, inserting an
;; all-zero vector (CONST0_RTX) between the immediate and the mask.
;; The RTL template here only declares the operands; all emission is
;; done by the preparation code, which ends with DONE.
8462(define_expand "<avx512>_vternlog<mode>_maskz"
8463  [(match_operand:VI48_AVX512VL 0 "register_operand")
8464   (match_operand:VI48_AVX512VL 1 "register_operand")
8465   (match_operand:VI48_AVX512VL 2 "register_operand")
8466   (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
8467   (match_operand:SI 4 "const_0_to_255_operand")
8468   (match_operand:<avx512fmaskmode> 5 "register_operand")]
8469  "TARGET_AVX512F"
8470{
8471  emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
8472    operands[0], operands[1], operands[2], operands[3],
8473    operands[4], CONST0_RTX (<MODE>mode), operands[5]));
8474  DONE;
8475})
8476
;; VPTERNLOG: UNSPEC_VTERNLOG of three vector inputs and an 8-bit
;; immediate (operand 4).
;; Operand 1 is tied to the destination (constraint "0"), which is why
;; the assembler template prints only %0, %2, %3 and %4.
;; <sd_maskz_name> / <sd_mask_op5> are subst attributes defined
;; elsewhere; the _maskz expander above calls the "_maskz_1" variant of
;; this pattern.
8477(define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
8478  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8479	(unspec:VI48_AVX512VL
8480	  [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
8481	   (match_operand:VI48_AVX512VL 2 "register_operand" "v")
8482	   (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
8483	   (match_operand:SI 4 "const_0_to_255_operand")]
8484	  UNSPEC_VTERNLOG))]
8485  "TARGET_AVX512F"
8486  "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
8487  [(set_attr "type" "sselog")
8488   (set_attr "prefix" "evex")
8489   (set_attr "mode" "<sseinsnmode>")])
8490
;; Merge-masking VPTERNLOG.
;; The UNSPEC_VTERNLOG result is vec_merge'd with operand 1 (match_dup 1)
;; under mask register operand 5.  Since operand 1 is tied to the
;; destination ("0"), elements not selected by the mask keep the
;; destination's previous value.  The mask is printed as {%5}.
8491(define_insn "<avx512>_vternlog<mode>_mask"
8492  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8493	(vec_merge:VI48_AVX512VL
8494	  (unspec:VI48_AVX512VL
8495	    [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
8496	     (match_operand:VI48_AVX512VL 2 "register_operand" "v")
8497	     (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
8498	     (match_operand:SI 4 "const_0_to_255_operand")]
8499	    UNSPEC_VTERNLOG)
8500	  (match_dup 1)
8501	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
8502  "TARGET_AVX512F"
8503  "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
8504  [(set_attr "type" "sselog")
8505   (set_attr "prefix" "evex")
8506   (set_attr "mode" "<sseinsnmode>")])
8507
;; Packed VGETEXP: UNSPEC_GETEXP of the single source operand 1, for
;; every mode in VF_AVX512VL.  Operand 1's predicate and constraint are
;; supplied by the round_saeonly subst attributes (defined elsewhere).
;; The ';' following the output template starts an empty md comment; it
;; is not part of the template string.
8508(define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
8509  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8510        (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
8511                        UNSPEC_GETEXP))]
8512   "TARGET_AVX512F"
8513   "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
8514    [(set_attr "prefix" "evex")
8515     (set_attr "mode" "<MODE>")])
8516
;; Scalar VGETEXP on a 128-bit float vector.
;; UNSPEC_GETEXP of operands 1 and 2 is vec_merge'd with operand 1 under
;; the constant mask 1: element 0 comes from the operation, the other
;; elements are copied from operand 1.
;; In Intel syntax operand 2 is printed with the %<iptr> size override.
;; The ';' following the output template starts an empty md comment.
8517(define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_scalar_name>"
8518  [(set (match_operand:VF_128 0 "register_operand" "=v")
8519	(vec_merge:VF_128
8520	  (unspec:VF_128
8521	    [(match_operand:VF_128 1 "register_operand" "v")
8522	     (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")]
8523	    UNSPEC_GETEXP)
8524	  (match_dup 1)
8525	  (const_int 1)))]
8526   "TARGET_AVX512F"
8527   "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}";
8528    [(set_attr "prefix" "evex")
8529     (set_attr "mode" "<ssescalarmode>")])
8530
;; VALIGN for the integer modes in VI48_AVX512VL: UNSPEC_ALIGN of two
;; vector sources (operand 1 register, operand 2 register or memory) and
;; an 8-bit immediate (operand 3).
;; The ';' following the output template starts an empty md comment.
8531(define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
8532  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8533        (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
8534			       (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
8535			       (match_operand:SI 3 "const_0_to_255_operand")]
8536			      UNSPEC_ALIGN))]
8537  "TARGET_AVX512F"
8538  "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
8539  [(set_attr "prefix" "evex")
8540   (set_attr "mode" "<sseinsnmode>")])
8541
;; Masked 512-bit VSHUFPS entry point taking the raw 8-bit immediate.
;; Operands: 0 dest, 1/2 sources, 3 immediate, 4 merge source, 5 HImode
;; mask.  The immediate is decomposed into the 16 explicit element
;; selectors expected by avx512f_shufps512_1_mask:
;;   bits 1:0 and 3:2 index into operand 1 (selector values 0..3),
;;   bits 5:4 and 7:6 index into operand 2 (selector values 16..19),
;; and the same four selectors are repeated with +4, +8 and +12 added for
;; the remaining three groups of four elements.
8542(define_expand "avx512f_shufps512_mask"
8543  [(match_operand:V16SF 0 "register_operand")
8544   (match_operand:V16SF 1 "register_operand")
8545   (match_operand:V16SF 2 "nonimmediate_operand")
8546   (match_operand:SI 3 "const_0_to_255_operand")
8547   (match_operand:V16SF 4 "register_operand")
8548   (match_operand:HI 5 "register_operand")]
8549  "TARGET_AVX512F"
8550{
8551  int mask = INTVAL (operands[3]);
8552  emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
8553					  GEN_INT ((mask >> 0) & 3),
8554					  GEN_INT ((mask >> 2) & 3),
8555					  GEN_INT (((mask >> 4) & 3) + 16),
8556					  GEN_INT (((mask >> 6) & 3) + 16),
8557					  GEN_INT (((mask >> 0) & 3) + 4),
8558					  GEN_INT (((mask >> 2) & 3) + 4),
8559					  GEN_INT (((mask >> 4) & 3) + 20),
8560					  GEN_INT (((mask >> 6) & 3) + 20),
8561					  GEN_INT (((mask >> 0) & 3) + 8),
8562					  GEN_INT (((mask >> 2) & 3) + 8),
8563					  GEN_INT (((mask >> 4) & 3) + 24),
8564					  GEN_INT (((mask >> 6) & 3) + 24),
8565					  GEN_INT (((mask >> 0) & 3) + 12),
8566					  GEN_INT (((mask >> 2) & 3) + 12),
8567					  GEN_INT (((mask >> 4) & 3) + 28),
8568					  GEN_INT (((mask >> 6) & 3) + 28),
8569					  operands[4], operands[5]));
8570  DONE;
8571})
8572
8573
;; Zero-masking entry point for packed VFIXUPIMM.
;; Forwards operands 0-4 to the generated
;; <avx512>_fixupimm<mode>_maskz_1 pattern, inserting an all-zero vector
;; (CONST0_RTX) before the mask register operand 5.  Any extra operand
;; required by the round_saeonly expand subst is appended through
;; <round_saeonly_expand_operand6>.
8574(define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
8575  [(match_operand:VF_AVX512VL 0 "register_operand")
8576   (match_operand:VF_AVX512VL 1 "register_operand")
8577   (match_operand:VF_AVX512VL 2 "register_operand")
8578   (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
8579   (match_operand:SI 4 "const_0_to_255_operand")
8580   (match_operand:<avx512fmaskmode> 5 "register_operand")]
8581  "TARGET_AVX512F"
8582{
8583  emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
8584	operands[0], operands[1], operands[2], operands[3],
8585	operands[4], CONST0_RTX (<MODE>mode), operands[5]
8586	<round_saeonly_expand_operand6>));
8587  DONE;
8588})
8589
;; Packed VFIXUPIMM: UNSPEC_FIXUPIMM of two float vectors (operand 1,
;; tied to the destination, and operand 2), an integer vector of mode
;; <sseintvecmode> (operand 3) and an 8-bit immediate (operand 4).
;; Because operand 1 shares the destination register it is not printed.
;; The ';' following the output template starts an empty md comment.
8590(define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
8591  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8592        (unspec:VF_AVX512VL
8593          [(match_operand:VF_AVX512VL 1 "register_operand" "0")
8594	   (match_operand:VF_AVX512VL 2 "register_operand" "v")
8595           (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
8596           (match_operand:SI 4 "const_0_to_255_operand")]
8597           UNSPEC_FIXUPIMM))]
8598  "TARGET_AVX512F"
8599  "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
8600  [(set_attr "prefix" "evex")
8601   (set_attr "mode" "<MODE>")])
8602
;; Merge-masking packed VFIXUPIMM.
;; The UNSPEC_FIXUPIMM result is vec_merge'd with operand 1 (tied to the
;; destination) under mask register operand 5, printed as {%5}.
;; The ';' following the output template starts an empty md comment.
8603(define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
8604  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8605	(vec_merge:VF_AVX512VL
8606          (unspec:VF_AVX512VL
8607            [(match_operand:VF_AVX512VL 1 "register_operand" "0")
8608	     (match_operand:VF_AVX512VL 2 "register_operand" "v")
8609             (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
8610             (match_operand:SI 4 "const_0_to_255_operand")]
8611             UNSPEC_FIXUPIMM)
8612	  (match_dup 1)
8613	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
8614  "TARGET_AVX512F"
8615  "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
8616  [(set_attr "prefix" "evex")
8617   (set_attr "mode" "<MODE>")])
8618
;; Zero-masking entry point for scalar VFIXUPIMM (VF_128 modes).
;; Same shape as the packed _maskz expander: forwards operands 0-4 to
;; avx512f_sfixupimm<mode>_maskz_1, inserting CONST0_RTX before the mask
;; register operand 5 and appending <round_saeonly_expand_operand6>.
8619(define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
8620  [(match_operand:VF_128 0 "register_operand")
8621   (match_operand:VF_128 1 "register_operand")
8622   (match_operand:VF_128 2 "register_operand")
8623   (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
8624   (match_operand:SI 4 "const_0_to_255_operand")
8625   (match_operand:<avx512fmaskmode> 5 "register_operand")]
8626  "TARGET_AVX512F"
8627{
8628  emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
8629	operands[0], operands[1], operands[2], operands[3],
8630	operands[4], CONST0_RTX (<MODE>mode), operands[5]
8631	<round_saeonly_expand_operand6>));
8632  DONE;
8633})
8634
;; Scalar VFIXUPIMM.
;; UNSPEC_FIXUPIMM of operands 1-4 is vec_merge'd with operand 1 under
;; the constant mask 1, so only element 0 takes the computed value.
;; Operand 1 is tied to the destination and is not printed; operand 3 is
;; printed with the %<iptr> size override in Intel syntax.
;; The ';' following the output template starts an empty md comment.
8635(define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
8636  [(set (match_operand:VF_128 0 "register_operand" "=v")
8637	(vec_merge:VF_128
8638          (unspec:VF_128
8639            [(match_operand:VF_128 1 "register_operand" "0")
8640	     (match_operand:VF_128 2 "register_operand" "v")
8641	     (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
8642	     (match_operand:SI 4 "const_0_to_255_operand")]
8643	    UNSPEC_FIXUPIMM)
8644	  (match_dup 1)
8645	  (const_int 1)))]
8646   "TARGET_AVX512F"
8647   "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %<iptr>3<round_saeonly_sd_mask_op5>, %4}";
8648   [(set_attr "prefix" "evex")
8649   (set_attr "mode" "<ssescalarmode>")])
8650
;; Merge-masking scalar VFIXUPIMM.
;; Two nested vec_merges: the inner one (constant mask 1) restricts the
;; UNSPEC_FIXUPIMM result to element 0, the outer one merges that with
;; operand 1 under mask register operand 5.  Operand 1 is tied to the
;; destination in both merges.
;; The ';' following the output template starts an empty md comment.
8651(define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
8652  [(set (match_operand:VF_128 0 "register_operand" "=v")
8653	(vec_merge:VF_128
8654	  (vec_merge:VF_128
8655	    (unspec:VF_128
8656	       [(match_operand:VF_128 1 "register_operand" "0")
8657		(match_operand:VF_128 2 "register_operand" "v")
8658		(match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
8659		(match_operand:SI 4 "const_0_to_255_operand")]
8660	       UNSPEC_FIXUPIMM)
8661	    (match_dup 1)
8662	    (const_int 1))
8663	  (match_dup 1)
8664	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
8665  "TARGET_AVX512F"
8666  "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %<iptr>3<round_saeonly_op6>, %4}";
8667  [(set_attr "prefix" "evex")
8668   (set_attr "mode" "<ssescalarmode>")])
8669
;; Packed VRNDSCALE: UNSPEC_ROUND of source operand 1 and an 8-bit
;; immediate (operand 2), for every mode in VF_AVX512VL.
;; length_immediate is 1 because the instruction carries the immediate
;; byte printed as %2.
8670(define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
8671  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8672	(unspec:VF_AVX512VL
8673	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
8674	   (match_operand:SI 2 "const_0_to_255_operand")]
8675	  UNSPEC_ROUND))]
8676  "TARGET_AVX512F"
8677  "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
8678  [(set_attr "length_immediate" "1")
8679   (set_attr "prefix" "evex")
8680   (set_attr "mode" "<MODE>")])
8681
;; Scalar VRNDSCALE on a 128-bit float vector.
;; UNSPEC_ROUND of operands 1, 2 and the 8-bit immediate operand 3 is
;; vec_merge'd with operand 1 under the constant mask 1: element 0 takes
;; the computed value, the rest is copied from operand 1.
;; Operand 2 is printed with the %<iptr> size override in Intel syntax.
8682(define_insn "avx512f_rndscale<mode><round_saeonly_name>"
8683  [(set (match_operand:VF_128 0 "register_operand" "=v")
8684	(vec_merge:VF_128
8685	  (unspec:VF_128
8686	    [(match_operand:VF_128 1 "register_operand" "v")
8687	     (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
8688	     (match_operand:SI 3 "const_0_to_255_operand")]
8689	    UNSPEC_ROUND)
8690	  (match_dup 1)
8691	  (const_int 1)))]
8692  "TARGET_AVX512F"
8693  "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
8694  [(set_attr "length_immediate" "1")
8695   (set_attr "prefix" "evex")
8696   (set_attr "mode" "<MODE>")])
8697
8698;; One bit in mask selects 2 elements.
;; 512-bit VSHUFPS expressed as an explicit 16-element vec_select from
;; the V32SF concatenation of operands 1 and 2.
;; Operands 3..18 are the element selectors.  The predicates fix the
;; range of each selector, and the insn condition additionally requires
;; selectors 7-10, 11-14 and 15-18 to equal selectors 3-6 plus 4, 8 and
;; 12 respectively, i.e. the same four choices in every group of four
;; elements.
;; The output code rebuilds the 8-bit immediate from selectors 3-6 only
;; (two bits each; the two operand-2 selectors are rebased by -16) and
;; stores it back into operands[3] for printing.
8699(define_insn "avx512f_shufps512_1<mask_name>"
8700  [(set (match_operand:V16SF 0 "register_operand" "=v")
8701	(vec_select:V16SF
8702	  (vec_concat:V32SF
8703	    (match_operand:V16SF 1 "register_operand" "v")
8704	    (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
8705	  (parallel [(match_operand 3  "const_0_to_3_operand")
8706		     (match_operand 4  "const_0_to_3_operand")
8707		     (match_operand 5  "const_16_to_19_operand")
8708		     (match_operand 6  "const_16_to_19_operand")
8709		     (match_operand 7  "const_4_to_7_operand")
8710		     (match_operand 8  "const_4_to_7_operand")
8711		     (match_operand 9  "const_20_to_23_operand")
8712		     (match_operand 10  "const_20_to_23_operand")
8713		     (match_operand 11  "const_8_to_11_operand")
8714		     (match_operand 12  "const_8_to_11_operand")
8715		     (match_operand 13  "const_24_to_27_operand")
8716		     (match_operand 14  "const_24_to_27_operand")
8717		     (match_operand 15  "const_12_to_15_operand")
8718		     (match_operand 16  "const_12_to_15_operand")
8719		     (match_operand 17  "const_28_to_31_operand")
8720		     (match_operand 18  "const_28_to_31_operand")])))]
8721  "TARGET_AVX512F
8722   && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
8723       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
8724       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
8725       && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
8726       && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
8727       && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
8728       && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
8729       && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
8730       && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
8731       && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
8732       && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
8733       && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
8734{
8735  int mask;
8736  mask = INTVAL (operands[3]);
8737  mask |= INTVAL (operands[4]) << 2;
8738  mask |= (INTVAL (operands[5]) - 16) << 4;
8739  mask |= (INTVAL (operands[6]) - 16) << 6;
8740  operands[3] = GEN_INT (mask);
8741
8742  return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
8743}
8744  [(set_attr "type" "sselog")
8745   (set_attr "length_immediate" "1")
8746   (set_attr "prefix" "evex")
8747   (set_attr "mode" "V16SF")])
8748
;; Masked 512-bit VSHUFPD entry point taking the raw 8-bit immediate.
;; Operands: 0 dest, 1/2 sources, 3 immediate, 4 merge source, 5 QImode
;; mask.  Each immediate bit is turned into one explicit element selector
;; for avx512f_shufpd512_1_mask: even bits (0,2,4,6) choose between the
;; two elements of a pair in operand 1 (0/1, 2/3, 4/5, 6/7), odd bits
;; (1,3,5,7) choose within the corresponding pair of operand 2
;; (8/9, 10/11, 12/13, 14/15).
8749(define_expand "avx512f_shufpd512_mask"
8750  [(match_operand:V8DF 0 "register_operand")
8751   (match_operand:V8DF 1 "register_operand")
8752   (match_operand:V8DF 2 "nonimmediate_operand")
8753   (match_operand:SI 3 "const_0_to_255_operand")
8754   (match_operand:V8DF 4 "register_operand")
8755   (match_operand:QI 5 "register_operand")]
8756  "TARGET_AVX512F"
8757{
8758  int mask = INTVAL (operands[3]);
8759  emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
8760					GEN_INT (mask & 1),
8761					GEN_INT (mask & 2 ? 9 : 8),
8762					GEN_INT (mask & 4 ? 3 : 2),
8763					GEN_INT (mask & 8 ? 11 : 10),
8764					GEN_INT (mask & 16 ? 5 : 4),
8765					GEN_INT (mask & 32 ? 13 : 12),
8766					GEN_INT (mask & 64 ? 7 : 6),
8767					GEN_INT (mask & 128 ? 15 : 14),
8768					operands[4], operands[5]));
8769  DONE;
8770})
8771
;; 512-bit VSHUFPD expressed as an explicit 8-element vec_select from the
;; V16DF concatenation of operands 1 and 2.
;; Operands 3..10 are the selectors; each predicate allows exactly two
;; adjacent values, so every selector carries one bit of information.
;; The output code rebuilds the 8-bit immediate by subtracting each
;; selector's base (0, 8, 2, 10, 4, 12, 6, 14) and placing the resulting
;; bit at positions 0..7, then stores it in operands[3] for printing.
8772(define_insn "avx512f_shufpd512_1<mask_name>"
8773  [(set (match_operand:V8DF 0 "register_operand" "=v")
8774	(vec_select:V8DF
8775	  (vec_concat:V16DF
8776	    (match_operand:V8DF 1 "register_operand" "v")
8777	    (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
8778	  (parallel [(match_operand 3 "const_0_to_1_operand")
8779		     (match_operand 4 "const_8_to_9_operand")
8780		     (match_operand 5 "const_2_to_3_operand")
8781		     (match_operand 6 "const_10_to_11_operand")
8782		     (match_operand 7 "const_4_to_5_operand")
8783		     (match_operand 8 "const_12_to_13_operand")
8784		     (match_operand 9 "const_6_to_7_operand")
8785		     (match_operand 10 "const_14_to_15_operand")])))]
8786  "TARGET_AVX512F"
8787{
8788  int mask;
8789  mask = INTVAL (operands[3]);
8790  mask |= (INTVAL (operands[4]) - 8) << 1;
8791  mask |= (INTVAL (operands[5]) - 2) << 2;
8792  mask |= (INTVAL (operands[6]) - 10) << 3;
8793  mask |= (INTVAL (operands[7]) - 4) << 4;
8794  mask |= (INTVAL (operands[8]) - 12) << 5;
8795  mask |= (INTVAL (operands[9]) - 6) << 6;
8796  mask |= (INTVAL (operands[10]) - 14) << 7;
8797  operands[3] = GEN_INT (mask);
8798
8799  return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
8800}
8801  [(set_attr "type" "sselog")
8802   (set_attr "length_immediate" "1")
8803   (set_attr "prefix" "evex")
8804   (set_attr "mode" "V8DF")])
8805
;; 256-bit VSHUFPD entry point taking the raw immediate (operand 3).
;; Only the low four bits of the immediate are examined: bits 0 and 2
;; choose within the pairs 0/1 and 2/3 of operand 1, bits 1 and 3 choose
;; within the pairs 4/5 and 6/7 (elements of operand 2 in the V8DF
;; concatenation).  The selectors are passed to avx_shufpd256_1; the
;; <mask_expand4_name> / <mask_expand4_args> subst attributes (defined
;; elsewhere) supply the optional masking variant and its extra
;; arguments.
8806(define_expand "avx_shufpd256<mask_expand4_name>"
8807  [(match_operand:V4DF 0 "register_operand")
8808   (match_operand:V4DF 1 "register_operand")
8809   (match_operand:V4DF 2 "nonimmediate_operand")
8810   (match_operand:SI 3 "const_int_operand")]
8811  "TARGET_AVX"
8812{
8813  int mask = INTVAL (operands[3]);
8814  emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
8815						     operands[1],
8816						     operands[2],
8817						     GEN_INT (mask & 1),
8818						     GEN_INT (mask & 2 ? 5 : 4),
8819						     GEN_INT (mask & 4 ? 3 : 2),
8820						     GEN_INT (mask & 8 ? 7 : 6)
8821						     <mask_expand4_args>));
8822  DONE;
8823})
8824
;; 256-bit VSHUFPD as an explicit 4-element vec_select from the V8DF
;; concatenation of operands 1 and 2.
;; Selectors 3..6 are each limited to two adjacent values by their
;; predicates.  The output code converts them back to a 4-bit immediate
;; (bases 0, 4, 2, 6 -> bit positions 0..3) and stores it in operands[3]
;; for printing.
8825(define_insn "avx_shufpd256_1<mask_name>"
8826  [(set (match_operand:V4DF 0 "register_operand" "=v")
8827	(vec_select:V4DF
8828	  (vec_concat:V8DF
8829	    (match_operand:V4DF 1 "register_operand" "v")
8830	    (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
8831	  (parallel [(match_operand 3 "const_0_to_1_operand")
8832		     (match_operand 4 "const_4_to_5_operand")
8833		     (match_operand 5 "const_2_to_3_operand")
8834		     (match_operand 6 "const_6_to_7_operand")])))]
8835  "TARGET_AVX && <mask_avx512vl_condition>"
8836{
8837  int mask;
8838  mask = INTVAL (operands[3]);
8839  mask |= (INTVAL (operands[4]) - 4) << 1;
8840  mask |= (INTVAL (operands[5]) - 2) << 2;
8841  mask |= (INTVAL (operands[6]) - 6) << 3;
8842  operands[3] = GEN_INT (mask);
8843
8844  return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
8845}
8846  [(set_attr "type" "sseshuf")
8847   (set_attr "length_immediate" "1")
8848   (set_attr "prefix" "vex")
8849   (set_attr "mode" "V4DF")])
8850
;; 128-bit SHUFPD entry point taking the raw immediate (operand 3).
;; Bit 0 selects element 0 or 1 (from operand 1), bit 1 selects element
;; 2 or 3 (from operand 2 in the V4DF concatenation).  The two selectors
;; are passed to sse2_shufpd_v2df, or to its masked variant when the
;; <mask_expand4_name> subst applies, together with <mask_expand4_args>.
8851(define_expand "sse2_shufpd<mask_expand4_name>"
8852  [(match_operand:V2DF 0 "register_operand")
8853   (match_operand:V2DF 1 "register_operand")
8854   (match_operand:V2DF 2 "vector_operand")
8855   (match_operand:SI 3 "const_int_operand")]
8856  "TARGET_SSE2"
8857{
8858  int mask = INTVAL (operands[3]);
8859  emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
8860						      operands[2], GEN_INT (mask & 1),
8861						      GEN_INT (mask & 2 ? 3 : 2)
8862						      <mask_expand4_args>));
8863  DONE;
8864})
8865
;; Masked 128-bit VSHUFPD (AVX512VL).
;; The two-element shuffle of (operand 1 ++ operand 2) is vec_merge'd
;; with operand 5 (tied to the destination, or satisfying constraint
;; "C") under the QImode mask register operand 6.
;; The output code folds selectors 3 and 4 into a 2-bit immediate stored
;; in operands[3]; the mask is printed as {%6} followed by %N5.
8866(define_insn "sse2_shufpd_v2df_mask"
8867  [(set (match_operand:V2DF 0 "register_operand" "=v")
8868    (vec_merge:V2DF
8869	  (vec_select:V2DF
8870	    (vec_concat:V4DF
8871	      (match_operand:V2DF 1 "register_operand" "v")
8872	      (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8873	    (parallel [(match_operand 3 "const_0_to_1_operand")
8874		           (match_operand 4 "const_2_to_3_operand")]))
8875      (match_operand:V2DF 5 "vector_move_operand" "0C")
8876      (match_operand:QI 6 "register_operand" "Yk")))]
8877  "TARGET_AVX512VL"
8878{
8879  int mask;
8880  mask = INTVAL (operands[3]);
8881  mask |= (INTVAL (operands[4]) - 2) << 1;
8882  operands[3] = GEN_INT (mask);
8883
8884  return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
8885}
8886  [(set_attr "type" "sseshuf")
8887   (set_attr "length_immediate" "1")
8888   (set_attr "prefix" "evex")
8889   (set_attr "mode" "V2DF")])
8890
8891;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit VPUNPCKHQDQ on V4DI: selects elements 1,5,3,7 of the V8DI
;; concatenation of operands 1 and 2, i.e. the odd-indexed elements of
;; the two inputs, alternating operand 1 / operand 2.
8892(define_insn "avx2_interleave_highv4di<mask_name>"
8893  [(set (match_operand:V4DI 0 "register_operand" "=v")
8894	(vec_select:V4DI
8895	  (vec_concat:V8DI
8896	    (match_operand:V4DI 1 "register_operand" "v")
8897	    (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8898	  (parallel [(const_int 1)
8899		     (const_int 5)
8900		     (const_int 3)
8901		     (const_int 7)])))]
8902  "TARGET_AVX2 && <mask_avx512vl_condition>"
8903  "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8904  [(set_attr "type" "sselog")
8905   (set_attr "prefix" "vex")
8906   (set_attr "mode" "OI")])
8907
;; 512-bit VPUNPCKHQDQ on V8DI: selects elements 1,9,3,11,5,13,7,15 of
;; the V16DI concatenation of operands 1 and 2, i.e. the odd-indexed
;; elements of the two inputs, alternating operand 1 / operand 2.
8908(define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
8909  [(set (match_operand:V8DI 0 "register_operand" "=v")
8910	(vec_select:V8DI
8911	  (vec_concat:V16DI
8912	    (match_operand:V8DI 1 "register_operand" "v")
8913	    (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8914	  (parallel [(const_int 1) (const_int 9)
8915		     (const_int 3) (const_int 11)
8916		     (const_int 5) (const_int 13)
8917		     (const_int 7) (const_int 15)])))]
8918  "TARGET_AVX512F"
8919  "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8920  [(set_attr "type" "sselog")
8921   (set_attr "prefix" "evex")
8922   (set_attr "mode" "XI")])
8923
;; 128-bit high interleave on V2DI: selects elements 1 and 3 of the V4DI
;; concatenation of operands 1 and 2 (the high element of each input).
;; Alternative 0 (noavx): two-operand punpckhqdq, operand 1 tied to the
;;   destination; prefix_data16 is "1".
;; Alternative 1 (avx): three-operand vpunpckhqdq; the prefix attribute
;;   comes from the <mask_prefix> subst attribute.
8924(define_insn "vec_interleave_highv2di<mask_name>"
8925  [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8926	(vec_select:V2DI
8927	  (vec_concat:V4DI
8928	    (match_operand:V2DI 1 "register_operand" "0,v")
8929	    (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
8930	  (parallel [(const_int 1)
8931		     (const_int 3)])))]
8932  "TARGET_SSE2 && <mask_avx512vl_condition>"
8933  "@
8934   punpckhqdq\t{%2, %0|%0, %2}
8935   vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8936  [(set_attr "isa" "noavx,avx")
8937   (set_attr "type" "sselog")
8938   (set_attr "prefix_data16" "1,*")
8939   (set_attr "prefix" "orig,<mask_prefix>")
8940   (set_attr "mode" "TI")])
8941
;; 256-bit VPUNPCKLQDQ on V4DI: selects elements 0,4,2,6 of the V8DI
;; concatenation of operands 1 and 2, i.e. the even-indexed elements of
;; the two inputs, alternating operand 1 / operand 2.
8942(define_insn "avx2_interleave_lowv4di<mask_name>"
8943  [(set (match_operand:V4DI 0 "register_operand" "=v")
8944	(vec_select:V4DI
8945	  (vec_concat:V8DI
8946	    (match_operand:V4DI 1 "register_operand" "v")
8947	    (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8948	  (parallel [(const_int 0)
8949		     (const_int 4)
8950		     (const_int 2)
8951		     (const_int 6)])))]
8952  "TARGET_AVX2 && <mask_avx512vl_condition>"
8953  "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8954  [(set_attr "type" "sselog")
8955   (set_attr "prefix" "vex")
8956   (set_attr "mode" "OI")])
8957
;; 512-bit VPUNPCKLQDQ on V8DI: selects elements 0,8,2,10,4,12,6,14 of
;; the V16DI concatenation of operands 1 and 2, i.e. the even-indexed
;; elements of the two inputs, alternating operand 1 / operand 2.
8958(define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
8959  [(set (match_operand:V8DI 0 "register_operand" "=v")
8960	(vec_select:V8DI
8961	  (vec_concat:V16DI
8962	    (match_operand:V8DI 1 "register_operand" "v")
8963	    (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8964	  (parallel [(const_int 0) (const_int 8)
8965		     (const_int 2) (const_int 10)
8966		     (const_int 4) (const_int 12)
8967		     (const_int 6) (const_int 14)])))]
8968  "TARGET_AVX512F"
8969  "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8970  [(set_attr "type" "sselog")
8971   (set_attr "prefix" "evex")
8972   (set_attr "mode" "XI")])
8973
;; 128-bit low interleave on V2DI: selects elements 0 and 2 of the V4DI
;; concatenation of operands 1 and 2 (the low element of each input).
;; Alternative 0 (noavx): two-operand punpcklqdq, operand 1 tied to the
;;   destination; prefix_data16 is "1".
;; Alternative 1 (avx): three-operand vpunpcklqdq.  Because this pattern
;;   is also instantiated by the <mask_name> subst, the prefix attribute
;;   of this alternative uses <mask_prefix>, exactly as the matching
;;   vec_interleave_highv2di pattern does, instead of a fixed "vex".
8974(define_insn "vec_interleave_lowv2di<mask_name>"
8975  [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8976	(vec_select:V2DI
8977	  (vec_concat:V4DI
8978	    (match_operand:V2DI 1 "register_operand" "0,v")
8979	    (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
8980	  (parallel [(const_int 0)
8981		     (const_int 2)])))]
8982  "TARGET_SSE2 && <mask_avx512vl_condition>"
8983  "@
8984   punpcklqdq\t{%2, %0|%0, %2}
8985   vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8986  [(set_attr "isa" "noavx,avx")
8987   (set_attr "type" "sselog")
8988   (set_attr "prefix_data16" "1,*")
8989   (set_attr "prefix" "orig,<mask_prefix>")
8990   (set_attr "mode" "TI")])
8991
;; Unmasked 128-bit SHUFPD for the two-element modes in VI8F_128.
;; Selector 3 (0 or 1) picks an element of operand 1, selector 4 (2 or 3)
;; picks an element of operand 2 within the double-width concatenation.
;; The output code folds both selectors into a 2-bit immediate stored in
;; operands[3], then chooses the template by alternative:
;;   0 (noavx): two-operand shufpd, operand 1 tied to the destination;
;;   1 (avx):   three-operand vshufpd.
8992(define_insn "sse2_shufpd_<mode>"
8993  [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
8994	(vec_select:VI8F_128
8995	  (vec_concat:<ssedoublevecmode>
8996	    (match_operand:VI8F_128 1 "register_operand" "0,v")
8997	    (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
8998	  (parallel [(match_operand 3 "const_0_to_1_operand")
8999		     (match_operand 4 "const_2_to_3_operand")])))]
9000  "TARGET_SSE2"
9001{
9002  int mask;
9003  mask = INTVAL (operands[3]);
9004  mask |= (INTVAL (operands[4]) - 2) << 1;
9005  operands[3] = GEN_INT (mask);
9006
9007  switch (which_alternative)
9008    {
9009    case 0:
9010      return "shufpd\t{%3, %2, %0|%0, %2, %3}";
9011    case 1:
9012      return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9013    default:
9014      gcc_unreachable ();
9015    }
9016}
9017  [(set_attr "isa" "noavx,avx")
9018   (set_attr "type" "sseshuf")
9019   (set_attr "length_immediate" "1")
9020   (set_attr "prefix" "orig,maybe_evex")
9021   (set_attr "mode" "V2DF")])
9022
9023;; Avoid combining registers from different units in a single alternative,
9024;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 (the high double) of a V2DF into a DFmode
;; destination.  At most one of the two operands may be memory.
;; Alternatives:
;;   0: memory dest, v-register source          -> %vmovhpd
;;   1: x-register dest tied to the source      -> unpckhpd %0, %0 (noavx)
;;   2: Yv-register dest, v-register source     -> vunpckhpd       (avx)
;;   3-5: offsettable memory source into an x, *f or r register; the
;;        template is "#", so these must be split into other insns.
;;        NOTE(review): presumably handled by the define_split that
;;        follows this pattern — confirm.
;; prefix_data16 is "1" only for alternative 0 when AVX is not enabled.
9025(define_insn "sse2_storehpd"
9026  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,Yv,x,*f,r")
9027	(vec_select:DF
9028	  (match_operand:V2DF 1 "nonimmediate_operand" " v,0, v,o,o,o")
9029	  (parallel [(const_int 1)])))]
9030  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9031  "@
9032   %vmovhpd\t{%1, %0|%0, %1}
9033   unpckhpd\t%0, %0
9034   vunpckhpd\t{%d1, %0|%0, %d1}
9035   #
9036   #
9037   #"
9038  [(set_attr "isa" "*,noavx,avx,*,*,*")
9039   (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
9040   (set (attr "prefix_data16")
9041     (if_then_else
9042       (and (eq_attr "alternative" "0")
9043	    (not (match_test "TARGET_AVX")))
9044       (const_string "1")
9045       (const_string "*")))
9046   (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
9047   (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
9048
;; After reload, extracting element 1 of a V2DF that lives in memory into a
;; register becomes a plain DF move from the same address plus 8 bytes.
9049(define_split
9050  [(set (match_operand:DF 0 "register_operand")
9051	(vec_select:DF
9052	  (match_operand:V2DF 1 "memory_operand")
9053	  (parallel [(const_int 1)])))]
9054  "TARGET_SSE2 && reload_completed"
9055  [(set (match_dup 0) (match_dup 1))]
9056  "operands[1] = adjust_address (operands[1], DFmode, 8);")
9057
;; Extract element 1 of a V2DF when only SSE (not SSE2) is available, using
;; single-precision move instructions: movhps for a store to memory, movhlps
;; between registers, and movlps reading the memory source through the %H
;; operand modifier.  Both operands may not be memory at once.
9058(define_insn "*vec_extractv2df_1_sse"
9059  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
9060	(vec_select:DF
9061	  (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
9062	  (parallel [(const_int 1)])))]
9063  "!TARGET_SSE2 && TARGET_SSE
9064   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9065  "@
9066   movhps\t{%1, %0|%q0, %1}
9067   movhlps\t{%1, %0|%0, %1}
9068   movlps\t{%H1, %0|%0, %H1}"
9069  [(set_attr "type" "ssemov")
9070   (set_attr "mode" "V2SF,V4SF,V2SF")])
9071
9072;; Avoid combining registers from different units in a single alternative,
9073;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 (the low double) of a V2DF into a DF destination.
;; Only alternative 0 (vector register to memory, movlpd) emits an
;; instruction directly; the remaining alternatives output "#" and must be
;; split.  Both operands may not be memory at once.
9074(define_insn "sse2_storelpd"
9075  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x,*f,r")
9076	(vec_select:DF
9077	  (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
9078	  (parallel [(const_int 0)])))]
9079  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9080  "@
9081   %vmovlpd\t{%1, %0|%0, %1}
9082   #
9083   #
9084   #
9085   #"
9086  [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
9087   (set (attr "prefix_data16")
9088     (if_then_else (eq_attr "alternative" "0")
9089		   (const_string "1")
9090		   (const_string "*")))
9091   (set_attr "prefix" "maybe_vex")
9092   (set_attr "mode" "V1DF,DF,DF,DF,DF")])
9093
;; After reload, extracting element 0 of a V2DF into a register becomes a
;; plain DF move from the DFmode low part of the source (gen_lowpart).
9094(define_split
9095  [(set (match_operand:DF 0 "register_operand")
9096	(vec_select:DF
9097	  (match_operand:V2DF 1 "nonimmediate_operand")
9098	  (parallel [(const_int 0)])))]
9099  "TARGET_SSE2 && reload_completed"
9100  [(set (match_dup 0) (match_dup 1))]
9101  "operands[1] = gen_lowpart (DFmode, operands[1]);")
9102
;; Extract element 0 of a V2DF when only SSE (not SSE2) is available:
;; movlps for a store to memory, movaps between registers, and movlps for a
;; load from memory.  Both operands may not be memory at once.
9103(define_insn "*vec_extractv2df_0_sse"
9104  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
9105	(vec_select:DF
9106	  (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
9107	  (parallel [(const_int 0)])))]
9108  "!TARGET_SSE2 && TARGET_SSE
9109   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9110  "@
9111   movlps\t{%1, %0|%0, %1}
9112   movaps\t{%1, %0|%0, %1}
9113   movlps\t{%1, %0|%0, %q1}"
9114  [(set_attr "type" "ssemov")
9115   (set_attr "mode" "V2SF,V4SF,V2SF")])
9116
;; Expander for sse2_loadhpd: build a V2DF from element 0 of operand 1 and
;; the DF operand 2.  The operands are first passed through
;; ix86_fixup_binary_operands, which returns the destination to use; the
;; insn is emitted into that destination and the result is then moved to
;; operands[0] if the two differ.
9117(define_expand "sse2_loadhpd_exp"
9118  [(set (match_operand:V2DF 0 "nonimmediate_operand")
9119	(vec_concat:V2DF
9120	  (vec_select:DF
9121	    (match_operand:V2DF 1 "nonimmediate_operand")
9122	    (parallel [(const_int 0)]))
9123	  (match_operand:DF 2 "nonimmediate_operand")))]
9124  "TARGET_SSE2"
9125{
9126  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
9127
9128  emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
9129
9130  /* Fix up the destination if needed.  */
9131  if (dst != operands[0])
9132    emit_move_insn (operands[0], dst);
9133
9134  DONE;
9135})
9136
9137;; Avoid combining registers from different units in a single alternative,
9138;; see comment above inline_secondary_memory_needed function in i386.c
;; Replace the high element of V2DF operand 1 with DF operand 2, keeping
;; element 0 of operand 1.  Alternatives 0-1 load operand 2 from memory with
;; movhpd, alternatives 2-3 take it from a register with unpcklpd, and
;; alternatives 4-6 (memory destination tied to operand 1) output "#" and
;; must be split.  Operands 1 and 2 may not both be memory.
9139(define_insn "sse2_loadhpd"
9140  [(set (match_operand:V2DF 0 "nonimmediate_operand"
9141	  "=x,v,x,v ,o,o ,o")
9142	(vec_concat:V2DF
9143	  (vec_select:DF
9144	    (match_operand:V2DF 1 "nonimmediate_operand"
9145	  " 0,v,0,v ,0,0 ,0")
9146	    (parallel [(const_int 0)]))
9147	  (match_operand:DF 2 "nonimmediate_operand"
9148	  " m,m,x,Yv,x,*f,r")))]
9149  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9150  "@
9151   movhpd\t{%2, %0|%0, %2}
9152   vmovhpd\t{%2, %1, %0|%0, %1, %2}
9153   unpcklpd\t{%2, %0|%0, %2}
9154   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9155   #
9156   #
9157   #"
9158  [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
9159   (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
9160   (set (attr "prefix_data16")
9161     (if_then_else (eq_attr "alternative" "0")
9162		   (const_string "1")
9163		   (const_string "*")))
9164   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
9165   (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
9166
;; After reload, replacing the high element of a V2DF in memory with a
;; register value (low element unchanged) becomes a DF store to the same
;; address plus 8 bytes.
9167(define_split
9168  [(set (match_operand:V2DF 0 "memory_operand")
9169	(vec_concat:V2DF
9170	  (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
9171	  (match_operand:DF 1 "register_operand")))]
9172  "TARGET_SSE2 && reload_completed"
9173  [(set (match_dup 0) (match_dup 1))]
9174  "operands[0] = adjust_address (operands[0], DFmode, 8);")
9175
;; Expander for sse2_loadlpd: build a V2DF from the DF operand 2 (low
;; element) and element 1 of operand 1 (high element).  The operands are
;; first passed through ix86_fixup_binary_operands, which returns the
;; destination to use; the insn is emitted into that destination and the
;; result is then moved to operands[0] if the two differ.
9176(define_expand "sse2_loadlpd_exp"
9177  [(set (match_operand:V2DF 0 "nonimmediate_operand")
9178	(vec_concat:V2DF
9179	  (match_operand:DF 2 "nonimmediate_operand")
9180	  (vec_select:DF
9181	    (match_operand:V2DF 1 "nonimmediate_operand")
9182	    (parallel [(const_int 1)]))))]
9183  "TARGET_SSE2"
9184{
9185  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
9186
9187  emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
9188
9189  /* Fix up the destination if needed.  */
9190  if (dst != operands[0])
9191    emit_move_insn (operands[0], dst);
9192
9193  DONE;
9194})
9195
9196;; Avoid combining registers from different units in a single alternative,
9197;; see comment above inline_secondary_memory_needed function in i386.c
;; Replace the low element of V2DF operand 1 with DF operand 2, keeping
;; element 1 of operand 1.  By alternative:
;;   0     operand 1 matches constraint "C"; a single movq of operand 2
;;   1-2   operand 2 in memory, movlpd
;;   3-4   operand 2 in a register, movsd
;;   5     operand 2 tied to the output, shufpd with immediate 2
;;   6-7   operand 1 in offsettable memory, movhpd reading it through %H1
;;   8-10  memory destination tied to operand 1; output "#", must be split
;; Operands 1 and 2 may not both be memory.
9198(define_insn "sse2_loadlpd"
9199  [(set (match_operand:V2DF 0 "nonimmediate_operand"
9200	  "=v,x,v,x,v,x,x,v,m,m ,m")
9201	(vec_concat:V2DF
9202	  (match_operand:DF 2 "nonimmediate_operand"
9203	  "vm,m,m,x,v,0,0,v,x,*f,r")
9204	  (vec_select:DF
9205	    (match_operand:V2DF 1 "vector_move_operand"
9206	  " C,0,v,0,v,x,o,o,0,0 ,0")
9207	    (parallel [(const_int 1)]))))]
9208  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9209  "@
9210   %vmovq\t{%2, %0|%0, %2}
9211   movlpd\t{%2, %0|%0, %2}
9212   vmovlpd\t{%2, %1, %0|%0, %1, %2}
9213   movsd\t{%2, %0|%0, %2}
9214   vmovsd\t{%2, %1, %0|%0, %1, %2}
9215   shufpd\t{$2, %1, %0|%0, %1, 2}
9216   movhpd\t{%H1, %0|%0, %H1}
9217   vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
9218   #
9219   #
9220   #"
9221  [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
9222   (set (attr "type")
9223     (cond [(eq_attr "alternative" "5")
9224	      (const_string "sselog")
9225	    (eq_attr "alternative" "9")
9226	      (const_string "fmov")
9227	    (eq_attr "alternative" "10")
9228	      (const_string "imov")
9229	   ]
9230	   (const_string "ssemov")))
9231   (set (attr "prefix_data16")
9232     (if_then_else (eq_attr "alternative" "1,6")
9233		   (const_string "1")
9234		   (const_string "*")))
;; Only the shufpd alternative carries an immediate byte.
9235   (set (attr "length_immediate")
9236     (if_then_else (eq_attr "alternative" "5")
9237		   (const_string "1")
9238		   (const_string "*")))
9239   (set (attr "prefix")
9240     (cond [(eq_attr "alternative" "0")
9241	      (const_string "maybe_vex")
9242	    (eq_attr "alternative" "1,3,5,6")
9243	      (const_string "orig")
9244	    (eq_attr "alternative" "2,4,7")
9245	      (const_string "maybe_evex")
9246	   ]
9247	   (const_string "*")))
9248   (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
9249
;; After reload, replacing the low element of a V2DF in memory with a
;; register value (high element unchanged) becomes a DF store to the same
;; address at offset 0.
9250(define_split
9251  [(set (match_operand:V2DF 0 "memory_operand")
9252	(vec_concat:V2DF
9253	  (match_operand:DF 1 "register_operand")
9254	  (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
9255  "TARGET_SSE2 && reload_completed"
9256  [(set (match_dup 0) (match_dup 1))]
9257  "operands[0] = adjust_address (operands[0], DFmode, 0);")
9258
;; Merge two V2DF vectors with vec_merge mask 1: element 0 comes from
;; operand 2 and element 1 from operand 1.  By alternative:
;;   0-1  both in registers, movsd
;;   2-3  operand 2 in memory, movlpd load
;;   4    memory destination tied to operand 1, movlpd store
;;   5    operand 2 tied to the output, shufpd with immediate 2
;;   6-7  operand 1 in offsettable memory, movhps reading it through %H1
;;   8    offsettable memory destination tied to operand 2, movhps store
;;        to %H0
9259(define_insn "sse2_movsd"
9260  [(set (match_operand:V2DF 0 "nonimmediate_operand"   "=x,v,x,v,m,x,x,v,o")
9261	(vec_merge:V2DF
9262	  (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
9263	  (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
9264	  (const_int 1)))]
9265  "TARGET_SSE2"
9266  "@
9267   movsd\t{%2, %0|%0, %2}
9268   vmovsd\t{%2, %1, %0|%0, %1, %2}
9269   movlpd\t{%2, %0|%0, %q2}
9270   vmovlpd\t{%2, %1, %0|%0, %1, %q2}
9271   %vmovlpd\t{%2, %0|%q0, %2}
9272   shufpd\t{$2, %1, %0|%0, %1, 2}
9273   movhps\t{%H1, %0|%0, %H1}
9274   vmovhps\t{%H1, %2, %0|%0, %2, %H1}
9275   %vmovhps\t{%1, %H0|%H0, %1}"
9276  [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
9277   (set (attr "type")
9278     (if_then_else
9279       (eq_attr "alternative" "5")
9280       (const_string "sselog")
9281       (const_string "ssemov")))
;; The data16 prefix is only counted for the non-AVX movlpd forms.
9282   (set (attr "prefix_data16")
9283     (if_then_else
9284       (and (eq_attr "alternative" "2,4")
9285	    (not (match_test "TARGET_AVX")))
9286       (const_string "1")
9287       (const_string "*")))
9288   (set (attr "length_immediate")
9289     (if_then_else (eq_attr "alternative" "5")
9290		   (const_string "1")
9291		   (const_string "*")))
9292   (set (attr "prefix")
9293     (cond [(eq_attr "alternative" "1,3,7")
9294	      (const_string "maybe_evex")
9295	    (eq_attr "alternative" "4,8")
9296	      (const_string "maybe_vex")
9297	   ]
9298	   (const_string "orig")))
9299   (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
9300
;; Duplicate a DF value into both elements of a V2DF.  Alternative 0
;; (isa noavx) unpacks the output register with itself using unpcklpd;
;; alternative 1 (isa sse3) and alternative 2 (isa avx512vl) use movddup
;; from a register or memory source.  <mask_operand2> is appended to the
;; destination of the movddup forms.
9301(define_insn "vec_dupv2df<mask_name>"
9302  [(set (match_operand:V2DF 0 "register_operand"     "=x,x,v")
9303	(vec_duplicate:V2DF
9304	  (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
9305  "TARGET_SSE2 && <mask_avx512vl_condition>"
9306  "@
9307   unpcklpd\t%0, %0
9308   %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
9309   vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
9310  [(set_attr "isa" "noavx,sse3,avx512vl")
9311   (set_attr "type" "sselog1")
9312   (set_attr "prefix" "orig,maybe_vex,evex")
9313   (set_attr "mode" "V2DF,DF,DF")])
9314
;; Concatenate two DF values into a V2DF (operand 1 low, operand 2 high).
;; By alternative:
;;   0-2  both in registers, unpcklpd
;;   3-4  operand 1 in memory and operand 2 tied to it ("1"), movddup
;;   5-6  operand 2 in memory, movhpd
;;   7    operand 2 matches constraint "C", movq of operand 1
;;   8-9  isa noavx fallbacks using movlhps / movhps
;; The condition allows both operands to be memory only when TARGET_SSE3
;; and they are the same rtx, which is the movddup case.
9315(define_insn "vec_concatv2df"
9316  [(set (match_operand:V2DF 0 "register_operand"     "=x,x,v,x,v,x,x, v,x,x")
9317	(vec_concat:V2DF
9318	  (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,xm,0,0")
9319	  (match_operand:DF 2 "vector_move_operand"  " x,x,v,1,1,m,m, C,x,m")))]
9320  "TARGET_SSE
9321   && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
9322       || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
9323  "@
9324   unpcklpd\t{%2, %0|%0, %2}
9325   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9326   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9327   %vmovddup\t{%1, %0|%0, %1}
9328   vmovddup\t{%1, %0|%0, %1}
9329   movhpd\t{%2, %0|%0, %2}
9330   vmovhpd\t{%2, %1, %0|%0, %1, %2}
9331   %vmovq\t{%1, %0|%0, %1}
9332   movlhps\t{%2, %0|%0, %2}
9333   movhps\t{%2, %0|%0, %2}"
;; Alternatives not listed in the cond (8 and 9) fall through to "noavx".
9334  [(set (attr "isa")
9335     (cond [(eq_attr "alternative" "0,5")
9336	      (const_string "sse2_noavx")
9337	    (eq_attr "alternative" "1,6")
9338	      (const_string "avx")
9339	    (eq_attr "alternative" "2,4")
9340	      (const_string "avx512vl")
9341	    (eq_attr "alternative" "3")
9342	      (const_string "sse3")
9343	    (eq_attr "alternative" "7")
9344	      (const_string "sse2")
9345	   ]
9346	   (const_string "noavx")))
9347   (set (attr "type")
9348     (if_then_else
9349       (eq_attr "alternative" "0,1,2,3,4")
9350       (const_string "sselog")
9351       (const_string "ssemov")))
9352   (set (attr "prefix_data16")
9353	(if_then_else (eq_attr "alternative" "5")
9354		      (const_string "1")
9355		      (const_string "*")))
9356   (set (attr "prefix")
9357     (cond [(eq_attr "alternative" "1,6")
9358	      (const_string "vex")
9359	    (eq_attr "alternative" "2,4")
9360	      (const_string "evex")
9361	    (eq_attr "alternative" "3,7")
9362	      (const_string "maybe_vex")
9363	   ]
9364	   (const_string "orig")))
9365   (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
9366
9367;; vmovq also clears the upper bits.
;; Build a VF2_512_256 vector whose element 0 is the scalar operand 2 and
;; whose other elements come from operand 1, which must be a zero constant
;; (const0_operand).  Emitted as one vmovq whose destination is printed
;; with the %x modifier.
9368(define_insn "vec_set<mode>_0"
9369  [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
9370	(vec_merge:VF2_512_256
9371	  (vec_duplicate:VF2_512_256
9372	    (match_operand:<ssescalarmode> 2 "general_operand" "xm"))
9373	  (match_operand:VF2_512_256 1 "const0_operand" "C")
9374	  (const_int 1)))]
9375  "TARGET_AVX"
9376  "vmovq\t{%2, %x0|%x0, %2}"
9377  [(set_attr "type" "ssemov")
9378   (set_attr "prefix" "maybe_evex")
9379   (set_attr "mode" "DF")])
9380
9381;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9382;;
9383;; Parallel integer down-conversion operations
9384;;
9385;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9386
;; Destination modes of the AVX512F truncating moves that follow, plus, per
;; destination mode: the wider source mode (pmov_src_mode), its lower-case
;; spelling used in pattern names (pmov_src_lower), and the two-letter
;; size suffix appended to the vpmov mnemonic (pmov_suff_1).
9387(define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
9388(define_mode_attr pmov_src_mode
9389  [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
9390(define_mode_attr pmov_src_lower
9391  [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
9392(define_mode_attr pmov_suff_1
9393  [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
9394
;; Unmasked truncation (any_truncate code iterator) of a <pmov_src_mode>
;; register to a PMOV_DST_MODE_1 vector.  The destination is a register
;; (alternative 0) or memory (alternative 1, "memory" attribute "store").
9395(define_insn "*avx512f_<code><pmov_src_lower><mode>2"
9396  [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
9397	(any_truncate:PMOV_DST_MODE_1
9398	  (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
9399  "TARGET_AVX512F"
9400  "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
9401  [(set_attr "type" "ssemov")
9402   (set_attr "memory" "none,store")
9403   (set_attr "prefix" "evex")
9404   (set_attr "mode" "<sseinsnmode>")])
9405
;; Masked form of the truncation: the truncated value is merged with
;; operand 2 under mask register operand 3.  Operand 2 is tied to the
;; destination ("0") or, for the register alternative only, may match
;; constraint "C"; the %N2 modifier is printed after the mask.
9406(define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
9407  [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
9408    (vec_merge:PMOV_DST_MODE_1
9409      (any_truncate:PMOV_DST_MODE_1
9410        (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
9411      (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
9412      (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
9413  "TARGET_AVX512F"
9414  "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9415  [(set_attr "type" "ssemov")
9416   (set_attr "memory" "none,store")
9417   (set_attr "prefix" "evex")
9418   (set_attr "mode" "<sseinsnmode>")])
9419
;; Expander for a masked truncating store: the truncated operand 1 is
;; merged with the current contents of the memory destination (match_dup 0)
;; under mask operand 2.  It has no preparation code, so it only builds the
;; RTL shown.
9420(define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
9421  [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
9422    (vec_merge:PMOV_DST_MODE_1
9423      (any_truncate:PMOV_DST_MODE_1
9424        (match_operand:<pmov_src_mode> 1 "register_operand"))
9425      (match_dup 0)
9426      (match_operand:<avx512fmaskmode> 2 "register_operand")))]
9427  "TARGET_AVX512F")
9428
;; Unmasked truncation of a V32HI register to V32QI (vpmov*wb), to a
;; register or to memory.  Requires TARGET_AVX512BW.
9429(define_insn "avx512bw_<code>v32hiv32qi2"
9430  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
9431	(any_truncate:V32QI
9432	    (match_operand:V32HI 1 "register_operand" "v,v")))]
9433  "TARGET_AVX512BW"
9434  "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
9435  [(set_attr "type" "ssemov")
9436   (set_attr "memory" "none,store")
9437   (set_attr "prefix" "evex")
9438   (set_attr "mode" "XI")])
9439
;; Masked V32HI -> V32QI truncation: the result is merged with operand 2
;; under the SImode mask register operand 3.  Operand 2 is tied to the
;; destination or, for the register alternative only, may match "C".
9440(define_insn "avx512bw_<code>v32hiv32qi2_mask"
9441  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
9442    (vec_merge:V32QI
9443      (any_truncate:V32QI
9444        (match_operand:V32HI 1 "register_operand" "v,v"))
9445      (match_operand:V32QI 2 "vector_move_operand" "0C,0")
9446      (match_operand:SI 3 "register_operand" "Yk,Yk")))]
9447  "TARGET_AVX512BW"
9448  "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9449  [(set_attr "type" "ssemov")
9450   (set_attr "memory" "none,store")
9451   (set_attr "prefix" "evex")
9452   (set_attr "mode" "XI")])
9453
;; Expander for the masked V32HI -> V32QI truncation whose merge source is
;; the destination itself (match_dup 0), under SImode mask operand 2.
;; NOTE(review): operand 0 uses nonimmediate_operand here, whereas the
;; avx512f *_mask_store expander uses memory_operand -- confirm intended.
9454(define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
9455  [(set (match_operand:V32QI 0 "nonimmediate_operand")
9456    (vec_merge:V32QI
9457      (any_truncate:V32QI
9458        (match_operand:V32HI 1 "register_operand"))
9459      (match_dup 0)
9460      (match_operand:SI 2 "register_operand")))]
9461  "TARGET_AVX512BW")
9462
;; Destination modes of the AVX512VL truncations from <ssedoublemode>
;; (V16QI only with TARGET_AVX512BW), and the matching vpmov size suffix.
9463(define_mode_iterator PMOV_DST_MODE_2
9464  [V4SI V8HI (V16QI "TARGET_AVX512BW")])
9465(define_mode_attr pmov_suff_2
9466  [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
9467
;; Unmasked truncation of an <ssedoublemode> register to a PMOV_DST_MODE_2
;; vector, to a register or to memory.  Requires TARGET_AVX512VL.
9468(define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
9469  [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
9470	(any_truncate:PMOV_DST_MODE_2
9471	    (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
9472  "TARGET_AVX512VL"
9473  "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
9474  [(set_attr "type" "ssemov")
9475   (set_attr "memory" "none,store")
9476   (set_attr "prefix" "evex")
9477   (set_attr "mode" "<sseinsnmode>")])
9478
;; Masked form of the <ssedoublemode> truncation: the result is merged with
;; operand 2 under mask register operand 3.  Operand 2 is tied to the
;; destination or, for the register alternative only, may match "C".
9479(define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
9480  [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
9481    (vec_merge:PMOV_DST_MODE_2
9482      (any_truncate:PMOV_DST_MODE_2
9483        (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
9484      (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0")
9485      (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
9486  "TARGET_AVX512VL"
9487  "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9488  [(set_attr "type" "ssemov")
9489   (set_attr "memory" "none,store")
9490   (set_attr "prefix" "evex")
9491   (set_attr "mode" "<sseinsnmode>")])
9492
;; Expander for the masked <ssedoublemode> truncation whose merge source is
;; the destination itself (match_dup 0), under mask operand 2.
;; NOTE(review): operand 0 uses nonimmediate_operand here, whereas the
;; avx512f *_mask_store expander uses memory_operand -- confirm intended.
9493(define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
9494  [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
9495    (vec_merge:PMOV_DST_MODE_2
9496      (any_truncate:PMOV_DST_MODE_2
9497        (match_operand:<ssedoublemode> 1 "register_operand"))
9498      (match_dup 0)
9499      (match_operand:<avx512fmaskmode> 2 "register_operand")))]
9500  "TARGET_AVX512VL")
9501
;; Source modes truncated to QImode elements inside a V16QI result (V8HI
;; only with TARGET_AVX512BW).  Per source mode: pmov_dst_3 is the mode of
;; the truncated elements, pmov_dst_zeroed_3 is the mode of the remaining
;; elements (the two element counts always add up to 16), and pmov_suff_3
;; is the vpmov size suffix.
9502(define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
9503(define_mode_attr pmov_dst_3
9504  [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
9505(define_mode_attr pmov_dst_zeroed_3
9506  [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
9507(define_mode_attr pmov_suff_3
9508  [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
9509
;; Truncate a PMOV_SRC_MODE_3 register to QImode elements placed in the low
;; part of a V16QI register; the remaining elements are operand 2, which
;; must be a zero constant (const0_operand).
9510(define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
9511  [(set (match_operand:V16QI 0 "register_operand" "=v")
9512    (vec_concat:V16QI
9513      (any_truncate:<pmov_dst_3>
9514	      (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
9515      (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
9516  "TARGET_AVX512VL"
9517  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
9518  [(set_attr "type" "ssemov")
9519   (set_attr "prefix" "evex")
9520   (set_attr "mode" "TI")])
9521
;; Store form of the V2DI -> V2QI truncation.  The destination is described
;; as a V16QI memory whose elements 0-1 receive the truncated values and
;; whose elements 2-15 are selected from the destination itself
;; (match_dup 0), i.e. left as they were.  The Intel-syntax template prints
;; the memory operand with the %w modifier.
9522(define_insn "*avx512vl_<code>v2div2qi2_store"
9523  [(set (match_operand:V16QI 0 "memory_operand" "=m")
9524    (vec_concat:V16QI
9525      (any_truncate:V2QI
9526	      (match_operand:V2DI 1 "register_operand" "v"))
9527      (vec_select:V14QI
9528        (match_dup 0)
9529        (parallel [(const_int 2) (const_int 3)
9530                   (const_int 4) (const_int 5)
9531                   (const_int 6) (const_int 7)
9532                   (const_int 8) (const_int 9)
9533                   (const_int 10) (const_int 11)
9534                   (const_int 12) (const_int 13)
9535                   (const_int 14) (const_int 15)]))))]
9536  "TARGET_AVX512VL"
9537  "vpmov<trunsuffix>qb\t{%1, %0|%w0, %1}"
9538  [(set_attr "type" "ssemov")
9539   (set_attr "memory" "store")
9540   (set_attr "prefix" "evex")
9541   (set_attr "mode" "TI")])
9542
;; Masked V2DI -> V2QI truncation into a register.  Elements 0-1 are the
;; truncated values merged, under QImode mask operand 3, with elements 0-1
;; of operand 2 (tied to the destination or matching "C"); elements 2-15
;; of the result are zero.
9543(define_insn "avx512vl_<code>v2div2qi2_mask"
9544  [(set (match_operand:V16QI 0 "register_operand" "=v")
9545    (vec_concat:V16QI
9546      (vec_merge:V2QI
9547        (any_truncate:V2QI
9548          (match_operand:V2DI 1 "register_operand" "v"))
9549        (vec_select:V2QI
9550          (match_operand:V16QI 2 "vector_move_operand" "0C")
9551          (parallel [(const_int 0) (const_int 1)]))
9552        (match_operand:QI 3 "register_operand" "Yk"))
9553      (const_vector:V14QI [(const_int 0) (const_int 0)
9554                           (const_int 0) (const_int 0)
9555                           (const_int 0) (const_int 0)
9556                           (const_int 0) (const_int 0)
9557                           (const_int 0) (const_int 0)
9558                           (const_int 0) (const_int 0)
9559                           (const_int 0) (const_int 0)])))]
9560  "TARGET_AVX512VL"
9561  "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9562  [(set_attr "type" "ssemov")
9563   (set_attr "prefix" "evex")
9564   (set_attr "mode" "TI")])
9565
;; Zero-masking V2DI -> V2QI truncation into a register: the merge source
;; is an explicit zero V2QI vector, so the template always prints {z} after
;; the QImode mask operand 2.  Elements 2-15 of the result are zero.
9566(define_insn "*avx512vl_<code>v2div2qi2_mask_1"
9567  [(set (match_operand:V16QI 0 "register_operand" "=v")
9568    (vec_concat:V16QI
9569      (vec_merge:V2QI
9570	(any_truncate:V2QI
9571	  (match_operand:V2DI 1 "register_operand" "v"))
9572	(const_vector:V2QI [(const_int 0) (const_int 0)])
9573	(match_operand:QI 2 "register_operand" "Yk"))
9574      (const_vector:V14QI [(const_int 0) (const_int 0)
9575			   (const_int 0) (const_int 0)
9576			   (const_int 0) (const_int 0)
9577			   (const_int 0) (const_int 0)
9578			   (const_int 0) (const_int 0)
9579			   (const_int 0) (const_int 0)
9580			   (const_int 0) (const_int 0)])))]
9581  "TARGET_AVX512VL"
9582  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9583  [(set_attr "type" "ssemov")
9584   (set_attr "prefix" "evex")
9585   (set_attr "mode" "TI")])
9586
;; Masked store form of the V2DI -> V2QI truncation.  Elements 0-1 of the
;; V16QI memory destination are the truncated values merged with the
;; destination's own elements 0-1 under QImode mask operand 2; elements
;; 2-15 are selected from the destination itself, i.e. left as they were.
9587(define_insn "avx512vl_<code>v2div2qi2_mask_store"
9588  [(set (match_operand:V16QI 0 "memory_operand" "=m")
9589    (vec_concat:V16QI
9590      (vec_merge:V2QI
9591        (any_truncate:V2QI
9592          (match_operand:V2DI 1 "register_operand" "v"))
9593        (vec_select:V2QI
9594          (match_dup 0)
9595          (parallel [(const_int 0) (const_int 1)]))
9596        (match_operand:QI 2 "register_operand" "Yk"))
9597      (vec_select:V14QI
9598        (match_dup 0)
9599        (parallel [(const_int 2) (const_int 3)
9600                   (const_int 4) (const_int 5)
9601                   (const_int 6) (const_int 7)
9602                   (const_int 8) (const_int 9)
9603                   (const_int 10) (const_int 11)
9604                   (const_int 12) (const_int 13)
9605                   (const_int 14) (const_int 15)]))))]
9606  "TARGET_AVX512VL"
9607  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%w0%{%2%}, %1}"
9608  [(set_attr "type" "ssemov")
9609   (set_attr "memory" "store")
9610   (set_attr "prefix" "evex")
9611   (set_attr "mode" "TI")])
9612
;; Store form of the truncation of a VI4_128_8_256 register to V4QI.
;; Elements 0-3 of the V16QI memory destination receive the truncated
;; values; elements 4-15 are selected from the destination itself, i.e.
;; left as they were.  The Intel-syntax template prints the memory operand
;; with the %k modifier.
9613(define_insn "*avx512vl_<code><mode>v4qi2_store"
9614  [(set (match_operand:V16QI 0 "memory_operand" "=m")
9615    (vec_concat:V16QI
9616      (any_truncate:V4QI
9617	      (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9618      (vec_select:V12QI
9619        (match_dup 0)
9620        (parallel [(const_int 4) (const_int 5)
9621                   (const_int 6) (const_int 7)
9622                   (const_int 8) (const_int 9)
9623                   (const_int 10) (const_int 11)
9624                   (const_int 12) (const_int 13)
9625                   (const_int 14) (const_int 15)]))))]
9626  "TARGET_AVX512VL"
9627  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%k0, %1}"
9628  [(set_attr "type" "ssemov")
9629   (set_attr "memory" "store")
9630   (set_attr "prefix" "evex")
9631   (set_attr "mode" "TI")])
9632
;; Masked truncation of a VI4_128_8_256 register to V4QI, into a register.
;; Elements 0-3 are the truncated values merged, under QImode mask
;; operand 3, with elements 0-3 of operand 2 (tied to the destination or
;; matching "C"); elements 4-15 of the result are zero.
9633(define_insn "avx512vl_<code><mode>v4qi2_mask"
9634  [(set (match_operand:V16QI 0 "register_operand" "=v")
9635    (vec_concat:V16QI
9636      (vec_merge:V4QI
9637        (any_truncate:V4QI
9638          (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9639        (vec_select:V4QI
9640          (match_operand:V16QI 2 "vector_move_operand" "0C")
9641          (parallel [(const_int 0) (const_int 1)
9642                     (const_int 2) (const_int 3)]))
9643        (match_operand:QI 3 "register_operand" "Yk"))
9644      (const_vector:V12QI [(const_int 0) (const_int 0)
9645                           (const_int 0) (const_int 0)
9646                           (const_int 0) (const_int 0)
9647                           (const_int 0) (const_int 0)
9648                           (const_int 0) (const_int 0)
9649                           (const_int 0) (const_int 0)])))]
9650  "TARGET_AVX512VL"
9651  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9652  [(set_attr "type" "ssemov")
9653   (set_attr "prefix" "evex")
9654   (set_attr "mode" "TI")])
9655
;; Zero-masking truncation of a VI4_128_8_256 register to V4QI, into a
;; register: the merge source is an explicit zero V4QI vector, so the
;; template always prints {z} after the QImode mask operand 2.
;; Elements 4-15 of the result are zero.
9656(define_insn "*avx512vl_<code><mode>v4qi2_mask_1"
9657  [(set (match_operand:V16QI 0 "register_operand" "=v")
9658    (vec_concat:V16QI
9659      (vec_merge:V4QI
9660	(any_truncate:V4QI
9661	  (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9662	(const_vector:V4QI [(const_int 0) (const_int 0)
9663			    (const_int 0) (const_int 0)])
9664	(match_operand:QI 2 "register_operand" "Yk"))
9665      (const_vector:V12QI [(const_int 0) (const_int 0)
9666			   (const_int 0) (const_int 0)
9667			   (const_int 0) (const_int 0)
9668			   (const_int 0) (const_int 0)
9669			   (const_int 0) (const_int 0)
9670			   (const_int 0) (const_int 0)])))]
9671  "TARGET_AVX512VL"
9672  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9673  [(set_attr "type" "ssemov")
9674   (set_attr "prefix" "evex")
9675   (set_attr "mode" "TI")])
9676
;; Masked store form of the truncation of a VI4_128_8_256 register to V4QI.
;; Elements 0-3 of the V16QI memory destination are the truncated values
;; merged with the destination's own elements 0-3 under QImode mask
;; operand 2; elements 4-15 are selected from the destination itself, i.e.
;; left as they were.
9677(define_insn "avx512vl_<code><mode>v4qi2_mask_store"
9678  [(set (match_operand:V16QI 0 "memory_operand" "=m")
9679    (vec_concat:V16QI
9680      (vec_merge:V4QI
9681        (any_truncate:V4QI
9682          (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9683        (vec_select:V4QI
9684          (match_dup 0)
9685          (parallel [(const_int 0) (const_int 1)
9686                     (const_int 2) (const_int 3)]))
9687        (match_operand:QI 2 "register_operand" "Yk"))
9688      (vec_select:V12QI
9689        (match_dup 0)
9690        (parallel [(const_int 4) (const_int 5)
9691                   (const_int 6) (const_int 7)
9692                   (const_int 8) (const_int 9)
9693                   (const_int 10) (const_int 11)
9694                   (const_int 12) (const_int 13)
9695                   (const_int 14) (const_int 15)]))))]
9696  "TARGET_AVX512VL"
9697  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%k0%{%2%}, %1}"
9698  [(set_attr "type" "ssemov")
9699   (set_attr "memory" "store")
9700   (set_attr "prefix" "evex")
9701   (set_attr "mode" "TI")])
9702
;; Source modes that truncate to eight QImode elements in the v8qi patterns
;; that follow: V8HI (only with TARGET_AVX512BW) and V8SI.
9703(define_mode_iterator VI2_128_BW_4_256
9704  [(V8HI "TARGET_AVX512BW") V8SI])
9705
;; Store form of the truncation of a VI2_128_BW_4_256 register to V8QI.
;; Elements 0-7 of the V16QI memory destination receive the truncated
;; values; elements 8-15 are selected from the destination itself, i.e.
;; left as they were.  The Intel-syntax template prints the memory operand
;; with the %q modifier.
9706(define_insn "*avx512vl_<code><mode>v8qi2_store"
9707  [(set (match_operand:V16QI 0 "memory_operand" "=m")
9708    (vec_concat:V16QI
9709      (any_truncate:V8QI
9710	      (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9711      (vec_select:V8QI
9712        (match_dup 0)
9713        (parallel [(const_int 8) (const_int 9)
9714                   (const_int 10) (const_int 11)
9715                   (const_int 12) (const_int 13)
9716                   (const_int 14) (const_int 15)]))))]
9717  "TARGET_AVX512VL"
9718  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%q0, %1}"
9719  [(set_attr "type" "ssemov")
9720   (set_attr "memory" "store")
9721   (set_attr "prefix" "evex")
9722   (set_attr "mode" "TI")])
9723
;; Masked truncation of a VI2_128_BW_4_256 register to V8QI, into a
;; register.  Elements 0-7 are the truncated values merged, under QImode
;; mask operand 3, with elements 0-7 of operand 2 (tied to the destination
;; or matching "C"); elements 8-15 of the result are zero.
9724(define_insn "avx512vl_<code><mode>v8qi2_mask"
9725  [(set (match_operand:V16QI 0 "register_operand" "=v")
9726    (vec_concat:V16QI
9727      (vec_merge:V8QI
9728        (any_truncate:V8QI
9729          (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9730        (vec_select:V8QI
9731          (match_operand:V16QI 2 "vector_move_operand" "0C")
9732          (parallel [(const_int 0) (const_int 1)
9733                     (const_int 2) (const_int 3)
9734                     (const_int 4) (const_int 5)
9735                     (const_int 6) (const_int 7)]))
9736        (match_operand:QI 3 "register_operand" "Yk"))
9737      (const_vector:V8QI [(const_int 0) (const_int 0)
9738                          (const_int 0) (const_int 0)
9739                          (const_int 0) (const_int 0)
9740                          (const_int 0) (const_int 0)])))]
9741  "TARGET_AVX512VL"
9742  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9743  [(set_attr "type" "ssemov")
9744   (set_attr "prefix" "evex")
9745   (set_attr "mode" "TI")])
9746
;; Zero-masking truncation of a VI2_128_BW_4_256 register to V8QI, into a
;; register: the merge source is an explicit zero V8QI vector, so the
;; template always prints {z} after the QImode mask operand 2.
;; Elements 8-15 of the result are zero.
9747(define_insn "*avx512vl_<code><mode>v8qi2_mask_1"
9748  [(set (match_operand:V16QI 0 "register_operand" "=v")
9749    (vec_concat:V16QI
9750      (vec_merge:V8QI
9751	(any_truncate:V8QI
9752	  (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9753	(const_vector:V8QI [(const_int 0) (const_int 0)
9754			    (const_int 0) (const_int 0)
9755			    (const_int 0) (const_int 0)
9756			    (const_int 0) (const_int 0)])
9757	(match_operand:QI 2 "register_operand" "Yk"))
9758      (const_vector:V8QI [(const_int 0) (const_int 0)
9759			  (const_int 0) (const_int 0)
9760			  (const_int 0) (const_int 0)
9761			  (const_int 0) (const_int 0)])))]
9762  "TARGET_AVX512VL"
9763  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9764  [(set_attr "type" "ssemov")
9765   (set_attr "prefix" "evex")
9766   (set_attr "mode" "TI")])
9767
;; Masked store form of the truncation of a VI2_128_BW_4_256 register to
;; V8QI.  Elements 0-7 of the V16QI memory destination are the truncated
;; values merged with the destination's own elements 0-7 under QImode mask
;; operand 2; elements 8-15 are selected from the destination itself, i.e.
;; left as they were.
9768(define_insn "avx512vl_<code><mode>v8qi2_mask_store"
9769  [(set (match_operand:V16QI 0 "memory_operand" "=m")
9770    (vec_concat:V16QI
9771      (vec_merge:V8QI
9772        (any_truncate:V8QI
9773          (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9774        (vec_select:V8QI
9775          (match_dup 0)
9776          (parallel [(const_int 0) (const_int 1)
9777                     (const_int 2) (const_int 3)
9778                     (const_int 4) (const_int 5)
9779                     (const_int 6) (const_int 7)]))
9780        (match_operand:QI 2 "register_operand" "Yk"))
9781      (vec_select:V8QI
9782        (match_dup 0)
9783        (parallel [(const_int 8) (const_int 9)
9784                   (const_int 10) (const_int 11)
9785                   (const_int 12) (const_int 13)
9786                   (const_int 14) (const_int 15)]))))]
9787  "TARGET_AVX512VL"
9788  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
9789  [(set_attr "type" "ssemov")
9790   (set_attr "memory" "store")
9791   (set_attr "prefix" "evex")
9792   (set_attr "mode" "TI")])
9793
;; Source modes truncated to HImode elements inside a V8HI result.  Per
;; source mode: pmov_dst_4 is the mode of the truncated elements,
;; pmov_dst_zeroed_4 is the mode of the remaining elements (the two element
;; counts always add up to 8), and pmov_suff_4 is the vpmov size suffix.
9794(define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
9795(define_mode_attr pmov_dst_4
9796  [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
9797(define_mode_attr pmov_dst_zeroed_4
9798  [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
9799(define_mode_attr pmov_suff_4
9800  [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
9801
;; Truncate a PMOV_SRC_MODE_4 register to HImode elements placed in the low
;; part of a V8HI register; the remaining elements are operand 2, which
;; must be a zero constant (const0_operand).
9802(define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
9803  [(set (match_operand:V8HI 0 "register_operand" "=v")
9804    (vec_concat:V8HI
9805      (any_truncate:<pmov_dst_4>
9806	      (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
9807      (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
9808  "TARGET_AVX512VL"
9809  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
9810  [(set_attr "type" "ssemov")
9811   (set_attr "prefix" "evex")
9812   (set_attr "mode" "TI")])
9813
;; Store form of the truncation of a VI4_128_8_256 register to V4HI.
;; Elements 0-3 of the V8HI memory destination receive the truncated
;; values; elements 4-7 are selected from the destination itself, i.e. left
;; as they were.
;; NOTE(review): unlike the *qi2_store patterns, the Intel-syntax template
;; prints the memory operand as plain %0 with no size modifier -- confirm
;; this assembles as intended.
9814(define_insn "*avx512vl_<code><mode>v4hi2_store"
9815  [(set (match_operand:V8HI 0 "memory_operand" "=m")
9816    (vec_concat:V8HI
9817      (any_truncate:V4HI
9818	      (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9819      (vec_select:V4HI
9820        (match_dup 0)
9821        (parallel [(const_int 4) (const_int 5)
9822                   (const_int 6) (const_int 7)]))))]
9823  "TARGET_AVX512VL"
9824  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
9825  [(set_attr "type" "ssemov")
9826   (set_attr "memory" "store")
9827   (set_attr "prefix" "evex")
9828   (set_attr "mode" "TI")])
9829
;; Masked truncation of a VI4_128_8_256 register to V4HI, into a register.
;; Elements 0-3 are the truncated values merged, under QImode mask
;; operand 3, with elements 0-3 of operand 2 (tied to the destination or
;; matching "C"); elements 4-7 of the result are zero.
9830(define_insn "avx512vl_<code><mode>v4hi2_mask"
9831  [(set (match_operand:V8HI 0 "register_operand" "=v")
9832    (vec_concat:V8HI
9833      (vec_merge:V4HI
9834        (any_truncate:V4HI
9835          (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9836        (vec_select:V4HI
9837          (match_operand:V8HI 2 "vector_move_operand" "0C")
9838          (parallel [(const_int 0) (const_int 1)
9839                     (const_int 2) (const_int 3)]))
9840        (match_operand:QI 3 "register_operand" "Yk"))
9841      (const_vector:V4HI [(const_int 0) (const_int 0)
9842                          (const_int 0) (const_int 0)])))]
9843  "TARGET_AVX512VL"
9844  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9845  [(set_attr "type" "ssemov")
9846   (set_attr "prefix" "evex")
9847   (set_attr "mode" "TI")])
9848
;; Zero-masking register form: where mask operand 2 does not select the
;; truncated element the result is the literal zero vector, and the upper
;; four elements are zero as well.  The template always emits the literal
;; "{z}" after the mask register.
9849(define_insn "*avx512vl_<code><mode>v4hi2_mask_1"
9850  [(set (match_operand:V8HI 0 "register_operand" "=v")
9851    (vec_concat:V8HI
9852      (vec_merge:V4HI
9853	(any_truncate:V4HI
9854	  (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9855	(const_vector:V4HI [(const_int 0) (const_int 0)
9856			    (const_int 0) (const_int 0)])
9857	(match_operand:QI 2 "register_operand" "Yk"))
9858      (const_vector:V4HI [(const_int 0) (const_int 0)
9859			  (const_int 0) (const_int 0)])))]
9860  "TARGET_AVX512VL"
9861  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9862  [(set_attr "type" "ssemov")
9863   (set_attr "prefix" "evex")
9864   (set_attr "mode" "TI")])
9865
;; Masked store form.  Elements 0..3 of the V8HI memory operand are a
;; vec_merge, under mask operand 2, of the truncated operand 1 and the old
;; memory contents (match_dup 0); elements 4..7 are the old memory contents.
;; The C fragment picks the output template by source element size: the
;; AT&T halves are identical, only the Intel-dialect modifier on operand 1
;; differs (%t1 for 4-byte elements, %g1 otherwise).
;; NOTE(review): %t/%g are interpreted by print_operand, not visible here --
;; confirm they print the intended register name for each source mode.
9866(define_insn "avx512vl_<code><mode>v4hi2_mask_store"
9867  [(set (match_operand:V8HI 0 "memory_operand" "=m")
9868    (vec_concat:V8HI
9869      (vec_merge:V4HI
9870        (any_truncate:V4HI
9871          (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9872        (vec_select:V4HI
9873          (match_dup 0)
9874          (parallel [(const_int 0) (const_int 1)
9875                     (const_int 2) (const_int 3)]))
9876        (match_operand:QI 2 "register_operand" "Yk"))
9877      (vec_select:V4HI
9878        (match_dup 0)
9879        (parallel [(const_int 4) (const_int 5)
9880                   (const_int 6) (const_int 7)]))))]
9881  "TARGET_AVX512VL"
9882{
9883  if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
9884    return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}";
9885  return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
9886}
9887  [(set_attr "type" "ssemov")
9888   (set_attr "memory" "store")
9889   (set_attr "prefix" "evex")
9890   (set_attr "mode" "TI")])
9891
;; V2DI -> V2HI truncating store (vpmov<trunsuffix>qw).  Only elements 0..1
;; of the V8HI memory operand are written; elements 2..7 are taken from
;; (match_dup 0), i.e. they keep their previous memory contents.
9892(define_insn "*avx512vl_<code>v2div2hi2_store"
9893  [(set (match_operand:V8HI 0 "memory_operand" "=m")
9894    (vec_concat:V8HI
9895      (any_truncate:V2HI
9896	      (match_operand:V2DI 1 "register_operand" "v"))
9897      (vec_select:V6HI
9898        (match_dup 0)
9899        (parallel [(const_int 2) (const_int 3)
9900                   (const_int 4) (const_int 5)
9901                   (const_int 6) (const_int 7)]))))]
9902  "TARGET_AVX512VL"
9903  "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
9904  [(set_attr "type" "ssemov")
9905   (set_attr "memory" "store")
9906   (set_attr "prefix" "evex")
9907   (set_attr "mode" "TI")])
9908
;; Masked register form of the V2DI -> V2HI truncation.  Elements 0..1 are
;; a vec_merge, under mask operand 3, of the truncated operand 1 and
;; elements 0..1 of operand 2 (constraint "0C"; alternative "0" ties it to
;; the destination); elements 2..7 of the result are constant zero.
;; NOTE(review): the meaning of "C" and of the %N2 modifier is defined
;; outside this chunk -- confirm before relying on it.
9909(define_insn "avx512vl_<code>v2div2hi2_mask"
9910  [(set (match_operand:V8HI 0 "register_operand" "=v")
9911    (vec_concat:V8HI
9912      (vec_merge:V2HI
9913        (any_truncate:V2HI
9914          (match_operand:V2DI 1 "register_operand" "v"))
9915        (vec_select:V2HI
9916          (match_operand:V8HI 2 "vector_move_operand" "0C")
9917          (parallel [(const_int 0) (const_int 1)]))
9918        (match_operand:QI 3 "register_operand" "Yk"))
9919      (const_vector:V6HI [(const_int 0) (const_int 0)
9920                          (const_int 0) (const_int 0)
9921                          (const_int 0) (const_int 0)])))]
9922  "TARGET_AVX512VL"
9923  "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9924  [(set_attr "type" "ssemov")
9925   (set_attr "prefix" "evex")
9926   (set_attr "mode" "TI")])
9927
;; Zero-masking register form of the V2DI -> V2HI truncation: unselected
;; low elements are the literal zero vector and elements 2..7 are zero.
;; The template always emits the literal "{z}" after the mask (operand 2).
9928(define_insn "*avx512vl_<code>v2div2hi2_mask_1"
9929  [(set (match_operand:V8HI 0 "register_operand" "=v")
9930    (vec_concat:V8HI
9931      (vec_merge:V2HI
9932	(any_truncate:V2HI
9933	  (match_operand:V2DI 1 "register_operand" "v"))
9934	(const_vector:V2HI [(const_int 0) (const_int 0)])
9935	(match_operand:QI 2 "register_operand" "Yk"))
9936      (const_vector:V6HI [(const_int 0) (const_int 0)
9937			  (const_int 0) (const_int 0)
9938			  (const_int 0) (const_int 0)])))]
9939  "TARGET_AVX512VL"
9940  "vpmov<trunsuffix>qw\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9941  [(set_attr "type" "ssemov")
9942   (set_attr "prefix" "evex")
9943   (set_attr "mode" "TI")])
9944
;; Masked store form of the V2DI -> V2HI truncation.  Elements 0..1 of the
;; memory operand are merged, under mask operand 2, with the old memory
;; contents; elements 2..7 are the old memory contents (match_dup 0).
;; NOTE(review): the Intel-dialect half prints operand 1 with the %g
;; modifier although operand 1 is V2DI -- confirm against print_operand
;; that this yields the intended register name.
9945(define_insn "avx512vl_<code>v2div2hi2_mask_store"
9946  [(set (match_operand:V8HI 0 "memory_operand" "=m")
9947    (vec_concat:V8HI
9948      (vec_merge:V2HI
9949        (any_truncate:V2HI
9950          (match_operand:V2DI 1 "register_operand" "v"))
9951        (vec_select:V2HI
9952          (match_dup 0)
9953          (parallel [(const_int 0) (const_int 1)]))
9954        (match_operand:QI 2 "register_operand" "Yk"))
9955      (vec_select:V6HI
9956        (match_dup 0)
9957        (parallel [(const_int 2) (const_int 3)
9958                   (const_int 4) (const_int 5)
9959                   (const_int 6) (const_int 7)]))))]
9960  "TARGET_AVX512VL"
9961  "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %g1}"
9962  [(set_attr "type" "ssemov")
9963   (set_attr "memory" "store")
9964   (set_attr "prefix" "evex")
9965   (set_attr "mode" "TI")])
9966
;; Register form of the V2DI -> V2SI truncation (vpmov<trunsuffix>qd): the
;; truncated pair fills elements 0..1 of the V4SI destination and operand 2,
;; which must satisfy const0_operand, supplies elements 2..3.
9967(define_insn "*avx512vl_<code>v2div2si2"
9968  [(set (match_operand:V4SI 0 "register_operand" "=v")
9969    (vec_concat:V4SI
9970      (any_truncate:V2SI
9971	      (match_operand:V2DI 1 "register_operand" "v"))
9972      (match_operand:V2SI 2 "const0_operand")))]
9973  "TARGET_AVX512VL"
9974  "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9975  [(set_attr "type" "ssemov")
9976   (set_attr "prefix" "evex")
9977   (set_attr "mode" "TI")])
9978
;; Memory-destination form of the V2DI -> V2SI truncation: elements 0..1 of
;; the V4SI memory operand receive the result, elements 2..3 are taken from
;; (match_dup 0) and therefore keep their previous contents.
9979(define_insn "*avx512vl_<code>v2div2si2_store"
9980  [(set (match_operand:V4SI 0 "memory_operand" "=m")
9981    (vec_concat:V4SI
9982      (any_truncate:V2SI
9983	      (match_operand:V2DI 1 "register_operand" "v"))
9984      (vec_select:V2SI
9985        (match_dup 0)
9986        (parallel [(const_int 2) (const_int 3)]))))]
9987  "TARGET_AVX512VL"
9988  "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9989  [(set_attr "type" "ssemov")
9990   (set_attr "memory" "store")
9991   (set_attr "prefix" "evex")
9992   (set_attr "mode" "TI")])
9993
;; Masked register form of the V2DI -> V2SI truncation.  Elements 0..1 are
;; a vec_merge, under mask operand 3, of the truncated operand 1 and
;; elements 0..1 of operand 2 (constraint "0C"; alternative "0" ties it to
;; the destination); elements 2..3 are constant zero.
;; NOTE(review): "C" and the %N2 modifier are defined outside this chunk.
9994(define_insn "avx512vl_<code>v2div2si2_mask"
9995  [(set (match_operand:V4SI 0 "register_operand" "=v")
9996    (vec_concat:V4SI
9997      (vec_merge:V2SI
9998        (any_truncate:V2SI
9999          (match_operand:V2DI 1 "register_operand" "v"))
10000        (vec_select:V2SI
10001          (match_operand:V4SI 2 "vector_move_operand" "0C")
10002          (parallel [(const_int 0) (const_int 1)]))
10003        (match_operand:QI 3 "register_operand" "Yk"))
10004      (const_vector:V2SI [(const_int 0) (const_int 0)])))]
10005  "TARGET_AVX512VL"
10006  "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10007  [(set_attr "type" "ssemov")
10008   (set_attr "prefix" "evex")
10009   (set_attr "mode" "TI")])
10010
;; Zero-masking register form of the V2DI -> V2SI truncation: unselected
;; low elements and elements 2..3 are literal zero; the template always
;; emits the literal "{z}" after the mask register (operand 2).
10011(define_insn "*avx512vl_<code>v2div2si2_mask_1"
10012  [(set (match_operand:V4SI 0 "register_operand" "=v")
10013    (vec_concat:V4SI
10014      (vec_merge:V2SI
10015	(any_truncate:V2SI
10016	  (match_operand:V2DI 1 "register_operand" "v"))
10017	(const_vector:V2SI [(const_int 0) (const_int 0)])
10018	(match_operand:QI 2 "register_operand" "Yk"))
10019      (const_vector:V2SI [(const_int 0) (const_int 0)])))]
10020  "TARGET_AVX512VL"
10021  "vpmov<trunsuffix>qd\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10022  [(set_attr "type" "ssemov")
10023   (set_attr "prefix" "evex")
10024   (set_attr "mode" "TI")])
10025
;; Masked store form of the V2DI -> V2SI truncation.  Elements 0..1 of the
;; memory operand are merged, under mask operand 2, with the old memory
;; contents; elements 2..3 are the old memory contents (match_dup 0).
;; NOTE(review): the Intel-dialect half prints operand 1 with the %t
;; modifier although operand 1 is V2DI -- confirm against print_operand.
10026(define_insn "avx512vl_<code>v2div2si2_mask_store"
10027  [(set (match_operand:V4SI 0 "memory_operand" "=m")
10028    (vec_concat:V4SI
10029      (vec_merge:V2SI
10030        (any_truncate:V2SI
10031          (match_operand:V2DI 1 "register_operand" "v"))
10032        (vec_select:V2SI
10033          (match_dup 0)
10034          (parallel [(const_int 0) (const_int 1)]))
10035        (match_operand:QI 2 "register_operand" "Yk"))
10036      (vec_select:V2SI
10037        (match_dup 0)
10038        (parallel [(const_int 2) (const_int 3)]))))]
10039  "TARGET_AVX512VL"
10040  "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %t1}"
10041  [(set_attr "type" "ssemov")
10042   (set_attr "memory" "store")
10043   (set_attr "prefix" "evex")
10044   (set_attr "mode" "TI")])
10045
;; Register form of the V8DI -> V8QI truncation (vpmov<trunsuffix>qb): the
;; eight truncated bytes fill elements 0..7 of the V16QI destination and
;; elements 8..15 are constant zero.  Requires only TARGET_AVX512F.
10046(define_insn "*avx512f_<code>v8div16qi2"
10047  [(set (match_operand:V16QI 0 "register_operand" "=v")
10048	(vec_concat:V16QI
10049	  (any_truncate:V8QI
10050	    (match_operand:V8DI 1 "register_operand" "v"))
10051	  (const_vector:V8QI [(const_int 0) (const_int 0)
10052			      (const_int 0) (const_int 0)
10053			      (const_int 0) (const_int 0)
10054			      (const_int 0) (const_int 0)])))]
10055  "TARGET_AVX512F"
10056  "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
10057  [(set_attr "type" "ssemov")
10058   (set_attr "prefix" "evex")
10059   (set_attr "mode" "TI")])
10060
;; Memory-destination form of the V8DI -> V8QI truncation: elements 0..7 of
;; the V16QI memory operand receive the result, elements 8..15 are taken
;; from (match_dup 0) and therefore keep their previous contents.
10061(define_insn "*avx512f_<code>v8div16qi2_store"
10062  [(set (match_operand:V16QI 0 "memory_operand" "=m")
10063	(vec_concat:V16QI
10064	  (any_truncate:V8QI
10065	    (match_operand:V8DI 1 "register_operand" "v"))
10066	  (vec_select:V8QI
10067	    (match_dup 0)
10068	    (parallel [(const_int 8) (const_int 9)
10069		       (const_int 10) (const_int 11)
10070		       (const_int 12) (const_int 13)
10071		       (const_int 14) (const_int 15)]))))]
10072  "TARGET_AVX512F"
10073  "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
10074  [(set_attr "type" "ssemov")
10075   (set_attr "memory" "store")
10076   (set_attr "prefix" "evex")
10077   (set_attr "mode" "TI")])
10078
;; Masked register form of the V8DI -> V8QI truncation.  Elements 0..7 are
;; a vec_merge, under mask operand 3, of the truncated operand 1 and
;; elements 0..7 of operand 2 (constraint "0C"; alternative "0" ties it to
;; the destination); elements 8..15 are constant zero.
;; NOTE(review): "C" and the %N2 modifier are defined outside this chunk.
10079(define_insn "avx512f_<code>v8div16qi2_mask"
10080  [(set (match_operand:V16QI 0 "register_operand" "=v")
10081    (vec_concat:V16QI
10082      (vec_merge:V8QI
10083        (any_truncate:V8QI
10084          (match_operand:V8DI 1 "register_operand" "v"))
10085        (vec_select:V8QI
10086          (match_operand:V16QI 2 "vector_move_operand" "0C")
10087          (parallel [(const_int 0) (const_int 1)
10088                     (const_int 2) (const_int 3)
10089                     (const_int 4) (const_int 5)
10090                     (const_int 6) (const_int 7)]))
10091        (match_operand:QI 3 "register_operand" "Yk"))
10092      (const_vector:V8QI [(const_int 0) (const_int 0)
10093                          (const_int 0) (const_int 0)
10094                          (const_int 0) (const_int 0)
10095                          (const_int 0) (const_int 0)])))]
10096  "TARGET_AVX512F"
10097  "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10098  [(set_attr "type" "ssemov")
10099   (set_attr "prefix" "evex")
10100   (set_attr "mode" "TI")])
10101
;; Zero-masking register form of the V8DI -> V8QI truncation: unselected
;; low elements and elements 8..15 are literal zero; the template always
;; emits the literal "{z}" after the mask register (operand 2).
10102(define_insn "*avx512f_<code>v8div16qi2_mask_1"
10103  [(set (match_operand:V16QI 0 "register_operand" "=v")
10104    (vec_concat:V16QI
10105      (vec_merge:V8QI
10106	(any_truncate:V8QI
10107	  (match_operand:V8DI 1 "register_operand" "v"))
10108	(const_vector:V8QI [(const_int 0) (const_int 0)
10109			    (const_int 0) (const_int 0)
10110			    (const_int 0) (const_int 0)
10111			    (const_int 0) (const_int 0)])
10112	(match_operand:QI 2 "register_operand" "Yk"))
10113      (const_vector:V8QI [(const_int 0) (const_int 0)
10114			  (const_int 0) (const_int 0)
10115			  (const_int 0) (const_int 0)
10116			  (const_int 0) (const_int 0)])))]
10117  "TARGET_AVX512F"
10118  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10119  [(set_attr "type" "ssemov")
10120   (set_attr "prefix" "evex")
10121   (set_attr "mode" "TI")])
10122
;; Masked store form of the V8DI -> V8QI truncation.  Elements 0..7 of the
;; memory operand are merged, under mask operand 2, with the old memory
;; contents; elements 8..15 are the old memory contents (match_dup 0).
;; The Intel-dialect half prints the memory operand with the %q modifier.
;; NOTE(review): %q is interpreted by print_operand, not visible here;
;; presumably it selects a 64-bit memory size annotation -- confirm.
10123(define_insn "avx512f_<code>v8div16qi2_mask_store"
10124  [(set (match_operand:V16QI 0 "memory_operand" "=m")
10125    (vec_concat:V16QI
10126      (vec_merge:V8QI
10127        (any_truncate:V8QI
10128          (match_operand:V8DI 1 "register_operand" "v"))
10129        (vec_select:V8QI
10130          (match_dup 0)
10131          (parallel [(const_int 0) (const_int 1)
10132                     (const_int 2) (const_int 3)
10133                     (const_int 4) (const_int 5)
10134                     (const_int 6) (const_int 7)]))
10135        (match_operand:QI 2 "register_operand" "Yk"))
10136      (vec_select:V8QI
10137        (match_dup 0)
10138        (parallel [(const_int 8) (const_int 9)
10139                   (const_int 10) (const_int 11)
10140                   (const_int 12) (const_int 13)
10141                   (const_int 14) (const_int 15)]))))]
10142  "TARGET_AVX512F"
10143  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
10144  [(set_attr "type" "ssemov")
10145   (set_attr "memory" "store")
10146   (set_attr "prefix" "evex")
10147   (set_attr "mode" "TI")])
10148
10149;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10150;;
10151;; Parallel integral arithmetic
10152;;
10153;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10154
;; Vector integer negation, expanded as (0 - operand 1).  The preparation
;; statement creates operand 2 as a register holding the zero vector of
;; <MODE>mode, which the (match_dup 2) minuend then refers to.
10155(define_expand "neg<mode>2"
10156  [(set (match_operand:VI_AVX2 0 "register_operand")
10157	(minus:VI_AVX2
10158	  (match_dup 2)
10159	  (match_operand:VI_AVX2 1 "vector_operand")))]
10160  "TARGET_SSE2"
10161  "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
10162
;; Expander for vector integer add/subtract over the VI_AVX2 modes.
;; The operands are passed through ix86_fixup_binary_operands_no_copy
;; before the pattern is emitted.
;; NOTE(review): that helper is defined elsewhere; presumably it
;; legitimizes the operands in place for a two-input operation -- confirm.
10163(define_expand "<plusminus_insn><mode>3"
10164  [(set (match_operand:VI_AVX2 0 "register_operand")
10165	(plusminus:VI_AVX2
10166	  (match_operand:VI_AVX2 1 "vector_operand")
10167	  (match_operand:VI_AVX2 2 "vector_operand")))]
10168  "TARGET_SSE2"
10169  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10170
;; Expander for masked add/subtract on the VI48_AVX512VL modes: the result
;; is a vec_merge, under mask operand 4, of (operand 1 +/- operand 2) and
;; the fallback vector operand 3.  Enabled under TARGET_AVX512F.
10171(define_expand "<plusminus_insn><mode>3_mask"
10172  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10173	(vec_merge:VI48_AVX512VL
10174	  (plusminus:VI48_AVX512VL
10175	    (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
10176	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10177	  (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10178	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10179  "TARGET_AVX512F"
10180  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10181
;; Expander for masked add/subtract on the VI12_AVX512VL modes: the result
;; is a vec_merge, under mask operand 4, of (operand 1 +/- operand 2) and
;; the fallback vector operand 3.  Enabled under TARGET_AVX512BW.
10182(define_expand "<plusminus_insn><mode>3_mask"
10183  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
10184	(vec_merge:VI12_AVX512VL
10185	  (plusminus:VI12_AVX512VL
10186	    (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
10187	    (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
10188	  (match_operand:VI12_AVX512VL 3 "vector_move_operand")
10189	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10190  "TARGET_AVX512BW"
10191  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10192
;; Unmasked vector integer add/subtract.  Two alternatives, selected by the
;; "isa" attribute: a non-AVX two-operand form whose destination is tied to
;; operand 1, and an AVX three-operand form with a "v" prefix mnemonic.
;; <comm> is a code attribute defined outside this chunk.
10193(define_insn "*<plusminus_insn><mode>3"
10194  [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
10195	(plusminus:VI_AVX2
10196	  (match_operand:VI_AVX2 1 "vector_operand" "<comm>0,v")
10197	  (match_operand:VI_AVX2 2 "vector_operand" "xBm,vm")))]
10198  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10199  "@
10200   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
10201   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10202  [(set_attr "isa" "noavx,avx")
10203   (set_attr "type" "sseiadd")
10204   (set_attr "prefix_data16" "1,*")
10205   (set_attr "prefix" "orig,vex")
10206   (set_attr "mode" "<sseinsnmode>")])
10207
;; Masked add/subtract insn for the VI48_AVX512VL modes.  Mask operand 4
;; chooses between the arithmetic result and operand 3, which is either
;; tied to the destination or matched by "C" (constraint "0C").
;; NOTE(review): "C" and the %N3 modifier are defined outside this chunk.
10208(define_insn "*<plusminus_insn><mode>3_mask"
10209  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10210	(vec_merge:VI48_AVX512VL
10211	  (plusminus:VI48_AVX512VL
10212	    (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
10213	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
10214	  (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
10215	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10216  "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10217  "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
10218  [(set_attr "type" "sseiadd")
10219   (set_attr "prefix" "evex")
10220   (set_attr "mode" "<sseinsnmode>")])
10221
;; Masked add/subtract insn for the VI12_AVX512VL modes; same shape as the
;; VI48 pattern but gated on TARGET_AVX512BW.  Mask operand 4 chooses
;; between the arithmetic result and operand 3 (constraint "0C").
;; NOTE(review): "C" and the %N3 modifier are defined outside this chunk.
10222(define_insn "*<plusminus_insn><mode>3_mask"
10223  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10224	(vec_merge:VI12_AVX512VL
10225	  (plusminus:VI12_AVX512VL
10226	    (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
10227	    (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
10228	  (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
10229	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10230  "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10231  "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
10232  [(set_attr "type" "sseiadd")
10233   (set_attr "prefix" "evex")
10234   (set_attr "mode" "<sseinsnmode>")])
10235
;; Expander for the sat_plusminus codes on the VI12_AVX2 modes.
;; <mask_name> makes this a template for an unmasked and a masked variant;
;; the two <mask_*_condition> terms are supplied by that substitution
;; (defined outside this chunk).
10236(define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
10237  [(set (match_operand:VI12_AVX2 0 "register_operand")
10238	(sat_plusminus:VI12_AVX2
10239	  (match_operand:VI12_AVX2 1 "vector_operand")
10240	  (match_operand:VI12_AVX2 2 "vector_operand")))]
10241  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10242  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10243
;; Insn for the sat_plusminus codes on the VI12_AVX2 modes.  Alternative 0
;; is the non-AVX two-operand form (destination tied to operand 1);
;; alternative 1 is the three-operand "v" form, which also carries the
;; optional mask annotation <mask_operand3>.
10244(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
10245  [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
10246	(sat_plusminus:VI12_AVX2
10247	  (match_operand:VI12_AVX2 1 "vector_operand" "<comm>0,v")
10248	  (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))]
10249  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
10250   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10251  "@
10252   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
10253   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10254  [(set_attr "isa" "noavx,avx")
10255   (set_attr "type" "sseiadd")
10256   (set_attr "prefix_data16" "1,*")
10257   (set_attr "prefix" "orig,maybe_evex")
10258   (set_attr "mode" "TI")])
10259
;; Multiplication on the VI1_AVX512 modes.  The RTL template is never
;; emitted: the preparation code hands the operands to
;; ix86_expand_vecop_qihi and finishes with DONE.
;; NOTE(review): ix86_expand_vecop_qihi is defined elsewhere; judging by
;; its name it performs the operation via HImode elements -- confirm.
10260(define_expand "mul<mode>3<mask_name>"
10261  [(set (match_operand:VI1_AVX512 0 "register_operand")
10262	(mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
10263		       (match_operand:VI1_AVX512 2 "register_operand")))]
10264  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10265{
10266  ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
10267  DONE;
10268})
10269
;; Expander for multiplication on the VI2_AVX2 modes; the operands are
;; passed through ix86_fixup_binary_operands_no_copy and the mult pattern
;; is then emitted as written.
10270(define_expand "mul<mode>3<mask_name>"
10271  [(set (match_operand:VI2_AVX2 0 "register_operand")
10272	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand")
10273		       (match_operand:VI2_AVX2 2 "vector_operand")))]
10274  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10275  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
10276
;; Insn for multiplication on the VI2_AVX2 modes (pmullw / vpmullw).
;; The condition rejects the case where operands 1 and 2 are both in
;; memory; "%" on operand 1 marks the two inputs as commutative.
10277(define_insn "*mul<mode>3<mask_name>"
10278  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
10279	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
10280		       (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
10281  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
10282   && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10283  "@
10284   pmullw\t{%2, %0|%0, %2}
10285   vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10286  [(set_attr "isa" "noavx,avx")
10287   (set_attr "type" "sseimul")
10288   (set_attr "prefix_data16" "1,*")
10289   (set_attr "prefix" "orig,vex")
10290   (set_attr "mode" "<sseinsnmode>")])
10291
;; Expander for the high-part multiply on the VI2_AVX2 modes: both inputs
;; are extended (any_extend) to <ssedoublemode>, multiplied, shifted right
;; logically by 16 and truncated back to the original mode.
10292(define_expand "<s>mul<mode>3_highpart<mask_name>"
10293  [(set (match_operand:VI2_AVX2 0 "register_operand")
10294	(truncate:VI2_AVX2
10295	  (lshiftrt:<ssedoublemode>
10296	    (mult:<ssedoublemode>
10297	      (any_extend:<ssedoublemode>
10298		(match_operand:VI2_AVX2 1 "vector_operand"))
10299	      (any_extend:<ssedoublemode>
10300		(match_operand:VI2_AVX2 2 "vector_operand")))
10301	    (const_int 16))))]
10302  "TARGET_SSE2
10303   && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10304  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
10305
;; Insn for the high-part multiply on the VI2_AVX2 modes (pmulh<u>w /
;; vpmulh<u>w): extend both inputs, multiply, shift right by 16, truncate.
;; The condition rejects two memory inputs; "%" marks them commutative.
10306(define_insn "*<s>mul<mode>3_highpart<mask_name>"
10307  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
10308	(truncate:VI2_AVX2
10309	  (lshiftrt:<ssedoublemode>
10310	    (mult:<ssedoublemode>
10311	      (any_extend:<ssedoublemode>
10312		(match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
10313	      (any_extend:<ssedoublemode>
10314		(match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
10315	    (const_int 16))))]
10316  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
10317   && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10318  "@
10319   pmulh<u>w\t{%2, %0|%0, %2}
10320   vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10321  [(set_attr "isa" "noavx,avx")
10322   (set_attr "type" "sseimul")
10323   (set_attr "prefix_data16" "1,*")
10324   (set_attr "prefix" "orig,vex")
10325   (set_attr "mode" "<sseinsnmode>")])
10326
;; Expander for the unsigned widening multiply of the even-numbered V16SI
;; elements: elements 0,2,...,14 of each input are selected, zero-extended
;; to DImode and multiplied, giving a V8DI result.
10327(define_expand "vec_widen_umult_even_v16si<mask_name>"
10328  [(set (match_operand:V8DI 0 "register_operand")
10329        (mult:V8DI
10330          (zero_extend:V8DI
10331            (vec_select:V8SI
10332              (match_operand:V16SI 1 "nonimmediate_operand")
10333              (parallel [(const_int 0) (const_int 2)
10334                         (const_int 4) (const_int 6)
10335                         (const_int 8) (const_int 10)
10336                         (const_int 12) (const_int 14)])))
10337          (zero_extend:V8DI
10338            (vec_select:V8SI
10339              (match_operand:V16SI 2 "nonimmediate_operand")
10340              (parallel [(const_int 0) (const_int 2)
10341                         (const_int 4) (const_int 6)
10342                         (const_int 8) (const_int 10)
10343                         (const_int 12) (const_int 14)])))))]
10344  "TARGET_AVX512F"
10345  "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
10346
;; Insn for the unsigned widening multiply of the even V16SI elements
;; (vpmuludq, V8DI result).  The condition rejects two memory inputs;
;; <mask_operand3> adds the mask annotation in the masked variant.
10347(define_insn "*vec_widen_umult_even_v16si<mask_name>"
10348  [(set (match_operand:V8DI 0 "register_operand" "=v")
10349        (mult:V8DI
10350          (zero_extend:V8DI
10351            (vec_select:V8SI
10352              (match_operand:V16SI 1 "nonimmediate_operand" "%v")
10353              (parallel [(const_int 0) (const_int 2)
10354                         (const_int 4) (const_int 6)
10355                         (const_int 8) (const_int 10)
10356                         (const_int 12) (const_int 14)])))
10357          (zero_extend:V8DI
10358            (vec_select:V8SI
10359              (match_operand:V16SI 2 "nonimmediate_operand" "vm")
10360              (parallel [(const_int 0) (const_int 2)
10361                         (const_int 4) (const_int 6)
10362                         (const_int 8) (const_int 10)
10363                         (const_int 12) (const_int 14)])))))]
10364  "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10365  "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10366  [(set_attr "type" "sseimul")
10367   (set_attr "prefix_extra" "1")
10368   (set_attr "prefix" "evex")
10369   (set_attr "mode" "XI")])
10370
;; Expander for the unsigned widening multiply of the even-numbered V8SI
;; elements (0,2,4,6), zero-extended to DImode, giving a V4DI result.
;; The masked variant additionally requires <mask_avx512vl_condition>.
10371(define_expand "vec_widen_umult_even_v8si<mask_name>"
10372  [(set (match_operand:V4DI 0 "register_operand")
10373	(mult:V4DI
10374	  (zero_extend:V4DI
10375	    (vec_select:V4SI
10376	      (match_operand:V8SI 1 "nonimmediate_operand")
10377	      (parallel [(const_int 0) (const_int 2)
10378			 (const_int 4) (const_int 6)])))
10379	  (zero_extend:V4DI
10380	    (vec_select:V4SI
10381	      (match_operand:V8SI 2 "nonimmediate_operand")
10382	      (parallel [(const_int 0) (const_int 2)
10383			 (const_int 4) (const_int 6)])))))]
10384  "TARGET_AVX2 && <mask_avx512vl_condition>"
10385  "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
10386
;; Insn for the unsigned widening multiply of the even V8SI elements
;; (vpmuludq, V4DI result).  The condition rejects two memory inputs and
;; applies <mask_avx512vl_condition> to the masked variant.
10387(define_insn "*vec_widen_umult_even_v8si<mask_name>"
10388  [(set (match_operand:V4DI 0 "register_operand" "=v")
10389	(mult:V4DI
10390	  (zero_extend:V4DI
10391	    (vec_select:V4SI
10392	      (match_operand:V8SI 1 "nonimmediate_operand" "%v")
10393	      (parallel [(const_int 0) (const_int 2)
10394			 (const_int 4) (const_int 6)])))
10395	  (zero_extend:V4DI
10396	    (vec_select:V4SI
10397	      (match_operand:V8SI 2 "nonimmediate_operand" "vm")
10398	      (parallel [(const_int 0) (const_int 2)
10399			 (const_int 4) (const_int 6)])))))]
10400  "TARGET_AVX2 && <mask_avx512vl_condition>
10401   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10402  "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10403  [(set_attr "type" "sseimul")
10404   (set_attr "prefix" "maybe_evex")
10405   (set_attr "mode" "OI")])
10406
;; Expander for the unsigned widening multiply of V4SI elements 0 and 2,
;; zero-extended to DImode, giving a V2DI result.  The masked variant
;; additionally requires <mask_avx512vl_condition>.
10407(define_expand "vec_widen_umult_even_v4si<mask_name>"
10408  [(set (match_operand:V2DI 0 "register_operand")
10409	(mult:V2DI
10410	  (zero_extend:V2DI
10411	    (vec_select:V2SI
10412	      (match_operand:V4SI 1 "vector_operand")
10413	      (parallel [(const_int 0) (const_int 2)])))
10414	  (zero_extend:V2DI
10415	    (vec_select:V2SI
10416	      (match_operand:V4SI 2 "vector_operand")
10417	      (parallel [(const_int 0) (const_int 2)])))))]
10418  "TARGET_SSE2 && <mask_avx512vl_condition>"
10419  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
10420
;; Insn for the unsigned widening multiply of V4SI elements 0 and 2
;; (pmuludq / vpmuludq, V2DI result).  Alternative 0 is the non-AVX form
;; with the destination tied to operand 1; alternative 1 is the AVX form,
;; which also carries the optional mask annotation.
10421(define_insn "*vec_widen_umult_even_v4si<mask_name>"
10422  [(set (match_operand:V2DI 0 "register_operand" "=x,v")
10423	(mult:V2DI
10424	  (zero_extend:V2DI
10425	    (vec_select:V2SI
10426	      (match_operand:V4SI 1 "vector_operand" "%0,v")
10427	      (parallel [(const_int 0) (const_int 2)])))
10428	  (zero_extend:V2DI
10429	    (vec_select:V2SI
10430	      (match_operand:V4SI 2 "vector_operand" "xBm,vm")
10431	      (parallel [(const_int 0) (const_int 2)])))))]
10432  "TARGET_SSE2 && <mask_avx512vl_condition>
10433   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10434  "@
10435   pmuludq\t{%2, %0|%0, %2}
10436   vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10437  [(set_attr "isa" "noavx,avx")
10438   (set_attr "type" "sseimul")
10439   (set_attr "prefix_data16" "1,*")
10440   (set_attr "prefix" "orig,maybe_evex")
10441   (set_attr "mode" "TI")])
10442
;; Expander for the signed widening multiply of the even-numbered V16SI
;; elements: elements 0,2,...,14 of each input are selected, sign-extended
;; to DImode and multiplied, giving a V8DI result.
10443(define_expand "vec_widen_smult_even_v16si<mask_name>"
10444  [(set (match_operand:V8DI 0 "register_operand")
10445        (mult:V8DI
10446          (sign_extend:V8DI
10447            (vec_select:V8SI
10448              (match_operand:V16SI 1 "nonimmediate_operand")
10449              (parallel [(const_int 0) (const_int 2)
10450                         (const_int 4) (const_int 6)
10451                         (const_int 8) (const_int 10)
10452                         (const_int 12) (const_int 14)])))
10453          (sign_extend:V8DI
10454            (vec_select:V8SI
10455              (match_operand:V16SI 2 "nonimmediate_operand")
10456              (parallel [(const_int 0) (const_int 2)
10457                         (const_int 4) (const_int 6)
10458                         (const_int 8) (const_int 10)
10459                         (const_int 12) (const_int 14)])))))]
10460  "TARGET_AVX512F"
10461  "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
10462
;; Insn for the signed widening multiply of the even V16SI elements
;; (vpmuldq, V8DI result).  The condition rejects two memory inputs;
;; <mask_operand3> adds the mask annotation in the masked variant.
10463(define_insn "*vec_widen_smult_even_v16si<mask_name>"
10464  [(set (match_operand:V8DI 0 "register_operand" "=v")
10465        (mult:V8DI
10466          (sign_extend:V8DI
10467            (vec_select:V8SI
10468              (match_operand:V16SI 1 "nonimmediate_operand" "%v")
10469              (parallel [(const_int 0) (const_int 2)
10470                         (const_int 4) (const_int 6)
10471                         (const_int 8) (const_int 10)
10472                         (const_int 12) (const_int 14)])))
10473          (sign_extend:V8DI
10474            (vec_select:V8SI
10475              (match_operand:V16SI 2 "nonimmediate_operand" "vm")
10476              (parallel [(const_int 0) (const_int 2)
10477                         (const_int 4) (const_int 6)
10478                         (const_int 8) (const_int 10)
10479                         (const_int 12) (const_int 14)])))))]
10480  "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10481  "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10482  [(set_attr "type" "sseimul")
10483   (set_attr "prefix_extra" "1")
10484   (set_attr "prefix" "evex")
10485   (set_attr "mode" "XI")])
10486
;; Expander for the signed widening multiply of the even-numbered V8SI
;; elements (0,2,4,6), sign-extended to DImode, giving a V4DI result.
;; The masked variant additionally requires <mask_avx512vl_condition>.
10487(define_expand "vec_widen_smult_even_v8si<mask_name>"
10488  [(set (match_operand:V4DI 0 "register_operand")
10489	(mult:V4DI
10490	  (sign_extend:V4DI
10491	    (vec_select:V4SI
10492	      (match_operand:V8SI 1 "nonimmediate_operand")
10493	      (parallel [(const_int 0) (const_int 2)
10494			 (const_int 4) (const_int 6)])))
10495	  (sign_extend:V4DI
10496	    (vec_select:V4SI
10497	      (match_operand:V8SI 2 "nonimmediate_operand")
10498	      (parallel [(const_int 0) (const_int 2)
10499			 (const_int 4) (const_int 6)])))))]
10500  "TARGET_AVX2 && <mask_avx512vl_condition>"
10501  "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
10502
;; Insn for the signed widening multiply of the even V8SI elements
;; (vpmuldq, V4DI result).  <mask_name> also produces a masked variant;
;; <mask_avx512vl_condition> restricts that variant the same way the
;; vec_widen_smult_even_v8si expander and the unsigned
;; *vec_widen_umult_even_v8si insn do, so a masked 256-bit form is not
;; recognized without it.  The "v" registers and the masked form may need
;; EVEX encoding, hence prefix "maybe_evex" rather than "vex".
;; The condition also rejects the case where both inputs are in memory.
10503(define_insn "*vec_widen_smult_even_v8si<mask_name>"
10504  [(set (match_operand:V4DI 0 "register_operand" "=v")
10505	(mult:V4DI
10506	  (sign_extend:V4DI
10507	    (vec_select:V4SI
10508	      (match_operand:V8SI 1 "nonimmediate_operand" "%v")
10509	      (parallel [(const_int 0) (const_int 2)
10510			 (const_int 4) (const_int 6)])))
10511	  (sign_extend:V4DI
10512	    (vec_select:V4SI
10513	      (match_operand:V8SI 2 "nonimmediate_operand" "vm")
10514	      (parallel [(const_int 0) (const_int 2)
10515			 (const_int 4) (const_int 6)])))))]
10516  "TARGET_AVX2 && <mask_avx512vl_condition> && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10517  "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10518  [(set_attr "type" "sseimul")
10519   (set_attr "prefix_extra" "1")
10520   (set_attr "prefix" "maybe_evex")
10521   (set_attr "mode" "OI")])
10522
;; Expander for the signed widening multiply of V4SI elements 0 and 2,
;; sign-extended to DImode, giving a V2DI result.  Requires TARGET_SSE4_1;
;; the masked variant additionally requires <mask_avx512vl_condition>.
10523(define_expand "sse4_1_mulv2siv2di3<mask_name>"
10524  [(set (match_operand:V2DI 0 "register_operand")
10525	(mult:V2DI
10526	  (sign_extend:V2DI
10527	    (vec_select:V2SI
10528	      (match_operand:V4SI 1 "vector_operand")
10529	      (parallel [(const_int 0) (const_int 2)])))
10530	  (sign_extend:V2DI
10531	    (vec_select:V2SI
10532	      (match_operand:V4SI 2 "vector_operand")
10533	      (parallel [(const_int 0) (const_int 2)])))))]
10534  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
10535  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
10536
;; Insn for the signed widening multiply of V4SI elements 0 and 2
;; (pmuldq / vpmuldq, V2DI result).  Alternatives 0 and 1 are non-AVX
;; two-operand forms with the destination tied to operand 1 and differ
;; only in register class (Yr vs. *x); alternative 2 is the AVX form and
;; carries the optional mask annotation.
10537(define_insn "*sse4_1_mulv2siv2di3<mask_name>"
10538  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
10539	(mult:V2DI
10540	  (sign_extend:V2DI
10541	    (vec_select:V2SI
10542	      (match_operand:V4SI 1 "vector_operand" "%0,0,v")
10543	      (parallel [(const_int 0) (const_int 2)])))
10544	  (sign_extend:V2DI
10545	    (vec_select:V2SI
10546	      (match_operand:V4SI 2 "vector_operand" "YrBm,*xBm,vm")
10547	      (parallel [(const_int 0) (const_int 2)])))))]
10548  "TARGET_SSE4_1 && <mask_avx512vl_condition>
10549   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10550  "@
10551   pmuldq\t{%2, %0|%0, %2}
10552   pmuldq\t{%2, %0|%0, %2}
10553   vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10554  [(set_attr "isa" "noavx,noavx,avx")
10555   (set_attr "type" "sseimul")
10556   (set_attr "prefix_data16" "1,1,*")
10557   (set_attr "prefix_extra" "1")
10558   (set_attr "prefix" "orig,orig,vex")
10559   (set_attr "mode" "TI")])
10560
;; vpmaddwd expressed as an opaque UNSPEC_PMADDWD512 of the two VI2_AVX2
;; inputs, producing a result in <sseunpackmode>.  Requires TARGET_AVX512BW.
;; The ';' that follows the output template starts an (empty) comment and
;; has no effect on the pattern.
;; NOTE(review): the "mode" attribute is "XI" for every VI2_AVX2 mode --
;; confirm that is intended for the narrower modes.
10561(define_insn "avx512bw_pmaddwd512<mode><mask_name>"
10562  [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
10563          (unspec:<sseunpackmode>
10564            [(match_operand:VI2_AVX2 1 "register_operand" "v")
10565             (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
10566             UNSPEC_PMADDWD512))]
10567   "TARGET_AVX512BW && <mask_mode512bit_condition>"
10568   "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
10569  [(set_attr "type" "sseiadd")
10570   (set_attr "prefix" "evex")
10571   (set_attr "mode" "XI")])
10572
;; Expander for the 256-bit multiply-add of signed words.  As written in
;; the template, result element i (V8SI) is
;;   sext (op1[2i]) * sext (op2[2i]) + sext (op1[2i+1]) * sext (op2[2i+1])
;; with op1/op2 being V16HI.  Requires TARGET_AVX2; the operands are
;; passed through ix86_fixup_binary_operands_no_copy first.
10573(define_expand "avx2_pmaddwd"
10574  [(set (match_operand:V8SI 0 "register_operand")
10575	(plus:V8SI
10576	  (mult:V8SI
10577	    (sign_extend:V8SI
10578	      (vec_select:V8HI
10579		(match_operand:V16HI 1 "nonimmediate_operand")
10580		(parallel [(const_int 0) (const_int 2)
10581			   (const_int 4) (const_int 6)
10582			   (const_int 8) (const_int 10)
10583			   (const_int 12) (const_int 14)])))
10584	    (sign_extend:V8SI
10585	      (vec_select:V8HI
10586		(match_operand:V16HI 2 "nonimmediate_operand")
10587		(parallel [(const_int 0) (const_int 2)
10588			   (const_int 4) (const_int 6)
10589			   (const_int 8) (const_int 10)
10590			   (const_int 12) (const_int 14)]))))
10591	  (mult:V8SI
10592	    (sign_extend:V8SI
10593	      (vec_select:V8HI (match_dup 1)
10594		(parallel [(const_int 1) (const_int 3)
10595			   (const_int 5) (const_int 7)
10596			   (const_int 9) (const_int 11)
10597			   (const_int 13) (const_int 15)])))
10598	    (sign_extend:V8SI
10599	      (vec_select:V8HI (match_dup 2)
10600		(parallel [(const_int 1) (const_int 3)
10601			   (const_int 5) (const_int 7)
10602			   (const_int 9) (const_int 11)
10603			   (const_int 13) (const_int 15)]))))))]
10604  "TARGET_AVX2"
10605  "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
10606
;; Insn for the 256-bit vpmaddwd: products of the even-indexed word
;; pairs plus products of the odd-indexed word pairs, each sign-extended
;; to SImode (V16HI x V16HI -> V8SI).  Operand 1 is commutative ("%").
;; Alternative 0 uses "x" registers with a VEX prefix; alternative 1
;; uses "v" registers, is tagged isa avx512bw and uses an EVEX prefix.
;; Both inputs being memory is rejected by the condition.
10607(define_insn "*avx2_pmaddwd"
10608  [(set (match_operand:V8SI 0 "register_operand" "=x,v")
10609	(plus:V8SI
10610	  (mult:V8SI
10611	    (sign_extend:V8SI
10612	      (vec_select:V8HI
10613		(match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
10614		(parallel [(const_int 0) (const_int 2)
10615			   (const_int 4) (const_int 6)
10616			   (const_int 8) (const_int 10)
10617			   (const_int 12) (const_int 14)])))
10618	    (sign_extend:V8SI
10619	      (vec_select:V8HI
10620		(match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
10621		(parallel [(const_int 0) (const_int 2)
10622			   (const_int 4) (const_int 6)
10623			   (const_int 8) (const_int 10)
10624			   (const_int 12) (const_int 14)]))))
10625	  (mult:V8SI
10626	    (sign_extend:V8SI
10627	      (vec_select:V8HI (match_dup 1)
10628		(parallel [(const_int 1) (const_int 3)
10629			   (const_int 5) (const_int 7)
10630			   (const_int 9) (const_int 11)
10631			   (const_int 13) (const_int 15)])))
10632	    (sign_extend:V8SI
10633	      (vec_select:V8HI (match_dup 2)
10634		(parallel [(const_int 1) (const_int 3)
10635			   (const_int 5) (const_int 7)
10636			   (const_int 9) (const_int 11)
10637			   (const_int 13) (const_int 15)]))))))]
10638  "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10639  "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
10640  [(set_attr "type" "sseiadd")
10641   (set_attr "isa" "*,avx512bw")
10642   (set_attr "prefix" "vex,evex")
10643   (set_attr "mode" "OI")])
10644
;; Expander for the 128-bit multiply-add of signed words.  Result
;; element i (V4SI) is
;;   sext (op1[2i]) * sext (op2[2i]) + sext (op1[2i+1]) * sext (op2[2i+1])
;; with op1/op2 being V8HI.  Requires TARGET_SSE2; the operands are
;; passed through ix86_fixup_binary_operands_no_copy first.
10645(define_expand "sse2_pmaddwd"
10646  [(set (match_operand:V4SI 0 "register_operand")
10647	(plus:V4SI
10648	  (mult:V4SI
10649	    (sign_extend:V4SI
10650	      (vec_select:V4HI
10651		(match_operand:V8HI 1 "vector_operand")
10652		(parallel [(const_int 0) (const_int 2)
10653			   (const_int 4) (const_int 6)])))
10654	    (sign_extend:V4SI
10655	      (vec_select:V4HI
10656		(match_operand:V8HI 2 "vector_operand")
10657		(parallel [(const_int 0) (const_int 2)
10658			   (const_int 4) (const_int 6)]))))
10659	  (mult:V4SI
10660	    (sign_extend:V4SI
10661	      (vec_select:V4HI (match_dup 1)
10662		(parallel [(const_int 1) (const_int 3)
10663			   (const_int 5) (const_int 7)])))
10664	    (sign_extend:V4SI
10665	      (vec_select:V4HI (match_dup 2)
10666		(parallel [(const_int 1) (const_int 3)
10667			   (const_int 5) (const_int 7)]))))))]
10668  "TARGET_SSE2"
10669  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
10670
;; Insn for the 128-bit pmaddwd (V8HI x V8HI -> V4SI), same even/odd
;; product-sum RTL as the sse2_pmaddwd expander.  Three alternatives:
;;   0: two-operand pmaddwd, destination tied to operand 1 (isa noavx);
;;   1: three-operand vpmaddwd on "x" registers (isa avx, VEX prefix);
;;   2: three-operand vpmaddwd on "v" registers (isa avx512bw, EVEX).
;; Both inputs being memory is rejected by the condition.
10671(define_insn "*sse2_pmaddwd"
10672  [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
10673	(plus:V4SI
10674	  (mult:V4SI
10675	    (sign_extend:V4SI
10676	      (vec_select:V4HI
10677		(match_operand:V8HI 1 "vector_operand" "%0,x,v")
10678		(parallel [(const_int 0) (const_int 2)
10679			   (const_int 4) (const_int 6)])))
10680	    (sign_extend:V4SI
10681	      (vec_select:V4HI
10682		(match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
10683		(parallel [(const_int 0) (const_int 2)
10684			   (const_int 4) (const_int 6)]))))
10685	  (mult:V4SI
10686	    (sign_extend:V4SI
10687	      (vec_select:V4HI (match_dup 1)
10688		(parallel [(const_int 1) (const_int 3)
10689			   (const_int 5) (const_int 7)])))
10690	    (sign_extend:V4SI
10691	      (vec_select:V4HI (match_dup 2)
10692		(parallel [(const_int 1) (const_int 3)
10693			   (const_int 5) (const_int 7)]))))))]
10694  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10695  "@
10696   pmaddwd\t{%2, %0|%0, %2}
10697   vpmaddwd\t{%2, %1, %0|%0, %1, %2}
10698   vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
10699  [(set_attr "isa" "noavx,avx,avx512bw")
10700   (set_attr "type" "sseiadd")
10701   (set_attr "atom_unit" "simul")
10702   (set_attr "prefix_data16" "1,*,*")
10703   (set_attr "prefix" "orig,vex,evex")
10704   (set_attr "mode" "TI")])
10705
;; Element-wise multiply for the VI8 modes, emitted as vpmullq.
;; Operand 1 must be a register, operand 2 may be register or memory.
;; Enabled for TARGET_AVX512DQ together with <mask_mode512bit_condition>;
;; the template prints the optional <mask_operand3>.
10706(define_insn "avx512dq_mul<mode>3<mask_name>"
10707  [(set (match_operand:VI8 0 "register_operand" "=v")
10708	(mult:VI8
10709	  (match_operand:VI8 1 "register_operand" "v")
10710	  (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
10711  "TARGET_AVX512DQ && <mask_mode512bit_condition>"
10712  "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10713  [(set_attr "type" "sseimul")
10714   (set_attr "prefix" "evex")
10715   (set_attr "mode" "<sseinsnmode>")])
10716
;; Expander for element-wise multiply in the VI4_AVX512F modes.
;; With TARGET_SSE4_1: any input that is not already a vector_operand is
;; forced into a register, the operands go through
;; ix86_fixup_binary_operands_no_copy, and the mult template is emitted.
;; Without SSE4.1: the work is delegated to ix86_expand_sse2_mulv4si3
;; and the template is not emitted (DONE).
10717(define_expand "mul<mode>3<mask_name>"
10718  [(set (match_operand:VI4_AVX512F 0 "register_operand")
10719	(mult:VI4_AVX512F
10720	  (match_operand:VI4_AVX512F 1 "general_vector_operand")
10721	  (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
10722  "TARGET_SSE2 && <mask_mode512bit_condition>"
10723{
10724  if (TARGET_SSE4_1)
10725    {
10726      if (!vector_operand (operands[1], <MODE>mode))
10727	operands[1] = force_reg (<MODE>mode, operands[1]);
10728      if (!vector_operand (operands[2], <MODE>mode))
10729	operands[2] = force_reg (<MODE>mode, operands[2]);
10730      ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
10731    }
10732  else
10733    {
10734      ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
10735      DONE;
10736    }
10737})
10738
;; Insn for element-wise multiply in the VI4_AVX512F modes.  Operand 1 is
;; commutative ("%").  Alternatives 0 and 1 are the two-operand pmulld
;; form with the destination tied to operand 1 (isa noavx); alternative 2
;; is the three-operand vpmulld form, which prints the optional
;; <mask_operand3>.  Requires TARGET_SSE4_1 and rejects two memory inputs.
10739(define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
10740  [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
10741	(mult:VI4_AVX512F
10742	  (match_operand:VI4_AVX512F 1 "vector_operand" "%0,0,v")
10743	  (match_operand:VI4_AVX512F 2 "vector_operand" "YrBm,*xBm,vm")))]
10744  "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
10745   && <mask_mode512bit_condition>"
10746  "@
10747   pmulld\t{%2, %0|%0, %2}
10748   pmulld\t{%2, %0|%0, %2}
10749   vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10750  [(set_attr "isa" "noavx,noavx,avx")
10751   (set_attr "type" "sseimul")
10752   (set_attr "prefix_extra" "1")
10753   (set_attr "prefix" "<mask_prefix4>")
10754   (set_attr "btver2_decode" "vector,vector,vector")
10755   (set_attr "mode" "<sseinsnmode>")])
10756
;; Expander for element-wise multiply in the VI8_AVX2_AVX512F modes.
;; The RTL template is never emitted: the expansion is always delegated
;; to ix86_expand_sse2_mulvxdi3 (dest, op1, op2) followed by DONE.
10757(define_expand "mul<mode>3"
10758  [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
10759	(mult:VI8_AVX2_AVX512F
10760	  (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
10761	  (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
10762  "TARGET_SSE2"
10763{
10764  ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
10765  DONE;
10766})
10767
;; Widening multiply, "hi" variant, for the VI124_AVX2 modes (signed and
;; unsigned via any_extend).  Delegates to ix86_expand_mul_widen_hilo,
;; passing <u_bool> and true as the last two arguments.
;; NOTE(review): presumably these are the unsigned flag and the
;; high-half selector; confirm against the helper in i386.c.
10768(define_expand "vec_widen_<s>mult_hi_<mode>"
10769  [(match_operand:<sseunpackmode> 0 "register_operand")
10770   (any_extend:<sseunpackmode>
10771     (match_operand:VI124_AVX2 1 "register_operand"))
10772   (match_operand:VI124_AVX2 2 "register_operand")]
10773  "TARGET_SSE2"
10774{
10775  ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
10776			      <u_bool>, true);
10777  DONE;
10778})
10779
;; Widening multiply, "lo" variant, for the VI124_AVX2 modes (signed and
;; unsigned via any_extend).  Delegates to ix86_expand_mul_widen_hilo,
;; passing <u_bool> and false as the last two arguments.
;; NOTE(review): presumably these are the unsigned flag and the
;; high-half selector; confirm against the helper in i386.c.
10780(define_expand "vec_widen_<s>mult_lo_<mode>"
10781  [(match_operand:<sseunpackmode> 0 "register_operand")
10782   (any_extend:<sseunpackmode>
10783     (match_operand:VI124_AVX2 1 "register_operand"))
10784   (match_operand:VI124_AVX2 2 "register_operand")]
10785  "TARGET_SSE2"
10786{
10787  ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
10788			      <u_bool>, false);
10789  DONE;
10790})
10791
10792;; Most widen_<s>mult_even_<mode> can be handled directly from other
10793;; named patterns, but signed V4SI needs special help for plain SSE2.
;; Signed even-element widening multiply, V4SI x V4SI -> V2DI, available
;; from TARGET_SSE2.  Delegates to ix86_expand_mul_widen_evenodd with
;; both trailing flags false.
;; NOTE(review): presumably (unsigned_p = false, odd_p = false); confirm
;; against the helper's prototype.
10794(define_expand "vec_widen_smult_even_v4si"
10795  [(match_operand:V2DI 0 "register_operand")
10796   (match_operand:V4SI 1 "vector_operand")
10797   (match_operand:V4SI 2 "vector_operand")]
10798  "TARGET_SSE2"
10799{
10800  ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
10801				 false, false);
10802  DONE;
10803})
10804
;; Odd-element widening multiply for the VI4_AVX512F modes (signed and
;; unsigned via any_extend).  Delegates to ix86_expand_mul_widen_evenodd,
;; passing <u_bool> and true as the last two arguments.
;; NOTE(review): presumably these are the unsigned flag and the odd
;; selector; confirm against the helper's prototype.
10805(define_expand "vec_widen_<s>mult_odd_<mode>"
10806  [(match_operand:<sseunpackmode> 0 "register_operand")
10807   (any_extend:<sseunpackmode>
10808     (match_operand:VI4_AVX512F 1 "general_vector_operand"))
10809   (match_operand:VI4_AVX512F 2 "general_vector_operand")]
10810  "TARGET_SSE2"
10811{
10812  ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
10813				 <u_bool>, true);
10814  DONE;
10815})
10816
;; Per-mode suffix appended to gen_<sse2_avx2>_pmaddwd in sdot_prod<mode>
;; to form the generator name: "512v32hi" for V32HI, empty for V16HI and
;; V8HI.
10817(define_mode_attr SDOT_PMADD_SUF
10818  [(V32HI "512v32hi") (V16HI "") (V8HI "")])
10819
;; Signed dot-product expander for the VI2_AVX2 modes:
;;   op0 = op3 + pmaddwd (op1, op2)
;; A fresh temporary in <sseunpackmode> receives the pmaddwd result
;; (generator name completed by SDOT_PMADD_SUF), then a PLUS with the
;; accumulator operand 3 is emitted into operand 0.
10820(define_expand "sdot_prod<mode>"
10821  [(match_operand:<sseunpackmode> 0 "register_operand")
10822   (match_operand:VI2_AVX2 1 "register_operand")
10823   (match_operand:VI2_AVX2 2 "register_operand")
10824   (match_operand:<sseunpackmode> 3 "register_operand")]
10825  "TARGET_SSE2"
10826{
10827  rtx t = gen_reg_rtx (<sseunpackmode>mode);
10828  emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
10829  emit_insn (gen_rtx_SET (operands[0],
10830			  gen_rtx_PLUS (<sseunpackmode>mode,
10831					operands[3], t)));
10832  DONE;
10833})
10834
10835;; Normally we use widen_mul_even/odd, but combine can't quite get it all
10836;; back together when madd is available.
;; XOP-only signed dot product, V4SI x V4SI accumulated into V2DI.
;; Two chained multiply-accumulate insns are emitted: xop_pmacsdqh
;; accumulates onto operand 3 into a temporary, then xop_pmacsdql
;; accumulates onto that temporary into operand 0.
10837(define_expand "sdot_prodv4si"
10838  [(match_operand:V2DI 0 "register_operand")
10839   (match_operand:V4SI 1 "register_operand")
10840   (match_operand:V4SI 2 "register_operand")
10841   (match_operand:V2DI 3 "register_operand")]
10842  "TARGET_XOP"
10843{
10844  rtx t = gen_reg_rtx (V2DImode);
10845  emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
10846  emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
10847  DONE;
10848})
10849
;; 128-bit "usad" expander (by its name, unsigned sum of absolute
;; differences): op0 = op3 + psadbw (op1, op2).
;; psadbw produces a V2DI temporary; convert_move copies it into a V4SI
;; temporary, which is then added to the accumulator operand 3 with
;; addv4si3.
10850(define_expand "usadv16qi"
10851  [(match_operand:V4SI 0 "register_operand")
10852   (match_operand:V16QI 1 "register_operand")
10853   (match_operand:V16QI 2 "vector_operand")
10854   (match_operand:V4SI 3 "vector_operand")]
10855  "TARGET_SSE2"
10856{
10857  rtx t1 = gen_reg_rtx (V2DImode);
10858  rtx t2 = gen_reg_rtx (V4SImode);
10859  emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
10860  convert_move (t2, t1, 0);
10861  emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
10862  DONE;
10863})
10864
;; 256-bit counterpart of usadv16qi, requiring TARGET_AVX2:
;;   op0 = op3 + psadbw (op1, op2).
;; avx2_psadbw produces a V4DI temporary; convert_move copies it into a
;; V8SI temporary, which is then added to operand 3 with addv8si3.
10865(define_expand "usadv32qi"
10866  [(match_operand:V8SI 0 "register_operand")
10867   (match_operand:V32QI 1 "register_operand")
10868   (match_operand:V32QI 2 "nonimmediate_operand")
10869   (match_operand:V8SI 3 "nonimmediate_operand")]
10870  "TARGET_AVX2"
10871{
10872  rtx t1 = gen_reg_rtx (V4DImode);
10873  rtx t2 = gen_reg_rtx (V8SImode);
10874  emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
10875  convert_move (t2, t1, 0);
10876  emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
10877  DONE;
10878})
10879
;; Arithmetic right shift (vpsra*) for the VI248_AVX512BW_1 modes under
;; TARGET_AVX512VL, with optional masking.  The DImode count is either
;; in a vector register (alternative 0) or a constant accepted by
;; constraint "N" (alternative 1, where the source may also be memory).
;; length_immediate is 1 when the count is a const_int, else 0.
10880(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
10881  [(set (match_operand:VI248_AVX512BW_1 0 "register_operand" "=v,v")
10882	(ashiftrt:VI248_AVX512BW_1
10883	  (match_operand:VI248_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
10884	  (match_operand:DI 2 "nonmemory_operand" "v,N")))]
10885  "TARGET_AVX512VL"
10886  "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10887  [(set_attr "type" "sseishft")
10888   (set (attr "length_immediate")
10889     (if_then_else (match_operand 2 "const_int_operand")
10890       (const_string "1")
10891       (const_string "0")))
10892   (set_attr "mode" "<sseinsnmode>")])
10893
;; Arithmetic right shift for the VI24_AVX2 modes.  Alternative 0 is the
;; two-operand psra* form with the destination tied to operand 1 (isa
;; noavx); alternative 1 is the three-operand vpsra* form (isa avx).
;; In both, the DImode count may be an "x" register or an "N" constant;
;; length_immediate is 1 when the count is a const_int, else 0.
10894(define_insn "ashr<mode>3"
10895  [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
10896	(ashiftrt:VI24_AVX2
10897	  (match_operand:VI24_AVX2 1 "register_operand" "0,x")
10898	  (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
10899  "TARGET_SSE2"
10900  "@
10901   psra<ssemodesuffix>\t{%2, %0|%0, %2}
10902   vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10903  [(set_attr "isa" "noavx,avx")
10904   (set_attr "type" "sseishft")
10905   (set (attr "length_immediate")
10906     (if_then_else (match_operand 2 "const_int_operand")
10907       (const_string "1")
10908       (const_string "0")))
10909   (set_attr "prefix_data16" "1,*")
10910   (set_attr "prefix" "orig,vex")
10911   (set_attr "mode" "<sseinsnmode>")])
10912
;; Arithmetic right shift (vpsra*) for the VI248_AVX512BW_AVX512VL modes
;; under TARGET_AVX512F, with optional masking.  The DImode count is
;; either in a vector register (alternative 0) or an "N" constant
;; (alternative 1, where the source may also be memory).
;; length_immediate is 1 when the count is a const_int, else 0.
10913(define_insn "ashr<mode>3<mask_name>"
10914  [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
10915	(ashiftrt:VI248_AVX512BW_AVX512VL
10916	  (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
10917	  (match_operand:DI 2 "nonmemory_operand" "v,N")))]
10918  "TARGET_AVX512F"
10919  "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10920  [(set_attr "type" "sseishft")
10921   (set (attr "length_immediate")
10922     (if_then_else (match_operand 2 "const_int_operand")
10923       (const_string "1")
10924       (const_string "0")))
10925   (set_attr "mode" "<sseinsnmode>")])
10926
;; Shifts covered by the any_lshift code iterator (vp<vshift>*) for the
;; VI248_AVX512BW_2 modes under TARGET_AVX512VL, with optional masking.
;; The DImode count is either in a vector register (alternative 0) or an
;; "N" constant (alternative 1, where the source may also be memory).
;; length_immediate is 1 when the count is a const_int, else 0.
10927(define_insn "<mask_codefor><shift_insn><mode>3<mask_name>"
10928  [(set (match_operand:VI248_AVX512BW_2 0 "register_operand" "=v,v")
10929	(any_lshift:VI248_AVX512BW_2
10930	  (match_operand:VI248_AVX512BW_2 1 "nonimmediate_operand" "v,vm")
10931	  (match_operand:DI 2 "nonmemory_operand" "v,N")))]
10932  "TARGET_AVX512VL"
10933  "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10934  [(set_attr "type" "sseishft")
10935   (set (attr "length_immediate")
10936     (if_then_else (match_operand 2 "const_int_operand")
10937       (const_string "1")
10938       (const_string "0")))
10939   (set_attr "mode" "<sseinsnmode>")])
10940
;; Shifts covered by the any_lshift code iterator for the VI248_AVX2
;; modes.  Alternative 0 is the two-operand p<vshift>* form with the
;; destination tied to operand 1 (isa noavx); alternative 1 is the
;; three-operand vp<vshift>* form (isa avx).  The DImode count may be an
;; "x" register or an "N" constant; length_immediate is 1 when the count
;; is a const_int, else 0.
10941(define_insn "<shift_insn><mode>3"
10942  [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
10943	(any_lshift:VI248_AVX2
10944	  (match_operand:VI248_AVX2 1 "register_operand" "0,x")
10945	  (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
10946  "TARGET_SSE2"
10947  "@
10948   p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
10949   vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10950  [(set_attr "isa" "noavx,avx")
10951   (set_attr "type" "sseishft")
10952   (set (attr "length_immediate")
10953     (if_then_else (match_operand 2 "const_int_operand")
10954       (const_string "1")
10955       (const_string "0")))
10956   (set_attr "prefix_data16" "1,*")
10957   (set_attr "prefix" "orig,vex")
10958   (set_attr "mode" "<sseinsnmode>")])
10959
;; Shifts covered by the any_lshift code iterator (vp<vshift>*) for the
;; VI248_AVX512BW modes under TARGET_AVX512F, with optional masking.
;; Alternative 0: register source, count in a register or "N" constant.
;; Alternative 1: memory source, which only allows an "N" constant count.
;; length_immediate is 1 when the count is a const_int, else 0.
10960(define_insn "<shift_insn><mode>3<mask_name>"
10961  [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
10962	(any_lshift:VI248_AVX512BW
10963	  (match_operand:VI248_AVX512BW 1 "nonimmediate_operand" "v,m")
10964	  (match_operand:DI 2 "nonmemory_operand" "vN,N")))]
10965  "TARGET_AVX512F"
10966  "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10967  [(set_attr "type" "sseishft")
10968   (set (attr "length_immediate")
10969     (if_then_else (match_operand 2 "const_int_operand")
10970       (const_string "1")
10971       (const_string "0")))
10972   (set_attr "mode" "<sseinsnmode>")])
10973
10974
;; Whole-vector logical right shift for the VI_128 modes, performed in
;; V1TImode.  The preparation code reinterprets operand 1 as V1TI,
;; allocates a V1TI temporary (operand 3) for the shifted value, and
;; sets operand 4 to that temporary viewed in <MODE>mode so the second
;; set copies it into operand 0.  The count predicate is
;; const_0_to_255_mul_8_operand.
10975(define_expand "vec_shr_<mode>"
10976  [(set (match_dup 3)
10977	(lshiftrt:V1TI
10978	 (match_operand:VI_128 1 "register_operand")
10979	 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10980   (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10981  "TARGET_SSE2"
10982{
10983  operands[1] = gen_lowpart (V1TImode, operands[1]);
10984  operands[3] = gen_reg_rtx (V1TImode);
10985  operands[4] = gen_lowpart (<MODE>mode, operands[3]);
10986})
10987
;; Full-width shift (vp<vshift>dq) for the VIMAX_AVX512VL modes under
;; TARGET_AVX512BW.  The RTL count (predicate
;; const_0_to_255_mul_8_operand) is divided by 8 before being printed as
;; the instruction's immediate.  The source may be a register or memory.
10988(define_insn "avx512bw_<shift_insn><mode>3"
10989  [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
10990	(any_lshift:VIMAX_AVX512VL
10991	 (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
10992	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
10993  "TARGET_AVX512BW"
10994{
10995  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10996  return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
10997}
10998  [(set_attr "type" "sseishft")
10999   (set_attr "length_immediate" "1")
11000   (set_attr "prefix" "maybe_evex")
11001   (set_attr "mode" "<sseinsnmode>")])
11002
;; Full-width shift (p<vshift>dq / vp<vshift>dq) for the VIMAX_AVX2
;; modes.  The RTL count is divided by 8 before being printed as the
;; immediate.  Alternative 0 is the two-operand form with the
;; destination tied to operand 1 (isa noavx); alternative 1 is the
;; three-operand form (isa avx).  Any other alternative is unreachable.
11003(define_insn "<sse2_avx2>_<shift_insn><mode>3"
11004  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
11005	(any_lshift:VIMAX_AVX2
11006	 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
11007	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
11008  "TARGET_SSE2"
11009{
11010  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
11011
11012  switch (which_alternative)
11013    {
11014    case 0:
11015      return "p<vshift>dq\t{%2, %0|%0, %2}";
11016    case 1:
11017      return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
11018    default:
11019      gcc_unreachable ();
11020    }
11021}
11022  [(set_attr "isa" "noavx,avx")
11023   (set_attr "type" "sseishft")
11024   (set_attr "length_immediate" "1")
11025   (set_attr "atom_unit" "sishuf")
11026   (set_attr "prefix_data16" "1,*")
11027   (set_attr "prefix" "orig,vex")
11028   (set_attr "mode" "<sseinsnmode>")])
11029
;; Rotate with a per-element vector count (vp<rotate>v*) for the
;; VI48_AVX512VL modes under TARGET_AVX512F, with optional masking.
;; Operand 1 must be a register; the count vector (operand 2) may be a
;; register or memory.
11030(define_insn "<avx512>_<rotate>v<mode><mask_name>"
11031  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11032	(any_rotate:VI48_AVX512VL
11033	  (match_operand:VI48_AVX512VL 1 "register_operand" "v")
11034	  (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
11035  "TARGET_AVX512F"
11036  "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11037  [(set_attr "prefix" "evex")
11038   (set_attr "mode" "<sseinsnmode>")])
11039
;; Rotate by a constant count (vp<rotate>*) for the VI48_AVX512VL modes
;; under TARGET_AVX512F, with optional masking.  The source may be a
;; register or memory; the count is an SImode constant matching
;; const_0_to_255_operand (no constraint string is given for it).
11040(define_insn "<avx512>_<rotate><mode><mask_name>"
11041  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11042	(any_rotate:VI48_AVX512VL
11043	  (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
11044	  (match_operand:SI 2 "const_0_to_255_operand")))]
11045  "TARGET_AVX512F"
11046  "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11047  [(set_attr "prefix" "evex")
11048   (set_attr "mode" "<sseinsnmode>")])
11049
;; Max/min expander (maxmin code iterator) for the
;; VI124_256_AVX512F_AVX512BW modes under TARGET_AVX2.  It only
;; canonicalizes the operands via ix86_fixup_binary_operands_no_copy and
;; then emits the template.
11050(define_expand "<code><mode>3"
11051  [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
11052	(maxmin:VI124_256_AVX512F_AVX512BW
11053	  (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
11054	  (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
11055  "TARGET_AVX2"
11056  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11057
;; Max/min insn (vp<maxmin_int>*) for the VI124_256 modes under
;; TARGET_AVX2, VEX-encoded.  Operand 1 is commutative ("%"); operand 2
;; may be memory, but both inputs being memory is rejected.
11058(define_insn "*avx2_<code><mode>3"
11059  [(set (match_operand:VI124_256 0 "register_operand" "=v")
11060	(maxmin:VI124_256
11061	  (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
11062	  (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
11063  "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11064  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11065  [(set_attr "type" "sseiadd")
11066   (set_attr "prefix_extra" "1")
11067   (set_attr "prefix" "vex")
11068   (set_attr "mode" "OI")])
11069
;; Explicitly masked max/min expander for the VI48_AVX512VL modes under
;; TARGET_AVX512F: a vec_merge of the max/min result with operand 3,
;; controlled by the mask register operand 4.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy before the template is
;; emitted.
11070(define_expand "<code><mode>3_mask"
11071  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
11072	(vec_merge:VI48_AVX512VL
11073	  (maxmin:VI48_AVX512VL
11074	    (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
11075	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
11076	  (match_operand:VI48_AVX512VL 3 "vector_move_operand")
11077	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11078  "TARGET_AVX512F"
11079  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11080
;; Max/min insn (vp<maxmin_int>*) for the VI48_AVX512VL modes under
;; TARGET_AVX512F, with optional masking.  Operand 1 is commutative
;; ("%"); operand 2 may be memory, but both inputs being memory is
;; rejected.
11081(define_insn "*avx512f_<code><mode>3<mask_name>"
11082  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11083	(maxmin:VI48_AVX512VL
11084	  (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
11085	  (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
11086  "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11087  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11088  [(set_attr "type" "sseiadd")
11089   (set_attr "prefix_extra" "1")
11090   (set_attr "prefix" "maybe_evex")
11091   (set_attr "mode" "<sseinsnmode>")])
11092
;; Max/min insn (vp<maxmin_int>*) for the VI12_AVX512VL modes under
;; TARGET_AVX512BW, with optional masking.  Operand 1 must be a register
;; (no commutative marker here); operand 2 may be a register or memory.
11093(define_insn "<mask_codefor><code><mode>3<mask_name>"
11094  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
11095        (maxmin:VI12_AVX512VL
11096          (match_operand:VI12_AVX512VL 1 "register_operand" "v")
11097          (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
11098  "TARGET_AVX512BW"
11099  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11100  [(set_attr "type" "sseiadd")
11101   (set_attr "prefix" "evex")
11102   (set_attr "mode" "<sseinsnmode>")])
11103
;; Max/min expander for the VI8_AVX2_AVX512F modes under TARGET_SSE4_2.
;; When TARGET_AVX512F holds and the mode is V8DImode or TARGET_AVX512VL
;; is set, the operands are only canonicalized and the template is
;; emitted.  Otherwise the operation is synthesized through
;; ix86_expand_int_vcond with a six-entry operand array:
;;   xops[0]   destination
;;   xops[1,2] op1,op2 for max; swapped (op2,op1) for min
;;   xops[3]   comparison op1 > op2 (GTU for umax/umin, GT otherwise)
;;   xops[4,5] the compared operands op1, op2
;; NOTE(review): this reads as "cond ? xops[1] : xops[2]", i.e. the swap
;; turns the same comparison into a min; confirm against
;; ix86_expand_int_vcond.  The expansion is asserted to succeed.
11104(define_expand "<code><mode>3"
11105  [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
11106	(maxmin:VI8_AVX2_AVX512F
11107	  (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
11108	  (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
11109  "TARGET_SSE4_2"
11110{
11111  if (TARGET_AVX512F
11112      && (<MODE>mode == V8DImode || TARGET_AVX512VL))
11113    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
11114  else
11115    {
11116      enum rtx_code code;
11117      rtx xops[6];
11118      bool ok;
11119
11120
11121      xops[0] = operands[0];
11122
11123      if (<CODE> == SMAX || <CODE> == UMAX)
11124	{
11125	  xops[1] = operands[1];
11126	  xops[2] = operands[2];
11127	}
11128      else
11129	{
11130	  xops[1] = operands[2];
11131	  xops[2] = operands[1];
11132	}
11133
11134      code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
11135
11136      xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
11137      xops[4] = operands[1];
11138      xops[5] = operands[2];
11139
11140      ok = ix86_expand_int_vcond (xops);
11141      gcc_assert (ok);
11142      DONE;
11143    }
11144})
11145
;; Signed max/min expander for the 128-bit VI124_128 modes.
;; With TARGET_SSE4_1, or for V8HImode, the operands are only
;; canonicalized and the template is emitted.  Otherwise both inputs are
;; forced into registers and the operation is synthesized through
;; ix86_expand_int_vcond using a signed GT comparison of op1 and op2;
;; xops[1]/xops[2] are op1,op2 for SMAX and swapped for SMIN.
;; NOTE(review): this reads as "cond ? xops[1] : xops[2]"; confirm
;; against ix86_expand_int_vcond.  The expansion is asserted to succeed.
11146(define_expand "<code><mode>3"
11147  [(set (match_operand:VI124_128 0 "register_operand")
11148	(smaxmin:VI124_128
11149	  (match_operand:VI124_128 1 "vector_operand")
11150	  (match_operand:VI124_128 2 "vector_operand")))]
11151  "TARGET_SSE2"
11152{
11153  if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
11154    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
11155  else
11156    {
11157      rtx xops[6];
11158      bool ok;
11159
11160      xops[0] = operands[0];
11161      operands[1] = force_reg (<MODE>mode, operands[1]);
11162      operands[2] = force_reg (<MODE>mode, operands[2]);
11163
11164      if (<CODE> == SMAX)
11165	{
11166	  xops[1] = operands[1];
11167	  xops[2] = operands[2];
11168	}
11169      else
11170	{
11171	  xops[1] = operands[2];
11172	  xops[2] = operands[1];
11173	}
11174
11175      xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
11176      xops[4] = operands[1];
11177      xops[5] = operands[2];
11178
11179      ok = ix86_expand_int_vcond (xops);
11180      gcc_assert (ok);
11181      DONE;
11182    }
11183})
11184
;; Signed max/min insn for the VI14_128 modes under TARGET_SSE4_1.
;; Operand 1 is commutative ("%").  Alternatives 0 and 1 are the
;; two-operand p<maxmin_int>* form with the destination tied to
;; operand 1 (isa noavx); alternative 2 is the three-operand
;; vp<maxmin_int>* form, which prints the optional <mask_operand3>.
;; Both inputs being memory is rejected.
11185(define_insn "*sse4_1_<code><mode>3<mask_name>"
11186  [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
11187	(smaxmin:VI14_128
11188	  (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
11189	  (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
11190  "TARGET_SSE4_1
11191   && <mask_mode512bit_condition>
11192   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11193  "@
11194   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11195   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11196   vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11197  [(set_attr "isa" "noavx,noavx,avx")
11198   (set_attr "type" "sseiadd")
11199   (set_attr "prefix_extra" "1,1,*")
11200   (set_attr "prefix" "orig,orig,vex")
11201   (set_attr "mode" "TI")])
11202
;; Signed max/min insn for V8HI, available from TARGET_SSE2.
;; Alternatives:
;;   0: two-operand p<maxmin_int>w, destination tied to operand 1
;;      (isa noavx);
;;   1: three-operand vp<maxmin_int>w on "x" registers (isa avx, VEX);
;;   2: three-operand vp<maxmin_int>w on "v" registers (isa avx512bw,
;;      EVEX).
;; Operand 1 is commutative ("%"); both inputs being memory is rejected.
11203(define_insn "*<code>v8hi3"
11204  [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
11205	(smaxmin:V8HI
11206	  (match_operand:V8HI 1 "vector_operand" "%0,x,v")
11207	  (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
11208  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11209  "@
11210   p<maxmin_int>w\t{%2, %0|%0, %2}
11211   vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
11212   vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
11213  [(set_attr "isa" "noavx,avx,avx512bw")
11214   (set_attr "type" "sseiadd")
11215   (set_attr "prefix_data16" "1,*,*")
11216   (set_attr "prefix_extra" "*,1,1")
11217   (set_attr "prefix" "orig,vex,evex")
11218   (set_attr "mode" "TI")])
11219
;; Unsigned max/min expander for the 128-bit VI124_128 modes.
;; Three strategies:
;;  1. TARGET_SSE4_1, or V16QImode: canonicalize the operands and emit
;;     the template.
;;  2. UMAX on V8HImode without SSE4.1: emit sse2_ussubv8hi3 (op1, op2)
;;     followed by addv8hi3 with op2, i.e. (op1 -us op2) + op2 (by its
;;     name the subtract is unsigned-saturating).  A fresh temporary
;;     holds the intermediate when operand 0 equals operand 2, so that
;;     op2 is still intact for the add.
;;  3. Otherwise: force both inputs into registers and synthesize through
;;     ix86_expand_int_vcond using an unsigned GTU comparison of op1 and
;;     op2; xops[1]/xops[2] are op1,op2 for UMAX and swapped for UMIN.
;;     NOTE(review): this reads as "cond ? xops[1] : xops[2]"; confirm
;;     against ix86_expand_int_vcond.
11220(define_expand "<code><mode>3"
11221  [(set (match_operand:VI124_128 0 "register_operand")
11222	(umaxmin:VI124_128
11223	  (match_operand:VI124_128 1 "vector_operand")
11224	  (match_operand:VI124_128 2 "vector_operand")))]
11225  "TARGET_SSE2"
11226{
11227  if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
11228    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
11229  else if (<CODE> == UMAX && <MODE>mode == V8HImode)
11230    {
11231      rtx op0 = operands[0], op2 = operands[2], op3 = op0;
11232      operands[1] = force_reg (<MODE>mode, operands[1]);
11233      if (rtx_equal_p (op3, op2))
11234	op3 = gen_reg_rtx (V8HImode);
11235      emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
11236      emit_insn (gen_addv8hi3 (op0, op3, op2));
11237      DONE;
11238    }
11239  else
11240    {
11241      rtx xops[6];
11242      bool ok;
11243
11244      operands[1] = force_reg (<MODE>mode, operands[1]);
11245      operands[2] = force_reg (<MODE>mode, operands[2]);
11246
11247      xops[0] = operands[0];
11248
11249      if (<CODE> == UMAX)
11250	{
11251	  xops[1] = operands[1];
11252	  xops[2] = operands[2];
11253	}
11254      else
11255	{
11256	  xops[1] = operands[2];
11257	  xops[2] = operands[1];
11258	}
11259
11260      xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
11261      xops[4] = operands[1];
11262      xops[5] = operands[2];
11263
11264      ok = ix86_expand_int_vcond (xops);
11265      gcc_assert (ok);
11266      DONE;
11267    }
11268})
11269
;; Unsigned max/min insn for the VI24_128 modes under TARGET_SSE4_1.
;; Operand 1 is commutative ("%").  Alternatives 0 and 1 are the
;; two-operand p<maxmin_int>* form with the destination tied to
;; operand 1 (isa noavx); alternative 2 is the three-operand
;; vp<maxmin_int>* form, which prints the optional <mask_operand3>.
;; Both inputs being memory is rejected.
11270(define_insn "*sse4_1_<code><mode>3<mask_name>"
11271  [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
11272	(umaxmin:VI24_128
11273	  (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
11274	  (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
11275  "TARGET_SSE4_1
11276   && <mask_mode512bit_condition>
11277   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11278  "@
11279   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11280   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11281   vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11282  [(set_attr "isa" "noavx,noavx,avx")
11283   (set_attr "type" "sseiadd")
11284   (set_attr "prefix_extra" "1,1,*")
11285   (set_attr "prefix" "orig,orig,vex")
11286   (set_attr "mode" "TI")])
11287
;; Unsigned max/min insn for V16QI, available from TARGET_SSE2.
;; Alternatives:
;;   0: two-operand p<maxmin_int>b, destination tied to operand 1
;;      (isa noavx);
;;   1: three-operand vp<maxmin_int>b on "x" registers (isa avx, VEX);
;;   2: three-operand vp<maxmin_int>b on "v" registers (isa avx512bw,
;;      EVEX).
;; Operand 1 is commutative ("%"); both inputs being memory is rejected.
11288(define_insn "*<code>v16qi3"
11289  [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
11290	(umaxmin:V16QI
11291	  (match_operand:V16QI 1 "vector_operand" "%0,x,v")
11292	  (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
11293  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11294  "@
11295   p<maxmin_int>b\t{%2, %0|%0, %2}
11296   vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
11297   vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
11298  [(set_attr "isa" "noavx,avx,avx512bw")
11299   (set_attr "type" "sseiadd")
11300   (set_attr "prefix_data16" "1,*,*")
11301   (set_attr "prefix_extra" "*,1,1")
11302   (set_attr "prefix" "orig,vex,evex")
11303   (set_attr "mode" "TI")])
11304
11305;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11306;;
11307;; Parallel integral comparisons
11308;;
11309;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11310
;; Expander for element-wise equality comparison in the VI_256 modes
;; under TARGET_AVX2; the result has the same vector mode as the inputs.
;; Only canonicalizes the operands via ix86_fixup_binary_operands_no_copy
;; before the template is emitted.
11311(define_expand "avx2_eq<mode>3"
11312  [(set (match_operand:VI_256 0 "register_operand")
11313	(eq:VI_256
11314	  (match_operand:VI_256 1 "nonimmediate_operand")
11315	  (match_operand:VI_256 2 "nonimmediate_operand")))]
11316  "TARGET_AVX2"
11317  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11318
;; Equality comparison insn (vpcmpeq*) for the VI_256 modes under
;; TARGET_AVX2, VEX-encoded.  Operand 1 is commutative ("%"); operand 2
;; may be memory, but both inputs being memory is rejected.
11319(define_insn "*avx2_eq<mode>3"
11320  [(set (match_operand:VI_256 0 "register_operand" "=x")
11321	(eq:VI_256
11322	  (match_operand:VI_256 1 "nonimmediate_operand" "%x")
11323	  (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
11324  "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11325  "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11326  [(set_attr "type" "ssecmp")
11327   (set_attr "prefix_extra" "1")
11328   (set_attr "prefix" "vex")
11329   (set_attr "mode" "OI")])
11330
;; Expander for equality comparison producing a mask-mode result
;; (<avx512fmaskmode>) for the VI12_AVX512VL modes, represented as
;; UNSPEC_MASKED_EQ.  Requires TARGET_AVX512BW.  Only canonicalizes the
;; operands via ix86_fixup_binary_operands_no_copy.
11331(define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
11332  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
11333	(unspec:<avx512fmaskmode>
11334	  [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
11335	   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
11336	  UNSPEC_MASKED_EQ))]
11337  "TARGET_AVX512BW"
11338  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11339
;; Expander for equality comparison producing a mask-mode result
;; (<avx512fmaskmode>) for the VI48_AVX512VL modes, represented as
;; UNSPEC_MASKED_EQ.  Requires TARGET_AVX512F.  Only canonicalizes the
;; operands via ix86_fixup_binary_operands_no_copy.
11340(define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
11341  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
11342	(unspec:<avx512fmaskmode>
11343	  [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
11344	   (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
11345	  UNSPEC_MASKED_EQ))]
11346  "TARGET_AVX512F"
11347  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11348
;; Equality comparison into a mask register ("Yk") for the byte/word
;; VI12_AVX512VL modes, emitted as vpcmpeq* with the optional
;; <mask_scalar_merge_operand3>.  Operand 1 is commutative ("%"); both
;; inputs being memory is rejected.
;; The condition is TARGET_AVX512BW, matching the VI12_AVX512VL expander
;; of the same name: the mask-destination byte/word compares belong to
;; AVX512BW, so AVX512F alone must not enable this pattern.
11349(define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
11350  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11351	(unspec:<avx512fmaskmode>
11352	  [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "%v")
11353	   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11354	  UNSPEC_MASKED_EQ))]
11355  "TARGET_AVX512BW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11356  "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11357  [(set_attr "type" "ssecmp")
11358   (set_attr "prefix_extra" "1")
11359   (set_attr "prefix" "evex")
11360   (set_attr "mode" "<sseinsnmode>")])
11361
;; Equality comparison into a mask register ("Yk") for the
;; VI48_AVX512VL modes under TARGET_AVX512F, emitted as vpcmpeq* with the
;; optional <mask_scalar_merge_operand3>.  Operand 1 is commutative
;; ("%"); both inputs being memory is rejected.
11362(define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
11363  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11364	(unspec:<avx512fmaskmode>
11365	  [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
11366	   (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11367	  UNSPEC_MASKED_EQ))]
11368  "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11369  "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11370  [(set_attr "type" "ssecmp")
11371   (set_attr "prefix_extra" "1")
11372   (set_attr "prefix" "evex")
11373   (set_attr "mode" "<sseinsnmode>")])
11374
;; V2DI equality comparison insn under TARGET_SSE4_1.  Operand 1 is
;; commutative ("%").  Alternatives 0 and 1 are the two-operand pcmpeqq
;; form with the destination tied to operand 1 (isa noavx); alternative 2
;; is the three-operand vpcmpeqq form (isa avx).  Both inputs being
;; memory is rejected.
11375(define_insn "*sse4_1_eqv2di3"
11376  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
11377	(eq:V2DI
11378	  (match_operand:V2DI 1 "vector_operand" "%0,0,x")
11379	  (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
11380  "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11381  "@
11382   pcmpeqq\t{%2, %0|%0, %2}
11383   pcmpeqq\t{%2, %0|%0, %2}
11384   vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
11385  [(set_attr "isa" "noavx,noavx,avx")
11386   (set_attr "type" "ssecmp")
11387   (set_attr "prefix_extra" "1")
11388   (set_attr "prefix" "orig,orig,vex")
11389   (set_attr "mode" "TI")])
11390
;; 128-bit integer element-wise equality for the VI124_128 modes
;; (pcmpeq<ssemodesuffix> / vpcmpeq<ssemodesuffix>).
;; Alternative 0 is the two-operand non-AVX form, alternative 1 the
;; three-operand VEX form.  Disabled when TARGET_XOP is set.
11391(define_insn "*sse2_eq<mode>3"
11392  [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
11393	(eq:VI124_128
11394	  (match_operand:VI124_128 1 "vector_operand" "%0,x")
11395	  (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
11396  "TARGET_SSE2 && !TARGET_XOP
11397   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11398  "@
11399   pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
11400   vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11401  [(set_attr "isa" "noavx,avx")
11402   (set_attr "type" "ssecmp")
11403   (set_attr "prefix_data16" "1,*")
11404   (set_attr "prefix" "orig,vex")
11405   (set_attr "mode" "TI")])
11406
;; Named expander for VI124_128 equality.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy on the operands before the
;; (eq ...) RTL is emitted.  Same target condition as *sse2_eq<mode>3.
11407(define_expand "sse2_eq<mode>3"
11408  [(set (match_operand:VI124_128 0 "register_operand")
11409	(eq:VI124_128
11410	  (match_operand:VI124_128 1 "vector_operand")
11411	  (match_operand:VI124_128 2 "vector_operand")))]
11412  "TARGET_SSE2 && !TARGET_XOP "
11413  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11414
;; Named expander for V2DI equality.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy on the operands before the
;; (eq:V2DI ...) RTL is emitted.
11415(define_expand "sse4_1_eqv2di3"
11416  [(set (match_operand:V2DI 0 "register_operand")
11417	(eq:V2DI
11418	  (match_operand:V2DI 1 "vector_operand")
11419	  (match_operand:V2DI 2 "vector_operand")))]
11420  "TARGET_SSE4_1"
11421  "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
11422
;; V2DI element-wise signed greater-than (pcmpgtq / vpcmpgtq).
;; Not commutative: operand 1 must be a register.  Alternatives 0 and 1
;; are the two-operand non-AVX forms, alternative 2 the VEX form.
11423(define_insn "sse4_2_gtv2di3"
11424  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
11425	(gt:V2DI
11426	  (match_operand:V2DI 1 "register_operand" "0,0,x")
11427	  (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
11428  "TARGET_SSE4_2"
11429  "@
11430   pcmpgtq\t{%2, %0|%0, %2}
11431   pcmpgtq\t{%2, %0|%0, %2}
11432   vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
11433  [(set_attr "isa" "noavx,noavx,avx")
11434   (set_attr "type" "ssecmp")
11435   (set_attr "prefix_extra" "1")
11436   (set_attr "prefix" "orig,orig,vex")
11437   (set_attr "mode" "TI")])
11438
;; 256-bit integer element-wise greater-than for the VI_256 modes
;; (vpcmpgt<ssemodesuffix>, VEX encoded).  Operand 1 must be a register;
;; operand 2 may be a register or memory.
11439(define_insn "avx2_gt<mode>3"
11440  [(set (match_operand:VI_256 0 "register_operand" "=x")
11441	(gt:VI_256
11442	  (match_operand:VI_256 1 "register_operand" "x")
11443	  (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
11444  "TARGET_AVX2"
11445  "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11446  [(set_attr "type" "ssecmp")
11447   (set_attr "prefix_extra" "1")
11448   (set_attr "prefix" "vex")
11449   (set_attr "mode" "OI")])
11450
;; Masked-result greater-than compare for the VI48_AVX512VL modes: emits
;; vpcmpgt<ssemodesuffix> writing a mask register (operand 0, "Yk").
;; Operand 1 must be a register; operand 2 may be register or memory.
11451(define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
11452  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11453	(unspec:<avx512fmaskmode>
11454	  [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11455	   (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
11456  "TARGET_AVX512F"
11457  "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11458  [(set_attr "type" "ssecmp")
11459   (set_attr "prefix_extra" "1")
11460   (set_attr "prefix" "evex")
11461   (set_attr "mode" "<sseinsnmode>")])
11462
;; Masked-result greater-than compare for the VI12_AVX512VL modes: emits
;; vpcmpgt<ssemodesuffix> writing a mask register (operand 0, "Yk").
;; Gated on TARGET_AVX512BW, unlike the VI48 variant (TARGET_AVX512F).
11463(define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
11464  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11465	(unspec:<avx512fmaskmode>
11466	  [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11467	   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
11468  "TARGET_AVX512BW"
11469  "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11470  [(set_attr "type" "ssecmp")
11471   (set_attr "prefix_extra" "1")
11472   (set_attr "prefix" "evex")
11473   (set_attr "mode" "<sseinsnmode>")])
11474
;; 128-bit integer element-wise greater-than for the VI124_128 modes
;; (pcmpgt<ssemodesuffix> / vpcmpgt<ssemodesuffix>).
;; Alternative 0 is the two-operand non-AVX form (operand 1 tied to the
;; destination), alternative 1 the VEX form.  Disabled when TARGET_XOP.
11475(define_insn "sse2_gt<mode>3"
11476  [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
11477	(gt:VI124_128
11478	  (match_operand:VI124_128 1 "register_operand" "0,x")
11479	  (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
11480  "TARGET_SSE2 && !TARGET_XOP"
11481  "@
11482   pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
11483   vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11484  [(set_attr "isa" "noavx,avx")
11485   (set_attr "type" "ssecmp")
11486   (set_attr "prefix_data16" "1,*")
11487   (set_attr "prefix" "orig,vex")
11488   (set_attr "mode" "TI")])
11489
;; Vector conditional select, 512-bit data (V_512) with an integer
;; comparison on VI_AVX512BW operands 4 and 5 (operator 3).
;; Only enabled when data and comparison modes have the same number of
;; elements.  All work is done by ix86_expand_int_vcond; the RTL template
;; is not emitted because the body ends with DONE.
11490(define_expand "vcond<V_512:mode><VI_AVX512BW:mode>"
11491  [(set (match_operand:V_512 0 "register_operand")
11492	(if_then_else:V_512
11493	  (match_operator 3 ""
11494	    [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
11495	     (match_operand:VI_AVX512BW 5 "general_operand")])
11496	  (match_operand:V_512 1)
11497	  (match_operand:V_512 2)))]
11498  "TARGET_AVX512F
11499   && (GET_MODE_NUNITS (<V_512:MODE>mode)
11500       == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
11501{
  /* The helper's result is only checked by assertion.  */
11502  bool ok = ix86_expand_int_vcond (operands);
11503  gcc_assert (ok);
11504  DONE;
11505})
11506
;; Vector conditional select, 256-bit data (V_256) with an integer
;; comparison on VI_256 operands 4 and 5 (operator 3).
;; Only enabled when data and comparison modes have the same number of
;; elements.  Expansion is delegated to ix86_expand_int_vcond.
11507(define_expand "vcond<V_256:mode><VI_256:mode>"
11508  [(set (match_operand:V_256 0 "register_operand")
11509	(if_then_else:V_256
11510	  (match_operator 3 ""
11511	    [(match_operand:VI_256 4 "nonimmediate_operand")
11512	     (match_operand:VI_256 5 "general_operand")])
11513	  (match_operand:V_256 1)
11514	  (match_operand:V_256 2)))]
11515  "TARGET_AVX2
11516   && (GET_MODE_NUNITS (<V_256:MODE>mode)
11517       == GET_MODE_NUNITS (<VI_256:MODE>mode))"
11518{
  /* The helper's result is only checked by assertion.  */
11519  bool ok = ix86_expand_int_vcond (operands);
11520  gcc_assert (ok);
11521  DONE;
11522})
11523
;; Vector conditional select, 128-bit data (V_128) with an integer
;; comparison on VI124_128 operands 4 and 5 (operator 3).
;; Only enabled when data and comparison modes have the same number of
;; elements.  Expansion is delegated to ix86_expand_int_vcond.
11524(define_expand "vcond<V_128:mode><VI124_128:mode>"
11525  [(set (match_operand:V_128 0 "register_operand")
11526	(if_then_else:V_128
11527	  (match_operator 3 ""
11528	    [(match_operand:VI124_128 4 "vector_operand")
11529	     (match_operand:VI124_128 5 "general_operand")])
11530	  (match_operand:V_128 1)
11531	  (match_operand:V_128 2)))]
11532  "TARGET_SSE2
11533   && (GET_MODE_NUNITS (<V_128:MODE>mode)
11534       == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
11535{
  /* The helper's result is only checked by assertion.  */
11536  bool ok = ix86_expand_int_vcond (operands);
11537  gcc_assert (ok);
11538  DONE;
11539})
11540
;; Vector conditional select on VI8F_128 data with a V2DI comparison
;; (operator 3 on operands 4 and 5).  Requires TARGET_SSE4_2.
;; Expansion is delegated to ix86_expand_int_vcond.
11541(define_expand "vcond<VI8F_128:mode>v2di"
11542  [(set (match_operand:VI8F_128 0 "register_operand")
11543	(if_then_else:VI8F_128
11544	  (match_operator 3 ""
11545	    [(match_operand:V2DI 4 "vector_operand")
11546	     (match_operand:V2DI 5 "general_operand")])
11547	  (match_operand:VI8F_128 1)
11548	  (match_operand:VI8F_128 2)))]
11549  "TARGET_SSE4_2"
11550{
  /* The helper's result is only checked by assertion.  */
11551  bool ok = ix86_expand_int_vcond (operands);
11552  gcc_assert (ok);
11553  DONE;
11554})
11555
;; "vcondu" variant of the 512-bit conditional select: V_512 data,
;; VI_AVX512BW comparison operands.  Here operands 4 and 5 are both
;; nonimmediate_operand and the data operands 1 and 2 are general_operand.
;; Same element-count restriction; expansion is delegated to
;; ix86_expand_int_vcond.
11556(define_expand "vcondu<V_512:mode><VI_AVX512BW:mode>"
11557  [(set (match_operand:V_512 0 "register_operand")
11558	(if_then_else:V_512
11559	  (match_operator 3 ""
11560	    [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
11561	     (match_operand:VI_AVX512BW 5 "nonimmediate_operand")])
11562	  (match_operand:V_512 1 "general_operand")
11563	  (match_operand:V_512 2 "general_operand")))]
11564  "TARGET_AVX512F
11565   && (GET_MODE_NUNITS (<V_512:MODE>mode)
11566       == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
11567{
  /* The helper's result is only checked by assertion.  */
11568  bool ok = ix86_expand_int_vcond (operands);
11569  gcc_assert (ok);
11570  DONE;
11571})
11572
;; "vcondu" variant of the 256-bit conditional select: V_256 data,
;; VI_256 comparison operands (both nonimmediate_operand).
;; Same element-count restriction; expansion is delegated to
;; ix86_expand_int_vcond.
11573(define_expand "vcondu<V_256:mode><VI_256:mode>"
11574  [(set (match_operand:V_256 0 "register_operand")
11575	(if_then_else:V_256
11576	  (match_operator 3 ""
11577	    [(match_operand:VI_256 4 "nonimmediate_operand")
11578	     (match_operand:VI_256 5 "nonimmediate_operand")])
11579	  (match_operand:V_256 1 "general_operand")
11580	  (match_operand:V_256 2 "general_operand")))]
11581  "TARGET_AVX2
11582   && (GET_MODE_NUNITS (<V_256:MODE>mode)
11583       == GET_MODE_NUNITS (<VI_256:MODE>mode))"
11584{
  /* The helper's result is only checked by assertion.  */
11585  bool ok = ix86_expand_int_vcond (operands);
11586  gcc_assert (ok);
11587  DONE;
11588})
11589
;; "vcondu" variant of the 128-bit conditional select: V_128 data,
;; VI124_128 comparison operands (both vector_operand).
;; Same element-count restriction; expansion is delegated to
;; ix86_expand_int_vcond.
11590(define_expand "vcondu<V_128:mode><VI124_128:mode>"
11591  [(set (match_operand:V_128 0 "register_operand")
11592	(if_then_else:V_128
11593	  (match_operator 3 ""
11594	    [(match_operand:VI124_128 4 "vector_operand")
11595	     (match_operand:VI124_128 5 "vector_operand")])
11596	  (match_operand:V_128 1 "general_operand")
11597	  (match_operand:V_128 2 "general_operand")))]
11598  "TARGET_SSE2
11599   && (GET_MODE_NUNITS (<V_128:MODE>mode)
11600       == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
11601{
  /* The helper's result is only checked by assertion.  */
11602  bool ok = ix86_expand_int_vcond (operands);
11603  gcc_assert (ok);
11604  DONE;
11605})
11606
;; "vcondu" conditional select on VI8F_128 data with a V2DI comparison
;; (operands 4 and 5 both vector_operand).  Requires TARGET_SSE4_2.
;; Expansion is delegated to ix86_expand_int_vcond.
11607(define_expand "vcondu<VI8F_128:mode>v2di"
11608  [(set (match_operand:VI8F_128 0 "register_operand")
11609	(if_then_else:VI8F_128
11610	  (match_operator 3 ""
11611	    [(match_operand:V2DI 4 "vector_operand")
11612	     (match_operand:V2DI 5 "vector_operand")])
11613	  (match_operand:VI8F_128 1 "general_operand")
11614	  (match_operand:VI8F_128 2 "general_operand")))]
11615  "TARGET_SSE4_2"
11616{
  /* The helper's result is only checked by assertion.  */
11617  bool ok = ix86_expand_int_vcond (operands);
11618  gcc_assert (ok);
11619  DONE;
11620})
11621
;; "vcondeq" conditional select on VI8F_128 data with a V2DI comparison.
;; Unlike the vcond/vcondu V2DI expanders above (TARGET_SSE4_2), this one
;; only requires TARGET_SSE4_1.  Expansion is delegated to
;; ix86_expand_int_vcond.
11622(define_expand "vcondeq<VI8F_128:mode>v2di"
11623  [(set (match_operand:VI8F_128 0 "register_operand")
11624	(if_then_else:VI8F_128
11625	  (match_operator 3 ""
11626	    [(match_operand:V2DI 4 "vector_operand")
11627	     (match_operand:V2DI 5 "general_operand")])
11628	  (match_operand:VI8F_128 1)
11629	  (match_operand:VI8F_128 2)))]
11630  "TARGET_SSE4_1"
11631{
  /* The helper's result is only checked by assertion.  */
11632  bool ok = ix86_expand_int_vcond (operands);
11633  gcc_assert (ok);
11634  DONE;
11635})
11636
;; Modes accepted by the vec_perm expander below.  The six 128-bit modes
;; are unconditional; 256-bit modes need TARGET_AVX2; 512-bit SI/DI/SF/DF
;; modes need TARGET_AVX512F; V32HI needs TARGET_AVX512BW and V64QI needs
;; TARGET_AVX512VBMI.
11637(define_mode_iterator VEC_PERM_AVX2
11638  [V16QI V8HI V4SI V2DI V4SF V2DF
11639   (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
11640   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
11641   (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
11642   (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
11643   (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
11644   (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
11645
;; Variable vector permute.  Operand 0 is the result, operands 1 and 2
;; the input vectors, operand 3 an integer vector (<sseintvecmode>) used
;; as the selector.  All work is done by ix86_expand_vec_perm.
11646(define_expand "vec_perm<mode>"
11647  [(match_operand:VEC_PERM_AVX2 0 "register_operand")
11648   (match_operand:VEC_PERM_AVX2 1 "register_operand")
11649   (match_operand:VEC_PERM_AVX2 2 "register_operand")
11650   (match_operand:<sseintvecmode> 3 "register_operand")]
11651  "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
11652{
11653  ix86_expand_vec_perm (operands);
11654  DONE;
11655})
11656
11657;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11658;;
11659;; Parallel bitwise logical operations
11660;;
11661;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11662
;; Bitwise complement of an integer vector, expressed as XOR with an
;; all-ones vector.  Operand 2 is not supplied by the caller: the
;; preparation code sets it to CONSTM1_RTX of the mode, forced into a
;; register.
11663(define_expand "one_cmpl<mode>2"
11664  [(set (match_operand:VI 0 "register_operand")
11665	(xor:VI (match_operand:VI 1 "vector_operand")
11666		(match_dup 2)))]
11667  "TARGET_SSE"
11668{
11669  operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
11670})
11671
;; Named expander for and-not: operand 0 = (~operand 1) & operand 2.
;; Operand 1 (the complemented input) must be a register.
;; No preparation code; the template is emitted as written.
11672(define_expand "<sse2_avx2>_andnot<mode>3"
11673  [(set (match_operand:VI_AVX2 0 "register_operand")
11674	(and:VI_AVX2
11675	  (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
11676	  (match_operand:VI_AVX2 2 "vector_operand")))]
11677  "TARGET_SSE2")
11678
;; Masked and-not expander for the VI48_AVX512VL modes:
;; vec_merge of (~operand 1) & operand 2 with operand 3, under mask
;; operand 4.  Requires TARGET_AVX512F.
11679(define_expand "<sse2_avx2>_andnot<mode>3_mask"
11680  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
11681	(vec_merge:VI48_AVX512VL
11682	  (and:VI48_AVX512VL
11683	    (not:VI48_AVX512VL
11684	      (match_operand:VI48_AVX512VL 1 "register_operand"))
11685	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
11686	  (match_operand:VI48_AVX512VL 3 "vector_move_operand")
11687	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11688  "TARGET_AVX512F")
11689
;; Masked and-not expander for the VI12_AVX512VL modes; same shape as the
;; VI48 variant above but gated on TARGET_AVX512BW.
11690(define_expand "<sse2_avx2>_andnot<mode>3_mask"
11691  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
11692	(vec_merge:VI12_AVX512VL
11693	  (and:VI12_AVX512VL
11694	    (not:VI12_AVX512VL
11695	      (match_operand:VI12_AVX512VL 1 "register_operand"))
11696	    (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
11697	  (match_operand:VI12_AVX512VL 3 "vector_move_operand")
11698	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11699  "TARGET_AVX512BW")
11700
;; And-not on any integer vector mode (VI): operand 0 = ~operand 1 & operand 2.
;; The output template is built at run time in a static buffer from
;;   - a base mnemonic ("pandn" for integer insn modes, "andn" + "ps" for
;;     the packed-single insn modes),
;;   - an element-size suffix chosen from <MODE>mode, TARGET_AVX512VL and
;;     the alternative, and
;;   - an operand format: two-operand for alternative 0, "v"-prefixed
;;     three-operand for alternatives 1 and 2.
;; The "mode" attribute at the bottom decides which case of the first
;; switch is taken.
11701(define_insn "*andnot<mode>3"
11702  [(set (match_operand:VI 0 "register_operand" "=x,x,v")
11703	(and:VI
11704	  (not:VI (match_operand:VI 1 "register_operand" "0,x,v"))
11705	  (match_operand:VI 2 "vector_operand" "xBm,xm,vm")))]
11706  "TARGET_SSE"
11707{
  /* Returned to the caller, hence static.  */
11708  static char buf[64];
  /* ops: printf-style format; tmp: base mnemonic; ssesuffix: suffix.  */
11709  const char *ops;
11710  const char *tmp;
11711  const char *ssesuffix;
11712
11713  switch (get_attr_mode (insn))
11714    {
11715    case MODE_XI:
11716      gcc_assert (TARGET_AVX512F);
11717      /* FALLTHRU */
11718    case MODE_OI:
11719      gcc_assert (TARGET_AVX2);
11720      /* FALLTHRU */
11721    case MODE_TI:
11722      gcc_assert (TARGET_SSE2);
11723      tmp = "pandn";
11724      switch (<MODE>mode)
11725	{
11726	case E_V64QImode:
11727	case E_V32HImode:
11728	  /* There is no vpandnb or vpandnw instruction, nor vpandn for
11729	     512-bit vectors. Use vpandnq instead.  */
11730	  ssesuffix = "q";
11731	  break;
11732	case E_V16SImode:
11733	case E_V8DImode:
11734	  ssesuffix = "<ssemodesuffix>";
11735	  break;
11736	case E_V8SImode:
11737	case E_V4DImode:
11738	case E_V4SImode:
11739	case E_V2DImode:
	  /* Suffix only for the third ("v" constraint) alternative
	     when AVX512VL is enabled.  */
11740	  ssesuffix = (TARGET_AVX512VL && which_alternative == 2
11741		       ? "<ssemodesuffix>" : "");
11742	  break;
11743	default:
	  /* Remaining modes: "q" under the same condition, else none.  */
11744	  ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
11745	}
11746      break;
11747
11748    case MODE_V16SF:
11749      gcc_assert (TARGET_AVX512F);
11750      /* FALLTHRU */
11751    case MODE_V8SF:
11752      gcc_assert (TARGET_AVX);
11753      /* FALLTHRU */
11754    case MODE_V4SF:
11755      gcc_assert (TARGET_SSE);
11756      tmp = "andn";
11757      ssesuffix = "ps";
11758      break;
11759
11760    default:
11761      gcc_unreachable ();
11762    }
11763
  /* "%%" survives snprintf as "%", leaving operand references in the
     result.  */
11764  switch (which_alternative)
11765    {
11766    case 0:
11767      ops = "%s%s\t{%%2, %%0|%%0, %%2}";
11768      break;
11769    case 1:
11770    case 2:
11771      ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
11772      break;
11773    default:
11774      gcc_unreachable ();
11775    }
11776
11777  snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
11778  return buf;
11779}
11780  [(set_attr "isa" "noavx,avx,avx")
11781   (set_attr "type" "sselog")
11782   (set (attr "prefix_data16")
11783     (if_then_else
11784       (and (eq_attr "alternative" "0")
11785	    (eq_attr "mode" "TI"))
11786       (const_string "1")
11787       (const_string "*")))
11788   (set_attr "prefix" "orig,vex,evex")
   ;; Insn mode: first matching test wins; falls back to <sseinsnmode>.
11789   (set (attr "mode")
11790	(cond [(and (match_test "<MODE_SIZE> == 16")
11791		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11792		 (const_string "<ssePSmode>")
11793	       (match_test "TARGET_AVX2")
11794		 (const_string "<sseinsnmode>")
11795	       (match_test "TARGET_AVX")
11796		 (if_then_else
11797		   (match_test "<MODE_SIZE> > 16")
11798		   (const_string "V8SF")
11799		   (const_string "<sseinsnmode>"))
11800	       (ior (not (match_test "TARGET_SSE2"))
11801		    (match_test "optimize_function_for_size_p (cfun)"))
11802		 (const_string "V4SF")
11803	      ]
11804	      (const_string "<sseinsnmode>")))])
11805
;; Masked and-not insn for the VI48_AVX512VL modes: emits
;; vpandn<ssemodesuffix> with mask operand 4 applied to the destination.
;; Operand 3 is the merge source: either tied to the destination ("0") or
;; a constant matched by "C"; %N3 in the template prints the
;; corresponding modifier.
;; (A stray ';' that followed the output template string has been
;; dropped; in md syntax it only started an empty comment.)
11806(define_insn "*andnot<mode>3_mask"
11807  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11808	(vec_merge:VI48_AVX512VL
11809	  (and:VI48_AVX512VL
11810	    (not:VI48_AVX512VL
11811	      (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
11812	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
11813	  (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
11814	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
11815  "TARGET_AVX512F"
11816  "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
11817  [(set_attr "type" "sselog")
11818   (set_attr "prefix" "evex")
11819   (set_attr "mode" "<sseinsnmode>")])
11820
;; Named expander for the any_logic operations on integer vectors (VI).
;; Operands 1 and 2 may be registers, memory or constant vectors; all
;; work is done by ix86_expand_vector_logical_operator and the template
;; is not emitted (DONE).
11821(define_expand "<code><mode>3"
11822  [(set (match_operand:VI 0 "register_operand")
11823	(any_logic:VI
11824	  (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
11825	  (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
11826  "TARGET_SSE"
11827{
11828  ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
11829  DONE;
11830})
11831
;; any_logic insn for the VI48_AVX_AVX512F modes, with an optional masked
;; variant generated through the <mask_name> substitution.
;; The output template is built at run time: base mnemonic "p<logic>"
;; (integer insn modes) or "<logic>" + "ps" (packed-single insn modes),
;; an element-size suffix, and an operand format that depends on the
;; alternative and on <mask_applied>.
11832(define_insn "<mask_codefor><code><mode>3<mask_name>"
11833  [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,x,v")
11834	(any_logic:VI48_AVX_AVX512F
11835	  (match_operand:VI48_AVX_AVX512F 1 "vector_operand" "%0,x,v")
11836	  (match_operand:VI48_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
11837  "TARGET_SSE && <mask_mode512bit_condition>
11838   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11839{
  /* Returned to the caller, hence static.  */
11840  static char buf[64];
  /* ops: printf-style format; tmp: base mnemonic; ssesuffix: suffix.  */
11841  const char *ops;
11842  const char *tmp;
11843  const char *ssesuffix;
11844
11845  switch (get_attr_mode (insn))
11846    {
11847    case MODE_XI:
11848      gcc_assert (TARGET_AVX512F);
11849      /* FALLTHRU */
11850    case MODE_OI:
11851      gcc_assert (TARGET_AVX2);
11852      /* FALLTHRU */
11853    case MODE_TI:
11854      gcc_assert (TARGET_SSE2);
11855      tmp = "p<logic>";
11856      switch (<MODE>mode)
11857	{
11858	case E_V16SImode:
11859	case E_V8DImode:
11860	  ssesuffix = "<ssemodesuffix>";
11861	  break;
11862	case E_V8SImode:
11863	case E_V4DImode:
11864	case E_V4SImode:
11865	case E_V2DImode:
	  /* Suffix only with AVX512VL, and only when masked or in the
	     third ("v" constraint) alternative.  */
11866	  ssesuffix = (TARGET_AVX512VL
11867		       && (<mask_applied> || which_alternative == 2)
11868		       ? "<ssemodesuffix>" : "");
11869	  break;
11870	default:
11871	  gcc_unreachable ();
11872	}
11873      break;
11874
11875    case MODE_V8SF:
11876      gcc_assert (TARGET_AVX);
11877      /* FALLTHRU */
11878    case MODE_V4SF:
11879      gcc_assert (TARGET_SSE);
11880      tmp = "<logic>";
11881      ssesuffix = "ps";
11882      break;
11883
11884    default:
11885      gcc_unreachable ();
11886    }
11887
  /* "%%" survives snprintf as "%", leaving operand references in the
     result.  */
11888  switch (which_alternative)
11889    {
11890    case 0:
      /* Masked: "v"-prefixed form with operand 0 as both destination
	 and first source.  */
11891      if (<mask_applied>)
11892        ops = "v%s%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
11893      else
11894        ops = "%s%s\t{%%2, %%0|%%0, %%2}";
11895      break;
11896    case 1:
11897    case 2:
11898      ops = "v%s%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
11899      break;
11900    default:
11901      gcc_unreachable ();
11902    }
11903
11904  snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
11905  return buf;
11906}
11907  [(set_attr "isa" "noavx,avx,avx")
11908   (set_attr "type" "sselog")
11909   (set (attr "prefix_data16")
11910     (if_then_else
11911       (and (eq_attr "alternative" "0")
11912	    (eq_attr "mode" "TI"))
11913       (const_string "1")
11914       (const_string "*")))
11915   (set_attr "prefix" "<mask_prefix3>,evex")
   ;; Insn mode: first matching test wins; falls back to <sseinsnmode>.
11916   (set (attr "mode")
11917	(cond [(and (match_test "<MODE_SIZE> == 16")
11918		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11919		 (const_string "<ssePSmode>")
11920	       (match_test "TARGET_AVX2")
11921		 (const_string "<sseinsnmode>")
11922	       (match_test "TARGET_AVX")
11923		 (if_then_else
11924		   (match_test "<MODE_SIZE> > 16")
11925		   (const_string "V8SF")
11926		   (const_string "<sseinsnmode>"))
11927	       (ior (not (match_test "TARGET_SSE2"))
11928		    (match_test "optimize_function_for_size_p (cfun)"))
11929		 (const_string "V4SF")
11930	      ]
11931	      (const_string "<sseinsnmode>")))])
11932
;; any_logic insn for the VI12_AVX_AVX512F modes (no masked variant).
;; The output template is built at run time: base mnemonic "p<logic>"
;; (integer insn modes) or "<logic>" + "ps" (packed-single insn modes).
;; For the integer case the suffix is "q" for V64QI/V32HI, and for the
;; narrower modes "q" only with AVX512VL in the third alternative.
11933(define_insn "*<code><mode>3"
11934  [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,x,v")
11935	(any_logic:VI12_AVX_AVX512F
11936	  (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,x,v")
11937	  (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
11938  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11939{
  /* Returned to the caller, hence static.  */
11940  static char buf[64];
  /* ops: printf-style format; tmp: base mnemonic; ssesuffix: suffix.  */
11941  const char *ops;
11942  const char *tmp;
11943  const char *ssesuffix;
11944
11945  switch (get_attr_mode (insn))
11946    {
11947    case MODE_XI:
11948      gcc_assert (TARGET_AVX512F);
11949      /* FALLTHRU */
11950    case MODE_OI:
11951      gcc_assert (TARGET_AVX2);
11952      /* FALLTHRU */
11953    case MODE_TI:
11954      gcc_assert (TARGET_SSE2);
11955      tmp = "p<logic>";
11956      switch (<MODE>mode)
11957	{
11958	case E_V64QImode:
11959	case E_V32HImode:
11960	  ssesuffix = "q";
11961	  break;
11962	case E_V32QImode:
11963	case E_V16HImode:
11964	case E_V16QImode:
11965	case E_V8HImode:
11966	  ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
11967	  break;
11968	default:
11969	  gcc_unreachable ();
11970	}
11971      break;
11972
11973    case MODE_V8SF:
11974      gcc_assert (TARGET_AVX);
11975      /* FALLTHRU */
11976    case MODE_V4SF:
11977      gcc_assert (TARGET_SSE);
11978      tmp = "<logic>";
11979      ssesuffix = "ps";
11980      break;
11981
11982    default:
11983      gcc_unreachable ();
11984    }
11985
  /* "%%" survives snprintf as "%", leaving operand references in the
     result.  */
11986  switch (which_alternative)
11987    {
11988    case 0:
11989      ops = "%s%s\t{%%2, %%0|%%0, %%2}";
11990      break;
11991    case 1:
11992    case 2:
11993      ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
11994      break;
11995    default:
11996      gcc_unreachable ();
11997    }
11998
11999  snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
12000  return buf;
12001}
12002  [(set_attr "isa" "noavx,avx,avx")
12003   (set_attr "type" "sselog")
12004   (set (attr "prefix_data16")
12005     (if_then_else
12006       (and (eq_attr "alternative" "0")
12007	    (eq_attr "mode" "TI"))
12008       (const_string "1")
12009       (const_string "*")))
12010   (set_attr "prefix" "orig,vex,evex")
   ;; Insn mode: first matching test wins; falls back to <sseinsnmode>.
12011   (set (attr "mode")
12012	(cond [(and (match_test "<MODE_SIZE> == 16")
12013		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
12014		 (const_string "<ssePSmode>")
12015	       (match_test "TARGET_AVX2")
12016		 (const_string "<sseinsnmode>")
12017	       (match_test "TARGET_AVX")
12018		 (if_then_else
12019		   (match_test "<MODE_SIZE> > 16")
12020		   (const_string "V8SF")
12021		   (const_string "<sseinsnmode>"))
12022	       (ior (not (match_test "TARGET_SSE2"))
12023		    (match_test "optimize_function_for_size_p (cfun)"))
12024		 (const_string "V4SF")
12025	      ]
12026	      (const_string "<sseinsnmode>")))])
12027
;; vptestm for the VI12_AVX512VL modes: UNSPEC_TESTM of operands 1 and 2,
;; result in a mask register (operand 0, "Yk").  Requires TARGET_AVX512BW.
12028(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
12029  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
12030	(unspec:<avx512fmaskmode>
12031	 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
12032	  (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
12033	 UNSPEC_TESTM))]
12034  "TARGET_AVX512BW"
12035  "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12036  [(set_attr "prefix" "evex")
12037   (set_attr "mode"  "<sseinsnmode>")])
12038
;; vptestm for the VI48_AVX512VL modes: UNSPEC_TESTM of operands 1 and 2,
;; result in a mask register (operand 0, "Yk").  Requires TARGET_AVX512F.
12039(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
12040  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
12041	(unspec:<avx512fmaskmode>
12042	 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
12043	  (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
12044	 UNSPEC_TESTM))]
12045  "TARGET_AVX512F"
12046  "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12047  [(set_attr "prefix" "evex")
12048   (set_attr "mode"  "<sseinsnmode>")])
12049
;; vptestnm for the VI12_AVX512VL modes: UNSPEC_TESTNM of operands 1 and
;; 2, result in a mask register (operand 0, "Yk").  Requires
;; TARGET_AVX512BW.
12050(define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
12051  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
12052	(unspec:<avx512fmaskmode>
12053	 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
12054	  (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
12055	 UNSPEC_TESTNM))]
12056  "TARGET_AVX512BW"
12057  "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12058  [(set_attr "prefix" "evex")
12059   (set_attr "mode"  "<sseinsnmode>")])
12060
;; vptestnm for the VI48_AVX512VL modes: UNSPEC_TESTNM of operands 1 and
;; 2, result in a mask register (operand 0, "Yk").  Requires
;; TARGET_AVX512F.
12061(define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
12062  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
12063	(unspec:<avx512fmaskmode>
12064	 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
12065	  (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
12066	 UNSPEC_TESTNM))]
12067  "TARGET_AVX512F"
12068  "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12069  [(set_attr "prefix" "evex")
12070   (set_attr "mode"  "<sseinsnmode>")])
12071
12072;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12073;;
12074;; Parallel integral element swizzling
12075;;
12076;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12077
;; Truncating pack of two integer vectors (operands 1 and 2) into one
;; vector of the pack mode (<ssepackmode>, operand 0).
;; Both inputs are reinterpreted in the pack mode with gen_lowpart and
;; then handed to ix86_expand_vec_extract_even_odd with last argument 0.
;; NOTE(review): the 0 presumably selects the even elements -- confirm
;; against the helper's definition.
12078(define_expand "vec_pack_trunc_<mode>"
12079  [(match_operand:<ssepackmode> 0 "register_operand")
12080   (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
12081   (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
12082  "TARGET_SSE2"
12083{
12084  rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
12085  rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
12086  ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
12087  DONE;
12088})
12089
;; Pack two QImode values into one HImode value:
;; operand 0 = (zero_extend (operand 2) << 8) | zero_extend (operand 1),
;; i.e. operand 1 lands in the low byte and operand 2 in the high byte.
;; Requires TARGET_AVX512F.
12090(define_expand "vec_pack_trunc_qi"
12091  [(set (match_operand:HI 0 ("register_operand"))
12092        (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 ("register_operand")))
12093                           (const_int 8))
12094                (zero_extend:HI (match_operand:QI 1 ("register_operand")))))]
12095  "TARGET_AVX512F")
12096
;; Pack two SWI24 values into one <DOUBLEMASKMODE> value:
;; operand 0 = (zero_extend (operand 2) << N) | zero_extend (operand 1),
;; where N (operand 3, set in the preparation code) is the bit size of
;; the input mode.  Requires TARGET_AVX512BW.
12097(define_expand "vec_pack_trunc_<mode>"
12098  [(set (match_operand:<DOUBLEMASKMODE> 0 ("register_operand"))
12099        (ior:<DOUBLEMASKMODE> (ashift:<DOUBLEMASKMODE> (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 2 ("register_operand")))
12100                           (match_dup 3))
12101                (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 1 ("register_operand")))))]
12102  "TARGET_AVX512BW"
12103{
  /* Shift count for the high half.  */
12104  operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
12105})
12106
;; packsswb / vpacksswb: concatenation of the signed-saturating
;; truncations (ss_truncate) of operands 1 and 2 into a VI1_AVX512 result.
;; Alternative 0 is the two-operand non-AVX form; alternatives 1 and 2
;; are the three-operand forms (isa avx and avx512bw) and carry the
;; optional mask operand.
12107(define_insn "<sse2_avx2>_packsswb<mask_name>"
12108  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
12109	(vec_concat:VI1_AVX512
12110	  (ss_truncate:<ssehalfvecmode>
12111	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
12112	  (ss_truncate:<ssehalfvecmode>
12113	    (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
12114  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12115  "@
12116   packsswb\t{%2, %0|%0, %2}
12117   vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
12118   vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12119  [(set_attr "isa" "noavx,avx,avx512bw")
12120   (set_attr "type" "sselog")
12121   (set_attr "prefix_data16" "1,*,*")
12122   (set_attr "prefix" "orig,<mask_prefix>,evex")
12123   (set_attr "mode" "<sseinsnmode>")])
12124
;; packssdw / vpackssdw: concatenation of the signed-saturating
;; truncations (ss_truncate) of operands 1 and 2 into a VI2_AVX2 result.
;; Alternative layout is the same as for packsswb.
12125(define_insn "<sse2_avx2>_packssdw<mask_name>"
12126  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
12127	(vec_concat:VI2_AVX2
12128	  (ss_truncate:<ssehalfvecmode>
12129	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
12130	  (ss_truncate:<ssehalfvecmode>
12131	    (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
12132  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12133  "@
12134   packssdw\t{%2, %0|%0, %2}
12135   vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
12136   vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12137  [(set_attr "isa" "noavx,avx,avx512bw")
12138   (set_attr "type" "sselog")
12139   (set_attr "prefix_data16" "1,*,*")
12140   (set_attr "prefix" "orig,<mask_prefix>,evex")
12141   (set_attr "mode" "<sseinsnmode>")])
12142
;; packuswb / vpackuswb: concatenation of the unsigned-saturating
;; truncations (us_truncate) of operands 1 and 2 into a VI1_AVX512 result.
;; Alternative layout is the same as for packsswb.
12143(define_insn "<sse2_avx2>_packuswb<mask_name>"
12144  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
12145	(vec_concat:VI1_AVX512
12146	  (us_truncate:<ssehalfvecmode>
12147	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
12148	  (us_truncate:<ssehalfvecmode>
12149	    (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
12150  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12151  "@
12152   packuswb\t{%2, %0|%0, %2}
12153   vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
12154   vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12155  [(set_attr "isa" "noavx,avx,avx512bw")
12156   (set_attr "type" "sselog")
12157   (set_attr "prefix_data16" "1,*,*")
12158   (set_attr "prefix" "orig,<mask_prefix>,evex")
12159   (set_attr "mode" "<sseinsnmode>")])
12160
;; vpunpckhbw on V64QI.  The selection is expressed on the 128-element
;; concatenation of operand 1 (indices 0-63) and operand 2 (64-127):
;; for each 16-byte group (bases 0, 16, 32, 48) it interleaves bytes
;; 8..15 of operand 1 with bytes 8..15 of operand 2.
12161(define_insn "avx512bw_interleave_highv64qi<mask_name>"
12162  [(set (match_operand:V64QI 0 "register_operand" "=v")
12163	(vec_select:V64QI
12164	  (vec_concat:V128QI
12165	    (match_operand:V64QI 1 "register_operand" "v")
12166	    (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
12167	  (parallel [(const_int 8)  (const_int 72)
12168		     (const_int 9)  (const_int 73)
12169		     (const_int 10) (const_int 74)
12170		     (const_int 11) (const_int 75)
12171		     (const_int 12) (const_int 76)
12172		     (const_int 13) (const_int 77)
12173		     (const_int 14) (const_int 78)
12174		     (const_int 15) (const_int 79)
12175		     (const_int 24) (const_int 88)
12176		     (const_int 25) (const_int 89)
12177		     (const_int 26) (const_int 90)
12178		     (const_int 27) (const_int 91)
12179		     (const_int 28) (const_int 92)
12180		     (const_int 29) (const_int 93)
12181		     (const_int 30) (const_int 94)
12182		     (const_int 31) (const_int 95)
12183		     (const_int 40) (const_int 104)
12184		     (const_int 41) (const_int 105)
12185		     (const_int 42) (const_int 106)
12186		     (const_int 43) (const_int 107)
12187		     (const_int 44) (const_int 108)
12188		     (const_int 45) (const_int 109)
12189		     (const_int 46) (const_int 110)
12190		     (const_int 47) (const_int 111)
12191		     (const_int 56) (const_int 120)
12192		     (const_int 57) (const_int 121)
12193		     (const_int 58) (const_int 122)
12194		     (const_int 59) (const_int 123)
12195		     (const_int 60) (const_int 124)
12196		     (const_int 61) (const_int 125)
12197		     (const_int 62) (const_int 126)
12198		     (const_int 63) (const_int 127)])))]
12199  "TARGET_AVX512BW"
12200  "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12201  [(set_attr "type" "sselog")
12202   (set_attr "prefix" "evex")
12203   (set_attr "mode" "XI")])
12204
;; vpunpckhbw on V32QI.  The selection is expressed on the 64-element
;; concatenation of operand 1 (indices 0-31) and operand 2 (32-63):
;; for each 16-byte group (bases 0 and 16) it interleaves bytes 8..15 of
;; operand 1 with bytes 8..15 of operand 2.
12205(define_insn "avx2_interleave_highv32qi<mask_name>"
12206  [(set (match_operand:V32QI 0 "register_operand" "=v")
12207	(vec_select:V32QI
12208	  (vec_concat:V64QI
12209	    (match_operand:V32QI 1 "register_operand" "v")
12210	    (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
12211	  (parallel [(const_int 8)  (const_int 40)
12212		     (const_int 9)  (const_int 41)
12213		     (const_int 10) (const_int 42)
12214		     (const_int 11) (const_int 43)
12215		     (const_int 12) (const_int 44)
12216		     (const_int 13) (const_int 45)
12217		     (const_int 14) (const_int 46)
12218		     (const_int 15) (const_int 47)
12219		     (const_int 24) (const_int 56)
12220		     (const_int 25) (const_int 57)
12221		     (const_int 26) (const_int 58)
12222		     (const_int 27) (const_int 59)
12223		     (const_int 28) (const_int 60)
12224		     (const_int 29) (const_int 61)
12225		     (const_int 30) (const_int 62)
12226		     (const_int 31) (const_int 63)])))]
12227  "TARGET_AVX2 && <mask_avx512vl_condition>"
12228  "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12229  [(set_attr "type" "sselog")
12230   (set_attr "prefix" "<mask_prefix>")
12231   (set_attr "mode" "OI")])
12232
;; punpckhbw / vpunpckhbw on V16QI.  The selection is expressed on the
;; 32-element concatenation of operand 1 (indices 0-15) and operand 2
;; (16-31): it interleaves bytes 8..15 of operand 1 with bytes 8..15 of
;; operand 2.  Alternative 0 is the two-operand non-AVX form,
;; alternative 1 the three-operand form with the optional mask operand.
12233(define_insn "vec_interleave_highv16qi<mask_name>"
12234  [(set (match_operand:V16QI 0 "register_operand" "=x,v")
12235	(vec_select:V16QI
12236	  (vec_concat:V32QI
12237	    (match_operand:V16QI 1 "register_operand" "0,v")
12238	    (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
12239	  (parallel [(const_int 8)  (const_int 24)
12240		     (const_int 9)  (const_int 25)
12241		     (const_int 10) (const_int 26)
12242		     (const_int 11) (const_int 27)
12243		     (const_int 12) (const_int 28)
12244		     (const_int 13) (const_int 29)
12245		     (const_int 14) (const_int 30)
12246		     (const_int 15) (const_int 31)])))]
12247  "TARGET_SSE2 && <mask_avx512vl_condition>"
12248  "@
12249   punpckhbw\t{%2, %0|%0, %2}
12250   vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12251  [(set_attr "isa" "noavx,avx")
12252   (set_attr "type" "sselog")
12253   (set_attr "prefix_data16" "1,*")
12254   (set_attr "prefix" "orig,<mask_prefix>")
12255   (set_attr "mode" "TI")])
12256
;; 512-bit byte low-interleave, emitted as vpunpcklbw under TARGET_AVX512BW.
;; For each 16-byte group (starting at 0, 16, 32 and 48) the selector takes
;; the first eight bytes of operand 1 and pairs each with the same-numbered
;; byte of operand 2 (index + 64 in the concatenation).
12257(define_insn "avx512bw_interleave_lowv64qi<mask_name>"
12258  [(set (match_operand:V64QI 0 "register_operand" "=v")
12259	(vec_select:V64QI
12260	  (vec_concat:V128QI
12261	    (match_operand:V64QI 1 "register_operand" "v")
12262	    (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
12263	  (parallel [(const_int 0) (const_int 64)
12264		     (const_int 1) (const_int 65)
12265		     (const_int 2) (const_int 66)
12266		     (const_int 3) (const_int 67)
12267		     (const_int 4) (const_int 68)
12268		     (const_int 5) (const_int 69)
12269		     (const_int 6) (const_int 70)
12270		     (const_int 7) (const_int 71)
12271		     (const_int 16) (const_int 80)
12272		     (const_int 17) (const_int 81)
12273		     (const_int 18) (const_int 82)
12274		     (const_int 19) (const_int 83)
12275		     (const_int 20) (const_int 84)
12276		     (const_int 21) (const_int 85)
12277		     (const_int 22) (const_int 86)
12278		     (const_int 23) (const_int 87)
12279		     (const_int 32) (const_int 96)
12280		     (const_int 33) (const_int 97)
12281		     (const_int 34) (const_int 98)
12282		     (const_int 35) (const_int 99)
12283		     (const_int 36) (const_int 100)
12284		     (const_int 37) (const_int 101)
12285		     (const_int 38) (const_int 102)
12286		     (const_int 39) (const_int 103)
12287		     (const_int 48) (const_int 112)
12288		     (const_int 49) (const_int 113)
12289		     (const_int 50) (const_int 114)
12290		     (const_int 51) (const_int 115)
12291		     (const_int 52) (const_int 116)
12292		     (const_int 53) (const_int 117)
12293		     (const_int 54) (const_int 118)
12294		     (const_int 55) (const_int 119)])))]
12295  "TARGET_AVX512BW"
12296  "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12297  [(set_attr "type" "sselog")
12298   (set_attr "prefix" "evex")
12299   (set_attr "mode" "XI")])
12300
;; 256-bit byte low-interleave, emitted as vpunpcklbw.
;; Bytes 0-7 and 16-23 of operand 1 are each paired with the same-numbered
;; byte of operand 2 (indices 32-39 and 48-55 of the concatenation).
;; The <mask_name> variant is additionally gated on both
;; <mask_avx512vl_condition> and <mask_avx512bw_condition>.
12301(define_insn "avx2_interleave_lowv32qi<mask_name>"
12302  [(set (match_operand:V32QI 0 "register_operand" "=v")
12303	(vec_select:V32QI
12304	  (vec_concat:V64QI
12305	    (match_operand:V32QI 1 "register_operand" "v")
12306	    (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
12307	  (parallel [(const_int 0) (const_int 32)
12308		     (const_int 1) (const_int 33)
12309		     (const_int 2) (const_int 34)
12310		     (const_int 3) (const_int 35)
12311		     (const_int 4) (const_int 36)
12312		     (const_int 5) (const_int 37)
12313		     (const_int 6) (const_int 38)
12314		     (const_int 7) (const_int 39)
12315		     (const_int 16) (const_int 48)
12316		     (const_int 17) (const_int 49)
12317		     (const_int 18) (const_int 50)
12318		     (const_int 19) (const_int 51)
12319		     (const_int 20) (const_int 52)
12320		     (const_int 21) (const_int 53)
12321		     (const_int 22) (const_int 54)
12322		     (const_int 23) (const_int 55)])))]
12323  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12324  "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12325  [(set_attr "type" "sselog")
12326   (set_attr "prefix" "maybe_vex")
12327   (set_attr "mode" "OI")])
12328
;; 128-bit byte low-interleave: bytes 0-7 of operand 1 paired with bytes 0-7
;; of operand 2 (indices 16-23 of the concatenation).
;; Alternative 0 is the two-operand SSE2 form (destination tied to operand 1);
;; alternative 1 is the three-operand "v"-prefixed form.
;; The AVX alternative's "prefix" follows <mask_prefix>, as in
;; vec_interleave_highv16qi, so the masked variant is not described as
;; VEX-encoded.
12329(define_insn "vec_interleave_lowv16qi<mask_name>"
12330  [(set (match_operand:V16QI 0 "register_operand" "=x,v")
12331	(vec_select:V16QI
12332	  (vec_concat:V32QI
12333	    (match_operand:V16QI 1 "register_operand" "0,v")
12334	    (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
12335	  (parallel [(const_int 0) (const_int 16)
12336		     (const_int 1) (const_int 17)
12337		     (const_int 2) (const_int 18)
12338		     (const_int 3) (const_int 19)
12339		     (const_int 4) (const_int 20)
12340		     (const_int 5) (const_int 21)
12341		     (const_int 6) (const_int 22)
12342		     (const_int 7) (const_int 23)])))]
12343  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12344  "@
12345   punpcklbw\t{%2, %0|%0, %2}
12346   vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12347  [(set_attr "isa" "noavx,avx")
12348   (set_attr "type" "sselog")
12349   (set_attr "prefix_data16" "1,*")
12350   (set_attr "prefix" "orig,<mask_prefix>")
12351   (set_attr "mode" "TI")])
12352
;; 512-bit word high-interleave, emitted as vpunpckhwd under TARGET_AVX512BW.
;; For each 8-word group (starting at 0, 8, 16 and 24) the selector takes the
;; last four words of operand 1 and pairs each with the same-numbered word of
;; operand 2 (index + 32 in the concatenation).
12353(define_insn "avx512bw_interleave_highv32hi<mask_name>"
12354  [(set (match_operand:V32HI 0 "register_operand" "=v")
12355	(vec_select:V32HI
12356	  (vec_concat:V64HI
12357	    (match_operand:V32HI 1 "register_operand" "v")
12358	    (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
12359	  (parallel [(const_int 4) (const_int 36)
12360		     (const_int 5) (const_int 37)
12361		     (const_int 6) (const_int 38)
12362		     (const_int 7) (const_int 39)
12363		     (const_int 12) (const_int 44)
12364		     (const_int 13) (const_int 45)
12365		     (const_int 14) (const_int 46)
12366		     (const_int 15) (const_int 47)
12367		     (const_int 20) (const_int 52)
12368		     (const_int 21) (const_int 53)
12369		     (const_int 22) (const_int 54)
12370		     (const_int 23) (const_int 55)
12371		     (const_int 28) (const_int 60)
12372		     (const_int 29) (const_int 61)
12373		     (const_int 30) (const_int 62)
12374		     (const_int 31) (const_int 63)])))]
12375  "TARGET_AVX512BW"
12376  "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12377  [(set_attr "type" "sselog")
12378   (set_attr "prefix" "evex")
12379   (set_attr "mode" "XI")])
12380
;; 256-bit word high-interleave, emitted as vpunpckhwd.
;; Words 4-7 and 12-15 of operand 1 are each paired with the same-numbered
;; word of operand 2 (indices 20-23 and 28-31 of the concatenation).
;; The <mask_name> variant is additionally gated on both
;; <mask_avx512vl_condition> and <mask_avx512bw_condition>.
12381(define_insn "avx2_interleave_highv16hi<mask_name>"
12382  [(set (match_operand:V16HI 0 "register_operand" "=v")
12383	(vec_select:V16HI
12384	  (vec_concat:V32HI
12385	    (match_operand:V16HI 1 "register_operand" "v")
12386	    (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
12387	  (parallel [(const_int 4) (const_int 20)
12388		     (const_int 5) (const_int 21)
12389		     (const_int 6) (const_int 22)
12390		     (const_int 7) (const_int 23)
12391		     (const_int 12) (const_int 28)
12392		     (const_int 13) (const_int 29)
12393		     (const_int 14) (const_int 30)
12394		     (const_int 15) (const_int 31)])))]
12395  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12396  "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12397  [(set_attr "type" "sselog")
12398   (set_attr "prefix" "maybe_evex")
12399   (set_attr "mode" "OI")])
12400
;; 128-bit word high-interleave: words 4-7 of operand 1 paired with words 4-7
;; of operand 2 (indices 12-15 of the concatenation).
;; Alternative 0 is the two-operand SSE2 form (destination tied to operand 1);
;; alternative 1 is the three-operand "v"-prefixed form.
12401(define_insn "vec_interleave_highv8hi<mask_name>"
12402  [(set (match_operand:V8HI 0 "register_operand" "=x,v")
12403	(vec_select:V8HI
12404	  (vec_concat:V16HI
12405	    (match_operand:V8HI 1 "register_operand" "0,v")
12406	    (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
12407	  (parallel [(const_int 4) (const_int 12)
12408		     (const_int 5) (const_int 13)
12409		     (const_int 6) (const_int 14)
12410		     (const_int 7) (const_int 15)])))]
12411  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12412  "@
12413   punpckhwd\t{%2, %0|%0, %2}
12414   vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12415  [(set_attr "isa" "noavx,avx")
12416   (set_attr "type" "sselog")
12417   (set_attr "prefix_data16" "1,*")
12418   (set_attr "prefix" "orig,maybe_vex")
12419   (set_attr "mode" "TI")])
12420
;; 512-bit word low-interleave, emitted as vpunpcklwd under TARGET_AVX512BW.
;; For each 8-word group (starting at 0, 8, 16 and 24) the selector takes the
;; first four words of operand 1 and pairs each with the same-numbered word of
;; operand 2 (index + 32 in the concatenation).
12421(define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
12422  [(set (match_operand:V32HI 0 "register_operand" "=v")
12423	(vec_select:V32HI
12424	  (vec_concat:V64HI
12425	    (match_operand:V32HI 1 "register_operand" "v")
12426	    (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
12427	  (parallel [(const_int 0) (const_int 32)
12428		     (const_int 1) (const_int 33)
12429		     (const_int 2) (const_int 34)
12430		     (const_int 3) (const_int 35)
12431		     (const_int 8) (const_int 40)
12432		     (const_int 9) (const_int 41)
12433		     (const_int 10) (const_int 42)
12434		     (const_int 11) (const_int 43)
12435		     (const_int 16) (const_int 48)
12436		     (const_int 17) (const_int 49)
12437		     (const_int 18) (const_int 50)
12438		     (const_int 19) (const_int 51)
12439		     (const_int 24) (const_int 56)
12440		     (const_int 25) (const_int 57)
12441		     (const_int 26) (const_int 58)
12442		     (const_int 27) (const_int 59)])))]
12443  "TARGET_AVX512BW"
12444  "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12445  [(set_attr "type" "sselog")
12446   (set_attr "prefix" "evex")
12447   (set_attr "mode" "XI")])
12448
;; 256-bit word low-interleave, emitted as vpunpcklwd.
;; Words 0-3 and 8-11 of operand 1 are each paired with the same-numbered
;; word of operand 2 (indices 16-19 and 24-27 of the concatenation).
;; The <mask_name> variant is additionally gated on both
;; <mask_avx512vl_condition> and <mask_avx512bw_condition>.
12449(define_insn "avx2_interleave_lowv16hi<mask_name>"
12450  [(set (match_operand:V16HI 0 "register_operand" "=v")
12451	(vec_select:V16HI
12452	  (vec_concat:V32HI
12453	    (match_operand:V16HI 1 "register_operand" "v")
12454	    (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
12455	  (parallel [(const_int 0) (const_int 16)
12456		     (const_int 1) (const_int 17)
12457		     (const_int 2) (const_int 18)
12458		     (const_int 3) (const_int 19)
12459		     (const_int 8) (const_int 24)
12460		     (const_int 9) (const_int 25)
12461		     (const_int 10) (const_int 26)
12462		     (const_int 11) (const_int 27)])))]
12463  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12464  "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12465  [(set_attr "type" "sselog")
12466   (set_attr "prefix" "maybe_evex")
12467   (set_attr "mode" "OI")])
12468
;; 128-bit word low-interleave: words 0-3 of operand 1 paired with words 0-3
;; of operand 2 (indices 8-11 of the concatenation).
;; Alternative 0 is the two-operand SSE2 form (destination tied to operand 1);
;; alternative 1 is the three-operand "v"-prefixed form.
12469(define_insn "vec_interleave_lowv8hi<mask_name>"
12470  [(set (match_operand:V8HI 0 "register_operand" "=x,v")
12471	(vec_select:V8HI
12472	  (vec_concat:V16HI
12473	    (match_operand:V8HI 1 "register_operand" "0,v")
12474	    (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
12475	  (parallel [(const_int 0) (const_int 8)
12476		     (const_int 1) (const_int 9)
12477		     (const_int 2) (const_int 10)
12478		     (const_int 3) (const_int 11)])))]
12479  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12480  "@
12481   punpcklwd\t{%2, %0|%0, %2}
12482   vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12483  [(set_attr "isa" "noavx,avx")
12484   (set_attr "type" "sselog")
12485   (set_attr "prefix_data16" "1,*")
12486   (set_attr "prefix" "orig,maybe_evex")
12487   (set_attr "mode" "TI")])
12488
;; 256-bit doubleword high-interleave, emitted as vpunpckhdq.
;; Elements 2, 3, 6 and 7 of operand 1 are each paired with the same-numbered
;; element of operand 2 (index + 8 in the concatenation).
;; The <mask_name> variant is additionally gated on <mask_avx512vl_condition>.
12489(define_insn "avx2_interleave_highv8si<mask_name>"
12490  [(set (match_operand:V8SI 0 "register_operand" "=v")
12491	(vec_select:V8SI
12492	  (vec_concat:V16SI
12493	    (match_operand:V8SI 1 "register_operand" "v")
12494	    (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
12495	  (parallel [(const_int 2) (const_int 10)
12496		     (const_int 3) (const_int 11)
12497		     (const_int 6) (const_int 14)
12498		     (const_int 7) (const_int 15)])))]
12499  "TARGET_AVX2 && <mask_avx512vl_condition>"
12500  "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12501  [(set_attr "type" "sselog")
12502   (set_attr "prefix" "maybe_evex")
12503   (set_attr "mode" "OI")])
12504
;; 512-bit doubleword high-interleave, emitted as vpunpckhdq under
;; TARGET_AVX512F.  For each 4-element group (starting at 0, 4, 8 and 12) the
;; selector takes the last two elements of operand 1 and pairs each with the
;; same-numbered element of operand 2 (index + 16 in the concatenation).
12505(define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
12506  [(set (match_operand:V16SI 0 "register_operand" "=v")
12507	(vec_select:V16SI
12508	  (vec_concat:V32SI
12509	    (match_operand:V16SI 1 "register_operand" "v")
12510	    (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
12511	  (parallel [(const_int 2) (const_int 18)
12512		     (const_int 3) (const_int 19)
12513		     (const_int 6) (const_int 22)
12514		     (const_int 7) (const_int 23)
12515		     (const_int 10) (const_int 26)
12516		     (const_int 11) (const_int 27)
12517		     (const_int 14) (const_int 30)
12518		     (const_int 15) (const_int 31)])))]
12519  "TARGET_AVX512F"
12520  "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12521  [(set_attr "type" "sselog")
12522   (set_attr "prefix" "evex")
12523   (set_attr "mode" "XI")])
12524
12525
;; 128-bit doubleword high-interleave: elements 2-3 of operand 1 paired with
;; elements 2-3 of operand 2 (indices 6-7 of the concatenation).
;; Alternative 0 is the two-operand SSE2 form (destination tied to operand 1);
;; alternative 1 is the three-operand "v"-prefixed form.
12526(define_insn "vec_interleave_highv4si<mask_name>"
12527  [(set (match_operand:V4SI 0 "register_operand" "=x,v")
12528	(vec_select:V4SI
12529	  (vec_concat:V8SI
12530	    (match_operand:V4SI 1 "register_operand" "0,v")
12531	    (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
12532	  (parallel [(const_int 2) (const_int 6)
12533		     (const_int 3) (const_int 7)])))]
12534  "TARGET_SSE2 && <mask_avx512vl_condition>"
12535  "@
12536   punpckhdq\t{%2, %0|%0, %2}
12537   vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12538  [(set_attr "isa" "noavx,avx")
12539   (set_attr "type" "sselog")
12540   (set_attr "prefix_data16" "1,*")
12541   (set_attr "prefix" "orig,maybe_vex")
12542   (set_attr "mode" "TI")])
12543
;; 256-bit doubleword low-interleave, emitted as vpunpckldq.
;; Elements 0, 1, 4 and 5 of operand 1 are each paired with the same-numbered
;; element of operand 2 (index + 8 in the concatenation).
;; The <mask_name> variant is additionally gated on <mask_avx512vl_condition>.
12544(define_insn "avx2_interleave_lowv8si<mask_name>"
12545  [(set (match_operand:V8SI 0 "register_operand" "=v")
12546	(vec_select:V8SI
12547	  (vec_concat:V16SI
12548	    (match_operand:V8SI 1 "register_operand" "v")
12549	    (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
12550	  (parallel [(const_int 0) (const_int 8)
12551		     (const_int 1) (const_int 9)
12552		     (const_int 4) (const_int 12)
12553		     (const_int 5) (const_int 13)])))]
12554  "TARGET_AVX2 && <mask_avx512vl_condition>"
12555  "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12556  [(set_attr "type" "sselog")
12557   (set_attr "prefix" "maybe_evex")
12558   (set_attr "mode" "OI")])
12559
;; 512-bit doubleword low-interleave, emitted as vpunpckldq under
;; TARGET_AVX512F.  For each 4-element group (starting at 0, 4, 8 and 12) the
;; selector takes the first two elements of operand 1 and pairs each with the
;; same-numbered element of operand 2 (index + 16 in the concatenation).
12560(define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
12561  [(set (match_operand:V16SI 0 "register_operand" "=v")
12562	(vec_select:V16SI
12563	  (vec_concat:V32SI
12564	    (match_operand:V16SI 1 "register_operand" "v")
12565	    (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
12566	  (parallel [(const_int 0) (const_int 16)
12567		     (const_int 1) (const_int 17)
12568		     (const_int 4) (const_int 20)
12569		     (const_int 5) (const_int 21)
12570		     (const_int 8) (const_int 24)
12571		     (const_int 9) (const_int 25)
12572		     (const_int 12) (const_int 28)
12573		     (const_int 13) (const_int 29)])))]
12574  "TARGET_AVX512F"
12575  "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12576  [(set_attr "type" "sselog")
12577   (set_attr "prefix" "evex")
12578   (set_attr "mode" "XI")])
12579
;; 128-bit doubleword low-interleave: elements 0-1 of operand 1 paired with
;; elements 0-1 of operand 2 (indices 4-5 of the concatenation).
;; Alternative 0 is the two-operand SSE2 form (destination tied to operand 1);
;; alternative 1 is the three-operand "v"-prefixed form.
;; The AVX alternative accepts "v" registers and has a <mask_name> variant,
;; so its "prefix" is "maybe_evex" (as in vec_interleave_lowv8hi) rather than
;; a fixed "vex".
12580(define_insn "vec_interleave_lowv4si<mask_name>"
12581  [(set (match_operand:V4SI 0 "register_operand" "=x,v")
12582	(vec_select:V4SI
12583	  (vec_concat:V8SI
12584	    (match_operand:V4SI 1 "register_operand" "0,v")
12585	    (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
12586	  (parallel [(const_int 0) (const_int 4)
12587		     (const_int 1) (const_int 5)])))]
12588  "TARGET_SSE2 && <mask_avx512vl_condition>"
12589  "@
12590   punpckldq\t{%2, %0|%0, %2}
12591   vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12592  [(set_attr "isa" "noavx,avx")
12593   (set_attr "type" "sselog")
12594   (set_attr "prefix_data16" "1,*")
12595   (set_attr "prefix" "orig,maybe_evex")
12596   (set_attr "mode" "TI")])
12597
;; Whole-vector high interleave for 256-bit integer modes.
;; Runs both in-lane interleaves (low, then high) into fresh pseudos, then
;; combines the two results with avx2_permv2ti using immediate 0x31
;; (1 + (3 << 4)) and moves the V4DImode result into operand 0.
12598(define_expand "vec_interleave_high<mode>"
12599  [(match_operand:VI_256 0 "register_operand")
12600   (match_operand:VI_256 1 "register_operand")
12601   (match_operand:VI_256 2 "nonimmediate_operand")]
12602 "TARGET_AVX2"
12603{
12604  rtx lo_part = gen_reg_rtx (<MODE>mode);
12605  rtx hi_part = gen_reg_rtx (<MODE>mode);
12606  rtx merged = gen_reg_rtx (V4DImode);
12607  emit_insn (gen_avx2_interleave_low<mode> (lo_part, operands[1], operands[2]));
12608  emit_insn (gen_avx2_interleave_high<mode> (hi_part, operands[1], operands[2]));
12609  emit_insn (gen_avx2_permv2ti (merged, gen_lowpart (V4DImode, lo_part),
12610				gen_lowpart (V4DImode, hi_part),
12611				GEN_INT (0x31)));
12612  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, merged));
12613  DONE;
12614})
12615
;; Whole-vector low interleave for 256-bit integer modes.
;; Runs both in-lane interleaves (low, then high) into fresh pseudos, then
;; combines the two results with avx2_permv2ti using immediate 0x20
;; (0 + (2 << 4)) and moves the V4DImode result into operand 0.
12616(define_expand "vec_interleave_low<mode>"
12617  [(match_operand:VI_256 0 "register_operand")
12618   (match_operand:VI_256 1 "register_operand")
12619   (match_operand:VI_256 2 "nonimmediate_operand")]
12620 "TARGET_AVX2"
12621{
12622  rtx lo_part = gen_reg_rtx (<MODE>mode);
12623  rtx hi_part = gen_reg_rtx (<MODE>mode);
12624  rtx merged = gen_reg_rtx (V4DImode);
12625  emit_insn (gen_avx2_interleave_low<mode> (lo_part, operands[1], operands[2]));
12626  emit_insn (gen_avx2_interleave_high<mode> (hi_part, operands[1], operands[2]));
12627  emit_insn (gen_avx2_permv2ti (merged, gen_lowpart (V4DImode, lo_part),
12628				gen_lowpart (V4DImode, hi_part),
12629				GEN_INT (0x20)));
12630  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, merged));
12631  DONE;
12632})
12633
12634;; Modes handled by pinsr patterns.
;; V8HI carries no extra condition; V16QI and V4SI require TARGET_SSE4_1, and
;; V2DI requires TARGET_SSE4_1 together with TARGET_64BIT.
12635(define_mode_iterator PINSR_MODE
12636  [(V16QI "TARGET_SSE4_1") V8HI
12637   (V4SI "TARGET_SSE4_1")
12638   (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
12639
;; Pattern-name prefix per mode: "sse2" for V8HI, "sse4_1" for the others.
12640(define_mode_attr sse2p4_1
12641  [(V16QI "sse4_1") (V8HI "sse2")
12642   (V4SI "sse4_1") (V2DI "sse4_1")])
12643
;; "isa" value used for the EVEX alternatives of the pinsr pattern:
;; avx512bw for byte/word elements, avx512dq for dword/qword elements.
12644(define_mode_attr pinsr_evex_isa
12645  [(V16QI "avx512bw") (V8HI "avx512bw")
12646   (V4SI "avx512dq") (V2DI "avx512dq")])
12647
12648;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert scalar operand 2 into one element of vector operand 1.
;; Operand 3 arrives as a vec_merge mask; the insn condition accepts it only
;; when exact_log2 of the mask is a valid element index for the mode.  The
;; output code converts it to that index before printing.
;; Alternatives come in register/memory pairs: 0-1 legacy SSE (destination
;; tied to operand 1), 2-3 AVX, 4-5 gated by <pinsr_evex_isa>.
12649(define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
12650  [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v")
12651	(vec_merge:PINSR_MODE
12652	  (vec_duplicate:PINSR_MODE
12653	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m"))
12654	  (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v")
12655	  (match_operand:SI 3 "const_int_operand")))]
12656  "TARGET_SSE2
12657   && ((unsigned) exact_log2 (INTVAL (operands[3]))
12658       < GET_MODE_NUNITS (<MODE>mode))"
12659{
  /* Turn the one-bit merge mask into the element index used as immediate.  */
12660  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
12661
12662  switch (which_alternative)
12663    {
12664    case 0:
      /* Register source narrower than SImode: print it with %k2.
	 Otherwise share the plain template with the memory alternative.  */
12665      if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
12666	return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
12667      /* FALLTHRU */
12668    case 1:
12669      return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
12670    case 2:
12671    case 4:
      /* Same split as above for the three-operand "v"-prefixed forms.  */
12672      if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
12673	return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
12674      /* FALLTHRU */
12675    case 3:
12676    case 5:
12677      return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12678    default:
12679      gcc_unreachable ();
12680    }
12681}
12682  [(set_attr "isa" "noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>")
12683   (set_attr "type" "sselog")
   ;; prefix_rex is "1" only for V2DI when not compiling for AVX.
12684   (set (attr "prefix_rex")
12685     (if_then_else
12686       (and (not (match_test "TARGET_AVX"))
12687	    (eq (const_string "<MODE>mode") (const_string "V2DImode")))
12688       (const_string "1")
12689       (const_string "*")))
   ;; prefix_data16 is "1" only for V8HI when not compiling for AVX.
12690   (set (attr "prefix_data16")
12691     (if_then_else
12692       (and (not (match_test "TARGET_AVX"))
12693	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12694       (const_string "1")
12695       (const_string "*")))
   ;; prefix_extra is "1" in every case except V8HI without AVX.
12696   (set (attr "prefix_extra")
12697     (if_then_else
12698       (and (not (match_test "TARGET_AVX"))
12699	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12700       (const_string "*")
12701       (const_string "1")))
12702   (set_attr "length_immediate" "1")
12703   (set_attr "prefix" "orig,orig,vex,vex,evex,evex")
12704   (set_attr "mode" "TI")])
12705
;; Masked quarter-vector insert expander.
;; Operand 3 is the quarter index (0-3).  It is converted into the vec_merge
;; selector expected by the matching "_1_mask" insn: an all-ones element mask
;; (16 bits for 4-byte elements, 8 bits otherwise) with the bits of the
;; replaced quarter cleared.
12706(define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
12707  [(match_operand:AVX512_VEC 0 "register_operand")
12708   (match_operand:AVX512_VEC 1 "register_operand")
12709   (match_operand:<ssequartermode> 2 "nonimmediate_operand")
12710   (match_operand:SI 3 "const_0_to_3_operand")
12711   (match_operand:AVX512_VEC 4 "register_operand")
12712   (match_operand:<avx512fmaskmode> 5 "register_operand")]
12713  "TARGET_AVX512F"
12714{
12715  const int slot = INTVAL (operands[3]);
12716  const bool dword_elts = GET_MODE_UNIT_SIZE (<MODE>mode) == 4;
12717  const int keep_bits = (dword_elts
12718			 ? 0xFFFF ^ (0x000F << slot * 4)
12719			 : 0xFF ^ (0x03 << slot * 2));
12720  emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
12721    (operands[0], operands[1], operands[2], GEN_INT (keep_bits),
12722     operands[4], operands[5]));
12723  DONE;
12724})
12725
;; Quarter-vector insert, expressed as a vec_merge of operand 1 with a
;; duplicated quarter-width operand 2.  Operand 3 is the merge selector; only
;; the four values that clear exactly one quarter of the element mask are
;; handled, and any other value reaches gcc_unreachable.
12726(define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
12727  [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
12728	(vec_merge:AVX512_VEC
12729	  (match_operand:AVX512_VEC 1 "register_operand" "v")
12730	  (vec_duplicate:AVX512_VEC
12731		(match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
12732	  (match_operand:SI 3 "const_int_operand" "n")))]
12733  "TARGET_AVX512F"
12734{
12735  int mask;
12736  int selector = INTVAL (operands[3]);
12737
  /* Map the selector back to the quarter index 0-3.  The first constant of
     each pair applies to 4-byte elements, the second to all other sizes.  */
12738  if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))
12739    mask = 0;
12740  else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFF0F : 0xF3))
12741    mask = 1;
12742  else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xF0FF : 0xCF))
12743    mask = 2;
12744  else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0x0FFF : 0x3F))
12745    mask = 3;
12746  else
12747    gcc_unreachable ();
12748
  /* The instruction takes the quarter index as its immediate.  */
12749  operands[3] = GEN_INT (mask);
12750
12751  return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
12752}
12753  [(set_attr "type" "sselog")
12754   (set_attr "length_immediate" "1")
12755   (set_attr "prefix" "evex")
12756   (set_attr "mode" "<sseinsnmode>")])
12757
;; Masked half-vector insert expander.
;; Operand 3 (0 or 1) chooses which half of operand 1 is replaced by
;; operand 2: 0 dispatches to vec_set_lo_<mode>_mask, 1 to
;; vec_set_hi_<mode>_mask.  Operands 4 and 5 are forwarded unchanged.
12758(define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
12759  [(match_operand:AVX512_VEC_2 0 "register_operand")
12760   (match_operand:AVX512_VEC_2 1 "register_operand")
12761   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
12762   (match_operand:SI 3 "const_0_to_1_operand")
12763   (match_operand:AVX512_VEC_2 4 "register_operand")
12764   (match_operand:<avx512fmaskmode> 5 "register_operand")]
12765  "TARGET_AVX512F"
12766{
12767  const bool want_high = INTVAL (operands[3]) != 0;
12768  if (want_high)
12769    emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
12770					   operands[2], operands[4],
12771					   operands[5]));
12772  else
12773    emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
12774					   operands[2], operands[4],
12775					   operands[5]));
12776  DONE;
12777})
12778
;; Replace the low half of a 16-element vector: the result is operand 2
;; concatenated with elements 8-15 of operand 1.  Emitted as the 32x8 insert
;; with immediate 0; requires TARGET_AVX512DQ.
12779(define_insn "vec_set_lo_<mode><mask_name>"
12780  [(set (match_operand:V16FI 0 "register_operand" "=v")
12781	(vec_concat:V16FI
12782	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
12783	  (vec_select:<ssehalfvecmode>
12784	    (match_operand:V16FI 1 "register_operand" "v")
12785	    (parallel [(const_int 8) (const_int 9)
12786		       (const_int 10) (const_int 11)
12787		       (const_int 12) (const_int 13)
12788		       (const_int 14) (const_int 15)]))))]
12789  "TARGET_AVX512DQ"
12790  "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
12791  [(set_attr "type" "sselog")
12792   (set_attr "length_immediate" "1")
12793   (set_attr "prefix" "evex")
12794   (set_attr "mode" "<sseinsnmode>")])
12795
;; Replace the high half of a 16-element vector: the result is elements 0-7
;; of operand 1 concatenated with operand 2.  Emitted as the 32x8 insert with
;; immediate 1; requires TARGET_AVX512DQ.
12796(define_insn "vec_set_hi_<mode><mask_name>"
12797  [(set (match_operand:V16FI 0 "register_operand" "=v")
12798	(vec_concat:V16FI
12799	  (vec_select:<ssehalfvecmode>
12800	    (match_operand:V16FI 1 "register_operand" "v")
12801	    (parallel [(const_int 0) (const_int 1)
12802		       (const_int 2) (const_int 3)
12803		       (const_int 4) (const_int 5)
12804		       (const_int 6) (const_int 7)]))
12805	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
12806  "TARGET_AVX512DQ"
12807  "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
12808  [(set_attr "type" "sselog")
12809   (set_attr "length_immediate" "1")
12810   (set_attr "prefix" "evex")
12811   (set_attr "mode" "<sseinsnmode>")])
12812
;; Replace the low half of an 8-element vector: the result is operand 2
;; concatenated with elements 4-7 of operand 1.  Emitted as the 64x4 insert
;; with immediate 0; requires TARGET_AVX512F.
12813(define_insn "vec_set_lo_<mode><mask_name>"
12814  [(set (match_operand:V8FI 0 "register_operand" "=v")
12815	(vec_concat:V8FI
12816	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
12817	  (vec_select:<ssehalfvecmode>
12818	    (match_operand:V8FI 1 "register_operand" "v")
12819	    (parallel [(const_int 4) (const_int 5)
12820		       (const_int 6) (const_int 7)]))))]
12821  "TARGET_AVX512F"
12822  "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
12823  [(set_attr "type" "sselog")
12824   (set_attr "length_immediate" "1")
12825   (set_attr "prefix" "evex")
12826   (set_attr "mode" "XI")])
12827
;; Replace the high half of an 8-element vector: the result is elements 0-3
;; of operand 1 concatenated with operand 2.  Emitted as the 64x4 insert with
;; immediate 1; requires TARGET_AVX512F.
12828(define_insn "vec_set_hi_<mode><mask_name>"
12829  [(set (match_operand:V8FI 0 "register_operand" "=v")
12830	(vec_concat:V8FI
12831	  (vec_select:<ssehalfvecmode>
12832	    (match_operand:V8FI 1 "register_operand" "v")
12833	    (parallel [(const_int 0) (const_int 1)
12834		       (const_int 2) (const_int 3)]))
12835	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
12836  "TARGET_AVX512F"
12837  "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
12838  [(set_attr "type" "sselog")
12839   (set_attr "length_immediate" "1")
12840   (set_attr "prefix" "evex")
12841   (set_attr "mode" "XI")])
12842
;; Masked 256-bit 64x2 shuffle expander.
;; Operand 3 is a 2-bit immediate.  Bit 0 picks the element pair taken from
;; operand 1 (indices 0-1 or 2-3); bit 1 picks the pair taken from operand 2
;; (indices 4-5 or 6-7 of the concatenation).  The four resulting indices are
;; handed to the "_1_mask" insn together with operands 4 and 5.
12843(define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
12844  [(match_operand:VI8F_256 0 "register_operand")
12845   (match_operand:VI8F_256 1 "register_operand")
12846   (match_operand:VI8F_256 2 "nonimmediate_operand")
12847   (match_operand:SI 3 "const_0_to_3_operand")
12848   (match_operand:VI8F_256 4 "register_operand")
12849   (match_operand:QI 5 "register_operand")]
12850  "TARGET_AVX512DQ"
12851{
12852  const int imm = INTVAL (operands[3]);
12853  const int lo_base = (imm & 1) * 2;
12854  const int hi_base = ((imm >> 1) & 1) * 2 + 4;
12855  emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
12856      (operands[0], operands[1], operands[2],
12857       GEN_INT (lo_base), GEN_INT (lo_base + 1),
12858       GEN_INT (hi_base), GEN_INT (hi_base + 1),
12859       operands[4], operands[5]));
12860  DONE;
12861})
12862
;; 256-bit 64x2 shuffle.  Operands 3-4 select an element pair from operand 1
;; (indices 0-3) and operands 5-6 a pair from operand 2 (indices 4-7 of the
;; concatenation).  The condition requires each pair to be an even index
;; followed by its successor.  The output code folds the two pair choices
;; into a 2-bit immediate.
;; The pattern only covers 256-bit modes, so "mode" uses <sseinsnmode> (as
;; avx512vl_shuf_<shuffletype>32x4_1 does) instead of the 512-bit "XI".
12863(define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
12864  [(set (match_operand:VI8F_256 0 "register_operand" "=v")
12865	(vec_select:VI8F_256
12866	  (vec_concat:<ssedoublemode>
12867	    (match_operand:VI8F_256 1 "register_operand" "v")
12868	    (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
12869	  (parallel [(match_operand 3 "const_0_to_3_operand")
12870		     (match_operand 4 "const_0_to_3_operand")
12871		     (match_operand 5 "const_4_to_7_operand")
12872		     (match_operand 6 "const_4_to_7_operand")])))]
12873  "TARGET_AVX512VL
12874   && (INTVAL (operands[3]) & 1) == 0
12875   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
12876   && (INTVAL (operands[5]) & 1) == 0
12877   && INTVAL (operands[5]) == INTVAL (operands[6]) - 1"
12878{
12879  int mask;
  /* Bit 0: pair index within operand 1; bit 1: pair index within operand 2.  */
12880  mask = INTVAL (operands[3]) / 2;
12881  mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
12882  operands[3] = GEN_INT (mask);
12883  return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
12884}
12885  [(set_attr "type" "sselog")
12886   (set_attr "length_immediate" "1")
12887   (set_attr "prefix" "evex")
12888   (set_attr "mode" "<sseinsnmode>")])
12889
;; Masked 512-bit 64x2 shuffle expander.
;; Operand 3 is an 8-bit immediate made of four 2-bit fields.  Fields 0 and 1
;; each pick an element pair from operand 1 (indices 0-7); fields 2 and 3
;; each pick a pair from operand 2 (indices 8-15 of the concatenation).  The
;; eight resulting indices go to the "_1_mask" insn with operands 4 and 5.
12890(define_expand "avx512f_shuf_<shuffletype>64x2_mask"
12891  [(match_operand:V8FI 0 "register_operand")
12892   (match_operand:V8FI 1 "register_operand")
12893   (match_operand:V8FI 2 "nonimmediate_operand")
12894   (match_operand:SI 3 "const_0_to_255_operand")
12895   (match_operand:V8FI 4 "register_operand")
12896   (match_operand:QI 5 "register_operand")]
12897  "TARGET_AVX512F"
12898{
12899  const int imm = INTVAL (operands[3]);
12900  const int sel0 = (imm & 3) * 2;
12901  const int sel1 = ((imm >> 2) & 3) * 2;
12902  const int sel2 = ((imm >> 4) & 3) * 2 + 8;
12903  const int sel3 = ((imm >> 6) & 3) * 2 + 8;
12904  emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
12905      (operands[0], operands[1], operands[2],
12906       GEN_INT (sel0), GEN_INT (sel0 + 1),
12907       GEN_INT (sel1), GEN_INT (sel1 + 1),
12908       GEN_INT (sel2), GEN_INT (sel2 + 1),
12909       GEN_INT (sel3), GEN_INT (sel3 + 1),
12910       operands[4], operands[5]));
12911  DONE;
12912})
12913
;; 512-bit 64x2 shuffle.  Operands 3-6 select two element pairs from
;; operand 1 (indices 0-7) and operands 7-10 select two pairs from operand 2
;; (indices 8-15 of the concatenation).  The condition requires every pair to
;; be an even index followed by its successor.  The output code packs the
;; four pair choices into an 8-bit immediate, two bits each.
12914(define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
12915  [(set (match_operand:V8FI 0 "register_operand" "=v")
12916	(vec_select:V8FI
12917	  (vec_concat:<ssedoublemode>
12918	    (match_operand:V8FI 1 "register_operand" "v")
12919	    (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
12920	  (parallel [(match_operand 3 "const_0_to_7_operand")
12921		     (match_operand 4 "const_0_to_7_operand")
12922		     (match_operand 5 "const_0_to_7_operand")
12923		     (match_operand 6 "const_0_to_7_operand")
12924		     (match_operand 7 "const_8_to_15_operand")
12925		     (match_operand 8 "const_8_to_15_operand")
12926		     (match_operand 9 "const_8_to_15_operand")
12927		     (match_operand 10 "const_8_to_15_operand")])))]
12928  "TARGET_AVX512F
12929   && (INTVAL (operands[3]) & 1) == 0
12930   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
12931   && (INTVAL (operands[5]) & 1) == 0
12932   && INTVAL (operands[5]) == INTVAL (operands[6]) - 1
12933   && (INTVAL (operands[7]) & 1) == 0
12934   && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
12935   && (INTVAL (operands[9]) & 1) == 0
12936   && INTVAL (operands[9]) == INTVAL (operands[10]) - 1"
12937{
12938  int mask;
  /* Bits 1:0 and 3:2 hold the pair indices within operand 1; bits 5:4 and
     7:6 hold the pair indices within operand 2 (after removing the offset
     of 8).  */
12939  mask = INTVAL (operands[3]) / 2;
12940  mask |= INTVAL (operands[5]) / 2 << 2;
12941  mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
12942  mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
12943  operands[3] = GEN_INT (mask);
12944
12945  return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
12946}
12947  [(set_attr "type" "sselog")
12948   (set_attr "length_immediate" "1")
12949   (set_attr "prefix" "evex")
12950   (set_attr "mode" "<sseinsnmode>")])
12951
;; Masked 256-bit 32x4 shuffle expander.
;; Operand 3 is a 2-bit immediate.  Bit 0 picks the four-element group taken
;; from operand 1 (indices 0-3 or 4-7); bit 1 picks the group taken from
;; operand 2 (indices 8-11 or 12-15 of the concatenation).  The eight
;; resulting indices go to the "_1_mask" insn with operands 4 and 5.
12952(define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
12953  [(match_operand:VI4F_256 0 "register_operand")
12954   (match_operand:VI4F_256 1 "register_operand")
12955   (match_operand:VI4F_256 2 "nonimmediate_operand")
12956   (match_operand:SI 3 "const_0_to_3_operand")
12957   (match_operand:VI4F_256 4 "register_operand")
12958   (match_operand:QI 5 "register_operand")]
12959  "TARGET_AVX512VL"
12960{
12961  const int imm = INTVAL (operands[3]);
12962  const int lo_base = (imm & 1) * 4;
12963  const int hi_base = ((imm >> 1) & 1) * 4 + 8;
12964
12965  emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
12966      (operands[0], operands[1], operands[2],
12967       GEN_INT (lo_base), GEN_INT (lo_base + 1),
12968       GEN_INT (lo_base + 2), GEN_INT (lo_base + 3),
12969       GEN_INT (hi_base), GEN_INT (hi_base + 1),
12970       GEN_INT (hi_base + 2), GEN_INT (hi_base + 3),
12971       operands[4], operands[5]));
12972
12973  DONE;
12974})
12975
;; 256-bit 32x4 shuffle.  Operands 3-6 select a four-element group from
;; operand 1 (indices 0-7) and operands 7-10 a group from operand 2 (indices
;; 8-15 of the concatenation).  The condition requires each group to start at
;; a multiple of four and continue with the next three consecutive indices.
;; The output code folds the two group choices into a 2-bit immediate.
12976(define_insn "avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
12977  [(set (match_operand:VI4F_256 0 "register_operand" "=v")
12978	(vec_select:VI4F_256
12979	  (vec_concat:<ssedoublemode>
12980	    (match_operand:VI4F_256 1 "register_operand" "v")
12981	    (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
12982	  (parallel [(match_operand 3 "const_0_to_7_operand")
12983		     (match_operand 4 "const_0_to_7_operand")
12984		     (match_operand 5 "const_0_to_7_operand")
12985		     (match_operand 6 "const_0_to_7_operand")
12986		     (match_operand 7 "const_8_to_15_operand")
12987		     (match_operand 8 "const_8_to_15_operand")
12988		     (match_operand 9 "const_8_to_15_operand")
12989		     (match_operand 10 "const_8_to_15_operand")])))]
12990  "TARGET_AVX512VL
12991   && (INTVAL (operands[3]) & 3) == 0
12992   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
12993   && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
12994   && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
12995   && (INTVAL (operands[7]) & 3) == 0
12996   && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
12997   && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
12998   && INTVAL (operands[7]) == INTVAL (operands[10]) - 3"
12999{
13000  int mask;
  /* Bit 0: group index within operand 1; bit 1: group index within
     operand 2 (after removing the offset of 8).  */
13001  mask = INTVAL (operands[3]) / 4;
13002  mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
13003  operands[3] = GEN_INT (mask);
13004
13005  return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
13006}
13007  [(set_attr "type" "sselog")
13008   (set_attr "length_immediate" "1")
13009   (set_attr "prefix" "evex")
13010   (set_attr "mode" "<sseinsnmode>")])
13011
;; Expand the masked 512-bit vshuf<shuffletype>32x4 pattern (TARGET_AVX512F).
;; Operand 3 is an 8-bit immediate made of four 2-bit fields.  Each field F
;; becomes four consecutive element indices starting at F * 4:
;;   bits 1:0 and 3:2 -> indices in 0..15  (result elements 0-3 and 4-7),
;;   bits 5:4 and 7:6 -> indices in 16..31 (result elements 8-11 and 12-15).
;; Operands 4 and 5 are forwarded unchanged as the trailing generator
;; arguments; presumably the merge source and the HImode write mask of the
;; mask subst (defined elsewhere) -- confirm there.
13012(define_expand "avx512f_shuf_<shuffletype>32x4_mask"
13013  [(match_operand:V16FI 0 "register_operand")
13014   (match_operand:V16FI 1 "register_operand")
13015   (match_operand:V16FI 2 "nonimmediate_operand")
13016   (match_operand:SI 3 "const_0_to_255_operand")
13017   (match_operand:V16FI 4 "register_operand")
13018   (match_operand:HI 5 "register_operand")]
13019  "TARGET_AVX512F"
13020{
13021  int mask = INTVAL (operands[3]);
13022  emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
13023      (operands[0], operands[1], operands[2],
13024       GEN_INT (((mask >> 0) & 3) * 4),
13025       GEN_INT (((mask >> 0) & 3) * 4 + 1),
13026       GEN_INT (((mask >> 0) & 3) * 4 + 2),
13027       GEN_INT (((mask >> 0) & 3) * 4 + 3),
13028       GEN_INT (((mask >> 2) & 3) * 4),
13029       GEN_INT (((mask >> 2) & 3) * 4 + 1),
13030       GEN_INT (((mask >> 2) & 3) * 4 + 2),
13031       GEN_INT (((mask >> 2) & 3) * 4 + 3),
13032       GEN_INT (((mask >> 4) & 3) * 4 + 16),
13033       GEN_INT (((mask >> 4) & 3) * 4 + 17),
13034       GEN_INT (((mask >> 4) & 3) * 4 + 18),
13035       GEN_INT (((mask >> 4) & 3) * 4 + 19),
13036       GEN_INT (((mask >> 6) & 3) * 4 + 16),
13037       GEN_INT (((mask >> 6) & 3) * 4 + 17),
13038       GEN_INT (((mask >> 6) & 3) * 4 + 18),
13039       GEN_INT (((mask >> 6) & 3) * 4 + 19),
13040       operands[4], operands[5]));
13041  DONE;
13042})
13043
;; 512-bit vshuf<shuffletype>32x4 written as a vec_select of sixteen
;; elements from the concatenation of operands 1 and 2.  The predicates keep
;; operands 3-10 in 0..15 and operands 11-18 in 16..31; the insn condition
;; further requires each of the four groups of four indices to be
;; consecutive and to start on a multiple of four.
13044(define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
13045  [(set (match_operand:V16FI 0 "register_operand" "=v")
13046	(vec_select:V16FI
13047	  (vec_concat:<ssedoublemode>
13048	    (match_operand:V16FI 1 "register_operand" "v")
13049	    (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
13050	  (parallel [(match_operand 3 "const_0_to_15_operand")
13051		     (match_operand 4 "const_0_to_15_operand")
13052		     (match_operand 5 "const_0_to_15_operand")
13053		     (match_operand 6 "const_0_to_15_operand")
13054		     (match_operand 7 "const_0_to_15_operand")
13055		     (match_operand 8 "const_0_to_15_operand")
13056		     (match_operand 9 "const_0_to_15_operand")
13057		     (match_operand 10 "const_0_to_15_operand")
13058		     (match_operand 11 "const_16_to_31_operand")
13059		     (match_operand 12 "const_16_to_31_operand")
13060		     (match_operand 13 "const_16_to_31_operand")
13061		     (match_operand 14 "const_16_to_31_operand")
13062		     (match_operand 15 "const_16_to_31_operand")
13063		     (match_operand 16 "const_16_to_31_operand")
13064		     (match_operand 17 "const_16_to_31_operand")
13065		     (match_operand 18 "const_16_to_31_operand")])))]
13066  "TARGET_AVX512F
13067   && (INTVAL (operands[3]) & 3) == 0
13068   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
13069   && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
13070   && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
13071   && (INTVAL (operands[7]) & 3) == 0
13072   && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
13073   && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
13074   && INTVAL (operands[7]) == INTVAL (operands[10]) - 3
13075   && (INTVAL (operands[11]) & 3) == 0
13076   && INTVAL (operands[11]) == INTVAL (operands[12]) - 1
13077   && INTVAL (operands[11]) == INTVAL (operands[13]) - 2
13078   && INTVAL (operands[11]) == INTVAL (operands[14]) - 3
13079   && (INTVAL (operands[15]) & 3) == 0
13080   && INTVAL (operands[15]) == INTVAL (operands[16]) - 1
13081   && INTVAL (operands[15]) == INTVAL (operands[17]) - 2
13082   && INTVAL (operands[15]) == INTVAL (operands[18]) - 3"
;; Output: rebuild the 8-bit immediate from the four group start indices
;; (bits 1:0 = operand 3 / 4, bits 3:2 = operand 7 / 4,
;;  bits 5:4 = (operand 11 - 16) / 4, bits 7:6 = (operand 15 - 16) / 4),
;; store it in operands[3] and print it as %3.
13083{
13084  int mask;
13085  mask = INTVAL (operands[3]) / 4;
13086  mask |= INTVAL (operands[7]) / 4 << 2;
13087  mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
13088  mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
13089  operands[3] = GEN_INT (mask);
13090
13091  return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
13092}
13093  [(set_attr "type" "sselog")
13094   (set_attr "length_immediate" "1")
13095   (set_attr "prefix" "evex")
13096   (set_attr "mode" "<sseinsnmode>")])
13097
;; Expand the masked 512-bit vpshufd pattern (TARGET_AVX512F).
;; Operand 2 is an 8-bit immediate holding four 2-bit selectors.  The same
;; four selectors are emitted four times, offset by 0, 4, 8 and 12, giving
;; the sixteen element indices expected by avx512f_pshufd_1_mask.
;; Operands 3 and 4 are forwarded unchanged as the trailing generator
;; arguments; presumably the merge source and the HImode write mask of the
;; mask subst (defined elsewhere) -- confirm there.
13098(define_expand "avx512f_pshufdv3_mask"
13099  [(match_operand:V16SI 0 "register_operand")
13100   (match_operand:V16SI 1 "nonimmediate_operand")
13101   (match_operand:SI 2 "const_0_to_255_operand")
13102   (match_operand:V16SI 3 "register_operand")
13103   (match_operand:HI 4 "register_operand")]
13104  "TARGET_AVX512F"
13105{
13106  int mask = INTVAL (operands[2]);
13107  emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
13108				       GEN_INT ((mask >> 0) & 3),
13109				       GEN_INT ((mask >> 2) & 3),
13110				       GEN_INT ((mask >> 4) & 3),
13111				       GEN_INT ((mask >> 6) & 3),
13112				       GEN_INT (((mask >> 0) & 3) + 4),
13113				       GEN_INT (((mask >> 2) & 3) + 4),
13114				       GEN_INT (((mask >> 4) & 3) + 4),
13115				       GEN_INT (((mask >> 6) & 3) + 4),
13116				       GEN_INT (((mask >> 0) & 3) + 8),
13117				       GEN_INT (((mask >> 2) & 3) + 8),
13118				       GEN_INT (((mask >> 4) & 3) + 8),
13119				       GEN_INT (((mask >> 6) & 3) + 8),
13120				       GEN_INT (((mask >> 0) & 3) + 12),
13121				       GEN_INT (((mask >> 2) & 3) + 12),
13122				       GEN_INT (((mask >> 4) & 3) + 12),
13123				       GEN_INT (((mask >> 6) & 3) + 12),
13124				       operands[3], operands[4]));
13125  DONE;
13126})
13127
;; 512-bit vpshufd written as a vec_select of sixteen elements of operand 1.
;; Operands 2-5 (each 0..3) are the selectors for the first group of four
;; elements; the insn condition requires the selectors of the other three
;; groups (operands 6-9, 10-13, 14-17) to equal them plus 4, 8 and 12.
13128(define_insn "avx512f_pshufd_1<mask_name>"
13129  [(set (match_operand:V16SI 0 "register_operand" "=v")
13130	(vec_select:V16SI
13131	  (match_operand:V16SI 1 "nonimmediate_operand" "vm")
13132	  (parallel [(match_operand 2 "const_0_to_3_operand")
13133		     (match_operand 3 "const_0_to_3_operand")
13134		     (match_operand 4 "const_0_to_3_operand")
13135		     (match_operand 5 "const_0_to_3_operand")
13136		     (match_operand 6 "const_4_to_7_operand")
13137		     (match_operand 7 "const_4_to_7_operand")
13138		     (match_operand 8 "const_4_to_7_operand")
13139		     (match_operand 9 "const_4_to_7_operand")
13140		     (match_operand 10 "const_8_to_11_operand")
13141		     (match_operand 11 "const_8_to_11_operand")
13142		     (match_operand 12 "const_8_to_11_operand")
13143		     (match_operand 13 "const_8_to_11_operand")
13144		     (match_operand 14 "const_12_to_15_operand")
13145		     (match_operand 15 "const_12_to_15_operand")
13146		     (match_operand 16 "const_12_to_15_operand")
13147		     (match_operand 17 "const_12_to_15_operand")])))]
13148  "TARGET_AVX512F
13149   && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
13150   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
13151   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
13152   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
13153   && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
13154   && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
13155   && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
13156   && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
13157   && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
13158   && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
13159   && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
13160   && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
;; Output: pack operands 2-5 into the 8-bit immediate (two bits each,
;; operand 2 in the low bits), store it in operands[2] and print it as %2.
13161{
13162  int mask = 0;
13163  mask |= INTVAL (operands[2]) << 0;
13164  mask |= INTVAL (operands[3]) << 2;
13165  mask |= INTVAL (operands[4]) << 4;
13166  mask |= INTVAL (operands[5]) << 6;
13167  operands[2] = GEN_INT (mask);
13168
13169  return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
13170}
13171  [(set_attr "type" "sselog1")
13172   (set_attr "prefix" "evex")
13173   (set_attr "length_immediate" "1")
13174   (set_attr "mode" "XI")])
13175
;; Expand the masked 256-bit vpshufd pattern (TARGET_AVX512VL).
;; Operand 2 is an 8-bit immediate holding four 2-bit selectors; they are
;; emitted twice, offset by 0 and by 4, giving the eight element indices
;; expected by avx2_pshufd_1_mask.  Operands 3 and 4 are forwarded
;; unchanged as the trailing generator arguments; presumably the merge
;; source and the QImode write mask (mask subst defined elsewhere).
13176(define_expand "avx512vl_pshufdv3_mask"
13177  [(match_operand:V8SI 0 "register_operand")
13178   (match_operand:V8SI 1 "nonimmediate_operand")
13179   (match_operand:SI 2 "const_0_to_255_operand")
13180   (match_operand:V8SI 3 "register_operand")
13181   (match_operand:QI 4 "register_operand")]
13182  "TARGET_AVX512VL"
13183{
13184  int mask = INTVAL (operands[2]);
13185  emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
13186				GEN_INT ((mask >> 0) & 3),
13187				GEN_INT ((mask >> 2) & 3),
13188				GEN_INT ((mask >> 4) & 3),
13189				GEN_INT ((mask >> 6) & 3),
13190				GEN_INT (((mask >> 0) & 3) + 4),
13191				GEN_INT (((mask >> 2) & 3) + 4),
13192				GEN_INT (((mask >> 4) & 3) + 4),
13193				GEN_INT (((mask >> 6) & 3) + 4),
13194                operands[3], operands[4]));
13195  DONE;
13196})
13197
;; Expand the unmasked 256-bit vpshufd pattern (TARGET_AVX2).
;; Operand 2 is an 8-bit immediate holding four 2-bit selectors; they are
;; emitted twice, offset by 0 and by 4, giving the eight element indices
;; expected by avx2_pshufd_1.
13198(define_expand "avx2_pshufdv3"
13199  [(match_operand:V8SI 0 "register_operand")
13200   (match_operand:V8SI 1 "nonimmediate_operand")
13201   (match_operand:SI 2 "const_0_to_255_operand")]
13202  "TARGET_AVX2"
13203{
13204  int mask = INTVAL (operands[2]);
13205  emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
13206				GEN_INT ((mask >> 0) & 3),
13207				GEN_INT ((mask >> 2) & 3),
13208				GEN_INT ((mask >> 4) & 3),
13209				GEN_INT ((mask >> 6) & 3),
13210				GEN_INT (((mask >> 0) & 3) + 4),
13211				GEN_INT (((mask >> 2) & 3) + 4),
13212				GEN_INT (((mask >> 4) & 3) + 4),
13213				GEN_INT (((mask >> 6) & 3) + 4)));
13214  DONE;
13215})
13216
;; 256-bit vpshufd written as a vec_select of eight elements of operand 1.
;; Operands 2-5 (each 0..3) select the first four elements; the insn
;; condition requires operands 6-9 to equal them plus 4, so both groups of
;; four use the same selector pattern.  <mask_avx512vl_condition> is a
;; subst attribute defined elsewhere.
13217(define_insn "avx2_pshufd_1<mask_name>"
13218  [(set (match_operand:V8SI 0 "register_operand" "=v")
13219	(vec_select:V8SI
13220	  (match_operand:V8SI 1 "nonimmediate_operand" "vm")
13221	  (parallel [(match_operand 2 "const_0_to_3_operand")
13222		     (match_operand 3 "const_0_to_3_operand")
13223		     (match_operand 4 "const_0_to_3_operand")
13224		     (match_operand 5 "const_0_to_3_operand")
13225		     (match_operand 6 "const_4_to_7_operand")
13226		     (match_operand 7 "const_4_to_7_operand")
13227		     (match_operand 8 "const_4_to_7_operand")
13228		     (match_operand 9 "const_4_to_7_operand")])))]
13229  "TARGET_AVX2
13230   && <mask_avx512vl_condition>
13231   && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
13232   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
13233   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
13234   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
;; Output: pack operands 2-5 into the 8-bit immediate (two bits each,
;; operand 2 in the low bits), store it in operands[2] and print it as %2.
13235{
13236  int mask = 0;
13237  mask |= INTVAL (operands[2]) << 0;
13238  mask |= INTVAL (operands[3]) << 2;
13239  mask |= INTVAL (operands[4]) << 4;
13240  mask |= INTVAL (operands[5]) << 6;
13241  operands[2] = GEN_INT (mask);
13242
13243  return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
13244}
13245  [(set_attr "type" "sselog1")
13246   (set_attr "prefix" "maybe_evex")
13247   (set_attr "length_immediate" "1")
13248   (set_attr "mode" "OI")])
13249
;; Expand the masked 128-bit vpshufd pattern (TARGET_AVX512VL).
;; Operand 2 is an 8-bit immediate that is split into four 2-bit element
;; selectors for sse2_pshufd_1_mask.  Operands 3 and 4 are forwarded
;; unchanged as the trailing generator arguments; presumably the merge
;; source and the QImode write mask (mask subst defined elsewhere).
13250(define_expand "avx512vl_pshufd_mask"
13251  [(match_operand:V4SI 0 "register_operand")
13252   (match_operand:V4SI 1 "nonimmediate_operand")
13253   (match_operand:SI 2 "const_0_to_255_operand")
13254   (match_operand:V4SI 3 "register_operand")
13255   (match_operand:QI 4 "register_operand")]
13256  "TARGET_AVX512VL"
13257{
13258  int mask = INTVAL (operands[2]);
13259  emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
13260				GEN_INT ((mask >> 0) & 3),
13261				GEN_INT ((mask >> 2) & 3),
13262				GEN_INT ((mask >> 4) & 3),
13263				GEN_INT ((mask >> 6) & 3),
13264                operands[3], operands[4]));
13265  DONE;
13266})
13267
;; Expand the 128-bit pshufd pattern (TARGET_SSE2).
;; Operand 2 is any const_int; only its low eight bits are used, split into
;; four 2-bit element selectors that are passed to sse2_pshufd_1.
13268(define_expand "sse2_pshufd"
13269  [(match_operand:V4SI 0 "register_operand")
13270   (match_operand:V4SI 1 "vector_operand")
13271   (match_operand:SI 2 "const_int_operand")]
13272  "TARGET_SSE2"
13273{
13274  int mask = INTVAL (operands[2]);
13275  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
13276				GEN_INT ((mask >> 0) & 3),
13277				GEN_INT ((mask >> 2) & 3),
13278				GEN_INT ((mask >> 4) & 3),
13279				GEN_INT ((mask >> 6) & 3)));
13280  DONE;
13281})
13282
;; 128-bit pshufd written as a vec_select of four elements of operand 1,
;; with operands 2-5 (each 0..3) as the element selectors.
;; <mask_avx512vl_condition> and <mask_prefix2> are subst attributes
;; defined elsewhere.
13283(define_insn "sse2_pshufd_1<mask_name>"
13284  [(set (match_operand:V4SI 0 "register_operand" "=v")
13285	(vec_select:V4SI
13286	  (match_operand:V4SI 1 "vector_operand" "vBm")
13287	  (parallel [(match_operand 2 "const_0_to_3_operand")
13288		     (match_operand 3 "const_0_to_3_operand")
13289		     (match_operand 4 "const_0_to_3_operand")
13290		     (match_operand 5 "const_0_to_3_operand")])))]
13291  "TARGET_SSE2 && <mask_avx512vl_condition>"
;; Output: pack operands 2-5 into the 8-bit immediate (two bits each,
;; operand 2 in the low bits), store it in operands[2] and print it as %2.
13292{
13293  int mask = 0;
13294  mask |= INTVAL (operands[2]) << 0;
13295  mask |= INTVAL (operands[3]) << 2;
13296  mask |= INTVAL (operands[4]) << 4;
13297  mask |= INTVAL (operands[5]) << 6;
13298  operands[2] = GEN_INT (mask);
13299
13300  return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13301}
13302  [(set_attr "type" "sselog1")
13303   (set_attr "prefix_data16" "1")
13304   (set_attr "prefix" "<mask_prefix2>")
13305   (set_attr "length_immediate" "1")
13306   (set_attr "mode" "TI")])
13307
;; 512-bit vpshuflw (TARGET_AVX512BW), modelled as an UNSPEC_PSHUFLW of the
;; source operand 1 and the 8-bit immediate operand 2, which is printed
;; unchanged as %2.  <mask_codefor>, <mask_name> and <mask_operand3> come
;; from the mask subst defined elsewhere.
;; The instruction carries an imm8, so "length_immediate" is set to 1 here
;; just as in the avx2_pshuflw_1 and sse2_pshuflw_1 patterns.
13308(define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
13309  [(set (match_operand:V32HI 0 "register_operand" "=v")
13310	(unspec:V32HI
13311	  [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
13312	   (match_operand:SI 2 "const_0_to_255_operand" "n")]
13313	  UNSPEC_PSHUFLW))]
13314  "TARGET_AVX512BW"
13315  "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13316  [(set_attr "type" "sselog")
13317   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")
13318   (set_attr "mode" "XI")])
13319
;; Expand the masked 256-bit vpshuflw pattern
;; (TARGET_AVX512VL && TARGET_AVX512BW).
;; Operand 2 is an 8-bit immediate holding four 2-bit selectors; they are
;; emitted twice, offset by 0 and by 8, as the eight variable indices of
;; avx2_pshuflw_1_mask.  Operands 3 and 4 are forwarded unchanged as the
;; trailing generator arguments; presumably the merge source and the
;; HImode write mask (mask subst defined elsewhere).
13320(define_expand "avx512vl_pshuflwv3_mask"
13321  [(match_operand:V16HI 0 "register_operand")
13322   (match_operand:V16HI 1 "nonimmediate_operand")
13323   (match_operand:SI 2 "const_0_to_255_operand")
13324   (match_operand:V16HI 3 "register_operand")
13325   (match_operand:HI 4 "register_operand")]
13326  "TARGET_AVX512VL && TARGET_AVX512BW"
13327{
13328  int mask = INTVAL (operands[2]);
13329  emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
13330				 GEN_INT ((mask >> 0) & 3),
13331				 GEN_INT ((mask >> 2) & 3),
13332				 GEN_INT ((mask >> 4) & 3),
13333				 GEN_INT ((mask >> 6) & 3),
13334				 GEN_INT (((mask >> 0) & 3) + 8),
13335				 GEN_INT (((mask >> 2) & 3) + 8),
13336				 GEN_INT (((mask >> 4) & 3) + 8),
13337				 GEN_INT (((mask >> 6) & 3) + 8),
13338                 operands[3], operands[4]));
13339  DONE;
13340})
13341
;; Expand the unmasked 256-bit vpshuflw pattern (TARGET_AVX2).
;; Operand 2 is an 8-bit immediate holding four 2-bit selectors; they are
;; emitted twice, offset by 0 and by 8, as the eight variable indices of
;; avx2_pshuflw_1.
13342(define_expand "avx2_pshuflwv3"
13343  [(match_operand:V16HI 0 "register_operand")
13344   (match_operand:V16HI 1 "nonimmediate_operand")
13345   (match_operand:SI 2 "const_0_to_255_operand")]
13346  "TARGET_AVX2"
13347{
13348  int mask = INTVAL (operands[2]);
13349  emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
13350				 GEN_INT ((mask >> 0) & 3),
13351				 GEN_INT ((mask >> 2) & 3),
13352				 GEN_INT ((mask >> 4) & 3),
13353				 GEN_INT ((mask >> 6) & 3),
13354				 GEN_INT (((mask >> 0) & 3) + 8),
13355				 GEN_INT (((mask >> 2) & 3) + 8),
13356				 GEN_INT (((mask >> 4) & 3) + 8),
13357				 GEN_INT (((mask >> 6) & 3) + 8)));
13358  DONE;
13359})
13360
;; 256-bit vpshuflw written as a vec_select of sixteen elements of
;; operand 1.  Result elements 0-3 are chosen by operands 2-5 (each 0..3)
;; and elements 8-11 by operands 6-9 (each 8..11); elements 4-7 and 12-15
;; are fixed to their own positions.  The insn condition requires operands
;; 6-9 to equal operands 2-5 plus 8.  The <mask_*_condition> strings are
;; subst attributes defined elsewhere.
13361(define_insn "avx2_pshuflw_1<mask_name>"
13362  [(set (match_operand:V16HI 0 "register_operand" "=v")
13363	(vec_select:V16HI
13364	  (match_operand:V16HI 1 "nonimmediate_operand" "vm")
13365	  (parallel [(match_operand 2 "const_0_to_3_operand")
13366		     (match_operand 3 "const_0_to_3_operand")
13367		     (match_operand 4 "const_0_to_3_operand")
13368		     (match_operand 5 "const_0_to_3_operand")
13369		     (const_int 4)
13370		     (const_int 5)
13371		     (const_int 6)
13372		     (const_int 7)
13373		     (match_operand 6 "const_8_to_11_operand")
13374		     (match_operand 7 "const_8_to_11_operand")
13375		     (match_operand 8 "const_8_to_11_operand")
13376		     (match_operand 9 "const_8_to_11_operand")
13377		     (const_int 12)
13378		     (const_int 13)
13379		     (const_int 14)
13380		     (const_int 15)])))]
13381  "TARGET_AVX2
13382   && <mask_avx512bw_condition> && <mask_avx512vl_condition>
13383   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
13384   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
13385   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
13386   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
;; Output: pack operands 2-5 into the 8-bit immediate (two bits each,
;; operand 2 in the low bits), store it in operands[2] and print it as %2.
13387{
13388  int mask = 0;
13389  mask |= INTVAL (operands[2]) << 0;
13390  mask |= INTVAL (operands[3]) << 2;
13391  mask |= INTVAL (operands[4]) << 4;
13392  mask |= INTVAL (operands[5]) << 6;
13393  operands[2] = GEN_INT (mask);
13394
13395  return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
13396}
13397  [(set_attr "type" "sselog")
13398   (set_attr "prefix" "maybe_evex")
13399   (set_attr "length_immediate" "1")
13400   (set_attr "mode" "OI")])
13401
;; Expand the masked 128-bit vpshuflw pattern
;; (TARGET_AVX512VL && TARGET_AVX512BW).
;; Operand 2 is an 8-bit immediate that is split into four 2-bit element
;; selectors for sse2_pshuflw_1_mask.  Operands 3 and 4 are forwarded
;; unchanged as the trailing generator arguments; presumably the merge
;; source and the QImode write mask (mask subst defined elsewhere).
13402(define_expand "avx512vl_pshuflw_mask"
13403  [(match_operand:V8HI 0 "register_operand")
13404   (match_operand:V8HI 1 "nonimmediate_operand")
13405   (match_operand:SI 2 "const_0_to_255_operand")
13406   (match_operand:V8HI 3 "register_operand")
13407   (match_operand:QI 4 "register_operand")]
13408  "TARGET_AVX512VL && TARGET_AVX512BW"
13409{
13410  int mask = INTVAL (operands[2]);
13411  emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
13412				 GEN_INT ((mask >> 0) & 3),
13413				 GEN_INT ((mask >> 2) & 3),
13414				 GEN_INT ((mask >> 4) & 3),
13415				 GEN_INT ((mask >> 6) & 3),
13416                 operands[3], operands[4]));
13417  DONE;
13418})
13419
;; Expand the 128-bit pshuflw pattern (TARGET_SSE2).
;; Operand 2 is any const_int; only its low eight bits are used, split into
;; four 2-bit element selectors that are passed to sse2_pshuflw_1.
13420(define_expand "sse2_pshuflw"
13421  [(match_operand:V8HI 0 "register_operand")
13422   (match_operand:V8HI 1 "vector_operand")
13423   (match_operand:SI 2 "const_int_operand")]
13424  "TARGET_SSE2"
13425{
13426  int mask = INTVAL (operands[2]);
13427  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
13428				 GEN_INT ((mask >> 0) & 3),
13429				 GEN_INT ((mask >> 2) & 3),
13430				 GEN_INT ((mask >> 4) & 3),
13431				 GEN_INT ((mask >> 6) & 3)));
13432  DONE;
13433})
13434
;; 128-bit pshuflw written as a vec_select of eight elements of operand 1.
;; Result elements 0-3 are chosen by operands 2-5 (each 0..3); elements 4-7
;; are fixed to their own positions.  The <mask_*_condition> strings are
;; subst attributes defined elsewhere.
13435(define_insn "sse2_pshuflw_1<mask_name>"
13436  [(set (match_operand:V8HI 0 "register_operand" "=v")
13437	(vec_select:V8HI
13438	  (match_operand:V8HI 1 "vector_operand" "vBm")
13439	  (parallel [(match_operand 2 "const_0_to_3_operand")
13440		     (match_operand 3 "const_0_to_3_operand")
13441		     (match_operand 4 "const_0_to_3_operand")
13442		     (match_operand 5 "const_0_to_3_operand")
13443		     (const_int 4)
13444		     (const_int 5)
13445		     (const_int 6)
13446		     (const_int 7)])))]
13447  "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
;; Output: pack operands 2-5 into the 8-bit immediate (two bits each,
;; operand 2 in the low bits), store it in operands[2] and print it as %2.
13448{
13449  int mask = 0;
13450  mask |= INTVAL (operands[2]) << 0;
13451  mask |= INTVAL (operands[3]) << 2;
13452  mask |= INTVAL (operands[4]) << 4;
13453  mask |= INTVAL (operands[5]) << 6;
13454  operands[2] = GEN_INT (mask);
13455
13456  return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13457}
;; NOTE(review): "prefix" is "maybe_vex" for the masked variant as well,
;; whereas sse2_pshufd_1 uses <mask_prefix2> -- confirm this is intended.
13458  [(set_attr "type" "sselog")
13459   (set_attr "prefix_data16" "0")
13460   (set_attr "prefix_rep" "1")
13461   (set_attr "prefix" "maybe_vex")
13462   (set_attr "length_immediate" "1")
13463   (set_attr "mode" "TI")])
13464
;; Expand the unmasked 256-bit vpshufhw pattern (TARGET_AVX2).
;; Operand 2 is an 8-bit immediate holding four 2-bit selectors; they are
;; emitted twice, offset by 4 and by 12, as the eight variable indices of
;; avx2_pshufhw_1.
13465(define_expand "avx2_pshufhwv3"
13466  [(match_operand:V16HI 0 "register_operand")
13467   (match_operand:V16HI 1 "nonimmediate_operand")
13468   (match_operand:SI 2 "const_0_to_255_operand")]
13469  "TARGET_AVX2"
13470{
13471  int mask = INTVAL (operands[2]);
13472  emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
13473				 GEN_INT (((mask >> 0) & 3) + 4),
13474				 GEN_INT (((mask >> 2) & 3) + 4),
13475				 GEN_INT (((mask >> 4) & 3) + 4),
13476				 GEN_INT (((mask >> 6) & 3) + 4),
13477				 GEN_INT (((mask >> 0) & 3) + 12),
13478				 GEN_INT (((mask >> 2) & 3) + 12),
13479				 GEN_INT (((mask >> 4) & 3) + 12),
13480				 GEN_INT (((mask >> 6) & 3) + 12)));
13481  DONE;
13482})
13483
;; 512-bit vpshufhw (TARGET_AVX512BW), modelled as an UNSPEC_PSHUFHW of the
;; source operand 1 and the 8-bit immediate operand 2, which is printed
;; unchanged as %2.  <mask_codefor>, <mask_name> and <mask_operand3> come
;; from the mask subst defined elsewhere.
;; The instruction carries an imm8, so "length_immediate" is set to 1 here
;; just as in the avx2_pshufhw_1 and sse2_pshufhw_1 patterns.
13484(define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
13485  [(set (match_operand:V32HI 0 "register_operand" "=v")
13486	(unspec:V32HI
13487	  [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
13488	   (match_operand:SI 2 "const_0_to_255_operand" "n")]
13489	  UNSPEC_PSHUFHW))]
13490  "TARGET_AVX512BW"
13491  "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13492  [(set_attr "type" "sselog")
13493   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")
13494   (set_attr "mode" "XI")])
13495
;; Expand the masked 256-bit vpshufhw pattern
;; (TARGET_AVX512VL && TARGET_AVX512BW).
;; Operand 2 is an 8-bit immediate holding four 2-bit selectors; they are
;; emitted twice, offset by 4 and by 12, as the eight variable indices of
;; avx2_pshufhw_1_mask.  Operands 3 and 4 are forwarded unchanged as the
;; trailing generator arguments; presumably the merge source and the
;; HImode write mask (mask subst defined elsewhere).
13496(define_expand "avx512vl_pshufhwv3_mask"
13497  [(match_operand:V16HI 0 "register_operand")
13498   (match_operand:V16HI 1 "nonimmediate_operand")
13499   (match_operand:SI 2 "const_0_to_255_operand")
13500   (match_operand:V16HI 3 "register_operand")
13501   (match_operand:HI 4 "register_operand")]
13502  "TARGET_AVX512VL && TARGET_AVX512BW"
13503{
13504  int mask = INTVAL (operands[2]);
13505  emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
13506				 GEN_INT (((mask >> 0) & 3) + 4),
13507				 GEN_INT (((mask >> 2) & 3) + 4),
13508				 GEN_INT (((mask >> 4) & 3) + 4),
13509				 GEN_INT (((mask >> 6) & 3) + 4),
13510				 GEN_INT (((mask >> 0) & 3) + 12),
13511				 GEN_INT (((mask >> 2) & 3) + 12),
13512				 GEN_INT (((mask >> 4) & 3) + 12),
13513				 GEN_INT (((mask >> 6) & 3) + 12),
13514                 operands[3], operands[4]));
13515  DONE;
13516})
13517
;; 256-bit vpshufhw written as a vec_select of sixteen elements of
;; operand 1.  Result elements 4-7 are chosen by operands 2-5 (each 4..7)
;; and elements 12-15 by operands 6-9 (each 12..15); elements 0-3 and 8-11
;; are fixed to their own positions.  The insn condition requires operands
;; 6-9 to equal operands 2-5 plus 8.  The <mask_*_condition> strings are
;; subst attributes defined elsewhere.
13518(define_insn "avx2_pshufhw_1<mask_name>"
13519  [(set (match_operand:V16HI 0 "register_operand" "=v")
13520	(vec_select:V16HI
13521	  (match_operand:V16HI 1 "nonimmediate_operand" "vm")
13522	  (parallel [(const_int 0)
13523		     (const_int 1)
13524		     (const_int 2)
13525		     (const_int 3)
13526		     (match_operand 2 "const_4_to_7_operand")
13527		     (match_operand 3 "const_4_to_7_operand")
13528		     (match_operand 4 "const_4_to_7_operand")
13529		     (match_operand 5 "const_4_to_7_operand")
13530		     (const_int 8)
13531		     (const_int 9)
13532		     (const_int 10)
13533		     (const_int 11)
13534		     (match_operand 6 "const_12_to_15_operand")
13535		     (match_operand 7 "const_12_to_15_operand")
13536		     (match_operand 8 "const_12_to_15_operand")
13537		     (match_operand 9 "const_12_to_15_operand")])))]
13538  "TARGET_AVX2
13539   && <mask_avx512bw_condition> && <mask_avx512vl_condition>
13540   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
13541   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
13542   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
13543   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
;; Output: subtract the base index 4 from operands 2-5 and pack the
;; resulting 2-bit values into the 8-bit immediate (operand 2 in the low
;; bits), store it in operands[2] and print it as %2.
13544{
13545  int mask = 0;
13546  mask |= (INTVAL (operands[2]) - 4) << 0;
13547  mask |= (INTVAL (operands[3]) - 4) << 2;
13548  mask |= (INTVAL (operands[4]) - 4) << 4;
13549  mask |= (INTVAL (operands[5]) - 4) << 6;
13550  operands[2] = GEN_INT (mask);
13551
13552  return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
13553}
13554  [(set_attr "type" "sselog")
13555   (set_attr "prefix" "maybe_evex")
13556   (set_attr "length_immediate" "1")
13557   (set_attr "mode" "OI")])
13558
;; Expand the masked 128-bit vpshufhw pattern
;; (TARGET_AVX512VL && TARGET_AVX512BW).
;; Operand 2 is an 8-bit immediate that is split into four 2-bit selectors,
;; each offset by 4, for sse2_pshufhw_1_mask.  Operands 3 and 4 are
;; forwarded unchanged as the trailing generator arguments; presumably the
;; merge source and the QImode write mask (mask subst defined elsewhere).
13559(define_expand "avx512vl_pshufhw_mask"
13560  [(match_operand:V8HI 0 "register_operand")
13561   (match_operand:V8HI 1 "nonimmediate_operand")
13562   (match_operand:SI 2 "const_0_to_255_operand")
13563   (match_operand:V8HI 3 "register_operand")
13564   (match_operand:QI 4 "register_operand")]
13565  "TARGET_AVX512VL && TARGET_AVX512BW"
13566{
13567  int mask = INTVAL (operands[2]);
13568  emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
13569				 GEN_INT (((mask >> 0) & 3) + 4),
13570				 GEN_INT (((mask >> 2) & 3) + 4),
13571				 GEN_INT (((mask >> 4) & 3) + 4),
13572				 GEN_INT (((mask >> 6) & 3) + 4),
13573                 operands[3], operands[4]));
13574  DONE;
13575})
13576
;; Expand the 128-bit pshufhw pattern (TARGET_SSE2).
;; Operand 2 is any const_int; only its low eight bits are used, split into
;; four 2-bit selectors that are each offset by 4 and passed to
;; sse2_pshufhw_1.
13577(define_expand "sse2_pshufhw"
13578  [(match_operand:V8HI 0 "register_operand")
13579   (match_operand:V8HI 1 "vector_operand")
13580   (match_operand:SI 2 "const_int_operand")]
13581  "TARGET_SSE2"
13582{
13583  int mask = INTVAL (operands[2]);
13584  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
13585				 GEN_INT (((mask >> 0) & 3) + 4),
13586				 GEN_INT (((mask >> 2) & 3) + 4),
13587				 GEN_INT (((mask >> 4) & 3) + 4),
13588				 GEN_INT (((mask >> 6) & 3) + 4)));
13589  DONE;
13590})
13591
;; 128-bit pshufhw written as a vec_select of eight elements of operand 1.
;; Result elements 0-3 are fixed to their own positions; elements 4-7 are
;; chosen by operands 2-5 (each 4..7).  The <mask_*_condition> strings are
;; subst attributes defined elsewhere.
13592(define_insn "sse2_pshufhw_1<mask_name>"
13593  [(set (match_operand:V8HI 0 "register_operand" "=v")
13594	(vec_select:V8HI
13595	  (match_operand:V8HI 1 "vector_operand" "vBm")
13596	  (parallel [(const_int 0)
13597		     (const_int 1)
13598		     (const_int 2)
13599		     (const_int 3)
13600		     (match_operand 2 "const_4_to_7_operand")
13601		     (match_operand 3 "const_4_to_7_operand")
13602		     (match_operand 4 "const_4_to_7_operand")
13603		     (match_operand 5 "const_4_to_7_operand")])))]
13604  "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
;; Output: subtract the base index 4 from operands 2-5 and pack the
;; resulting 2-bit values into the 8-bit immediate (operand 2 in the low
;; bits), store it in operands[2] and print it as %2.
13605{
13606  int mask = 0;
13607  mask |= (INTVAL (operands[2]) - 4) << 0;
13608  mask |= (INTVAL (operands[3]) - 4) << 2;
13609  mask |= (INTVAL (operands[4]) - 4) << 4;
13610  mask |= (INTVAL (operands[5]) - 4) << 6;
13611  operands[2] = GEN_INT (mask);
13612
13613  return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13614}
;; NOTE(review): "prefix" is "maybe_vex" for the masked variant as well,
;; whereas sse2_pshufd_1 uses <mask_prefix2> -- confirm this is intended.
13615  [(set_attr "type" "sselog")
13616   (set_attr "prefix_rep" "1")
13617   (set_attr "prefix_data16" "0")
13618   (set_attr "prefix" "maybe_vex")
13619   (set_attr "length_immediate" "1")
13620   (set_attr "mode" "TI")])
13621
;; Expander (TARGET_SSE) that builds a V4SI whose element 0 is the SImode
;; operand 1 and whose remaining elements come from operand 2.  The
;; preparation statement sets operand 2 to the V4SI zero constant, so the
;; upper three elements of the result are zero.
13622(define_expand "sse2_loadd"
13623  [(set (match_operand:V4SI 0 "register_operand")
13624	(vec_merge:V4SI
13625	  (vec_duplicate:V4SI
13626	    (match_operand:SI 1 "nonimmediate_operand"))
13627	  (match_dup 2)
13628	  (const_int 1)))]
13629  "TARGET_SSE"
13630  "operands[2] = CONST0_RTX (V4SImode);")
13631
;; Insert the SImode operand 2 into element 0 of a V4SI result; the other
;; elements are taken from operand 1 (a register or the zero vector, per
;; the reg_or_0_operand predicate).  Five alternatives:
;;   0, 1: %vmovd from memory / a general register (isa sse2);
;;   2, 3: movss from memory / an SSE register, non-AVX only; alternative 3
;;         ties operand 1 to the output register ("0");
;;   4:    three-operand vmovss (isa avx).
;; The "Yi" and "C" constraints are defined elsewhere; "C" is presumably
;; the constant-zero constraint -- confirm in constraints.md.
13632(define_insn "sse2_loadld"
13633  [(set (match_operand:V4SI 0 "register_operand"       "=v,Yi,x,x,v")
13634	(vec_merge:V4SI
13635	  (vec_duplicate:V4SI
13636	    (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,v"))
13637	  (match_operand:V4SI 1 "reg_or_0_operand"     "C ,C ,C,0,v")
13638	  (const_int 1)))]
13639  "TARGET_SSE"
13640  "@
13641   %vmovd\t{%2, %0|%0, %2}
13642   %vmovd\t{%2, %0|%0, %2}
13643   movss\t{%2, %0|%0, %2}
13644   movss\t{%2, %0|%0, %2}
13645   vmovss\t{%2, %1, %0|%0, %1, %2}"
13646  [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
13647   (set_attr "type" "ssemov")
13648   (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,maybe_evex")
13649   (set_attr "mode" "TI,TI,V4SF,SF,SF")])
13650
13651;; QI and HI modes handled by pextr patterns.
;; The iterator covers V16QI only when TARGET_SSE4_1 holds and V8HI
;; unconditionally.
13652(define_mode_iterator PEXTR_MODE12
13653  [(V16QI "TARGET_SSE4_1") V8HI])
13654
;; Extract one element of a V16QI/V8HI register (PEXTR_MODE12) into a
;; general register or memory with pextr<ssemodesuffix>.  Operand 2 is the
;; element index, limited to 0..<ssescalarnummask>.
;; Alternatives 0 and 2 write a register and print it with the %k modifier;
;; alternatives 1 and 3 store to memory.  Per the "isa" attribute,
;; alternative 1 needs sse4 and alternatives 2 and 3 need avx512bw.
;; "prefix_extra" is "*" only for the register-destination alternatives in
;; V8HImode and "1" in every other case.
13655(define_insn "*vec_extract<mode>"
13656  [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m,r,m")
13657	(vec_select:<ssescalarmode>
13658	  (match_operand:PEXTR_MODE12 1 "register_operand" "x,x,v,v")
13659	  (parallel
13660	    [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
13661  "TARGET_SSE2"
13662  "@
13663   %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
13664   %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
13665   vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
13666   vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13667  [(set_attr "isa" "*,sse4,avx512bw,avx512bw")
13668   (set_attr "type" "sselog1")
13669   (set_attr "prefix_data16" "1")
13670   (set (attr "prefix_extra")
13671     (if_then_else
13672       (and (eq_attr "alternative" "0,2")
13673	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
13674       (const_string "*")
13675       (const_string "1")))
13676   (set_attr "length_immediate" "1")
13677   (set_attr "prefix" "maybe_vex,maybe_vex,evex,evex")
13678   (set_attr "mode" "TI")])
13679
;; Zero-extending form of the V16QI/V8HI element extract: the selected
;; element (index operand 2) is zero-extended into an SWI48 general
;; register.  Both alternatives emit pextr<ssemodesuffix> with the
;; destination printed through the %k modifier; the second alternative
;; accepts "v" registers and is restricted to avx512bw by the "isa"
;; attribute.  "prefix_extra" is "*" for V8HImode and "1" otherwise.
13680(define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
13681  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
13682	(zero_extend:SWI48
13683	  (vec_select:<PEXTR_MODE12:ssescalarmode>
13684	    (match_operand:PEXTR_MODE12 1 "register_operand" "x,v")
13685	    (parallel
13686	      [(match_operand:SI 2
13687		"const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
13688  "TARGET_SSE2"
13689  "@
13690   %vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
13691   vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
13692  [(set_attr "isa" "*,avx512bw")
13693   (set_attr "type" "sselog1")
13694   (set_attr "prefix_data16" "1")
13695   (set (attr "prefix_extra")
13696     (if_then_else
13697       (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
13698       (const_string "*")
13699       (const_string "1")))
13700   (set_attr "length_immediate" "1")
13701   (set_attr "prefix" "maybe_vex")
13702   (set_attr "mode" "TI")])
13703
;; Element extract from a VI12_128 vector that lives in memory ("o"
;; constraint) into a general register.  The output template is "#", so
;; this insn is never printed as is and has to be split; the splitter is
;; not part of this pattern.
13704(define_insn "*vec_extract<mode>_mem"
13705  [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
13706	(vec_select:<ssescalarmode>
13707	  (match_operand:VI12_128 1 "memory_operand" "o")
13708	  (parallel
13709	    [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
13710  "TARGET_SSE"
13711  "#")
13712
;; Extract element 0 of an <ssevecmode> vector as an SWI48 scalar.  Three
;; alternatives (general register, vector register or memory destination);
;; the insn condition rejects the memory-to-memory combination.  The
;; output template is "#", so the insn has to be split before output.
13713(define_insn "*vec_extract<ssevecmodelower>_0"
13714  [(set (match_operand:SWI48 0 "nonimmediate_operand"	       "=r ,v ,m")
13715	(vec_select:SWI48
13716	  (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,vm,v")
13717	  (parallel [(const_int 0)])))]
13718  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13719  "#")
13720
;; Extract element 0 of a V2DI vector as a DImode value on non-64-bit
;; targets (TARGET_SSE && !TARGET_64BIT).  The destination is a vector
;; register or memory; the memory-to-memory combination is rejected.  The
;; output template is "#", so the insn has to be split before output.
13721(define_insn "*vec_extractv2di_0_sse"
13722  [(set (match_operand:DI 0 "nonimmediate_operand"     "=v,m")
13723	(vec_select:DI
13724	  (match_operand:V2DI 1 "nonimmediate_operand" "vm,v")
13725	  (parallel [(const_int 0)])))]
13726  "TARGET_SSE && !TARGET_64BIT
13727   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13728  "#")
13729
;; After reload, an extract of element 0 from a vector held in a register
;; is rewritten as a plain scalar move: operand 1 is replaced by its low
;; part in the scalar mode (gen_lowpart) and copied to operand 0.
13730(define_split
13731  [(set (match_operand:SWI48x 0 "nonimmediate_operand")
13732	(vec_select:SWI48x
13733	  (match_operand:<ssevecmode> 1 "register_operand")
13734	  (parallel [(const_int 0)])))]
13735  "TARGET_SSE && reload_completed"
13736  [(set (match_dup 0) (match_dup 1))]
13737  "operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
13738
;; Zero-extend element 0 of a V4SI register to DImode (TARGET_SSE4_1).
;; Three alternatives with a general-register or vector-register
;; destination; the "isa" attribute limits alternative 0 to x64 and
;; alternative 2 to avx512f.  The output template is "#", so the insn has
;; to be split before output.
13739(define_insn "*vec_extractv4si_0_zext_sse4"
13740  [(set (match_operand:DI 0 "register_operand" "=r,x,v")
13741	(zero_extend:DI
13742	  (vec_select:SI
13743	    (match_operand:V4SI 1 "register_operand" "Yj,x,v")
13744	    (parallel [(const_int 0)]))))]
13745  "TARGET_SSE4_1"
13746  "#"
13747  [(set_attr "isa" "x64,*,avx512f")])
13748
;; Zero-extend element 0 of a V4SI register into a DImode general register.
;; Only enabled for 64-bit SSE2 targets where
;; TARGET_INTER_UNIT_MOVES_FROM_VEC holds.  The output template is "#", so
;; the insn has to be split before output.
13749(define_insn "*vec_extractv4si_0_zext"
13750  [(set (match_operand:DI 0 "register_operand" "=r")
13751	(zero_extend:DI
13752	  (vec_select:SI
13753	    (match_operand:V4SI 1 "register_operand" "x")
13754	    (parallel [(const_int 0)]))))]
13755  "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
13756  "#")
13757
;; After reload, a zero-extended extract of element 0 from a V4SI register
;; is rewritten as a zero_extend:DI of the register's SImode low part
;; (gen_lowpart).
13758(define_split
13759  [(set (match_operand:DI 0 "register_operand")
13760	(zero_extend:DI
13761	  (vec_select:SI
13762	    (match_operand:V4SI 1 "register_operand")
13763	    (parallel [(const_int 0)]))))]
13764  "TARGET_SSE2 && reload_completed"
13765  [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
13766  "operands[1] = gen_lowpart (SImode, operands[1]);")
13767
;; Extract element operand 2 (0..3) of a V4SI register as an SImode value
;; (TARGET_SSE4_1).  Six alternatives:
;;   0, 1: pextrd into a general register or memory;
;;   2, 3: non-AVX psrldq in place (operand 1 is tied to the output), with
;;         the element index converted to a byte count (index * 4);
;;   4, 5: three-operand vpsrldq with the same byte count.
;; For the shift alternatives operands[2] is overwritten with the byte
;; count before the template is returned.
13768(define_insn "*vec_extractv4si"
13769  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,x,Yv")
13770	(vec_select:SI
13771	  (match_operand:V4SI 1 "register_operand" "x,v,0,0,x,v")
13772	  (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
13773  "TARGET_SSE4_1"
13774{
13775  switch (which_alternative)
13776    {
13777    case 0:
13778    case 1:
13779      return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
13780
13781    case 2:
13782    case 3:
13783      operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
13784      return "psrldq\t{%2, %0|%0, %2}";
13785
13786    case 4:
13787    case 5:
13788      operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
13789      return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
13790
13791    default:
13792      gcc_unreachable ();
13793    }
13794}
;; "prefix_extra" is "1" only for the two pextrd alternatives.
13795  [(set_attr "isa" "*,avx512dq,noavx,noavx,avx,avx512bw")
13796   (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1,sseishft1")
13797   (set (attr "prefix_extra")
13798     (if_then_else (eq_attr "alternative" "0,1")
13799		   (const_string "1")
13800		   (const_string "*")))
13801   (set_attr "length_immediate" "1")
13802   (set_attr "prefix" "maybe_vex,evex,orig,orig,vex,evex")
13803   (set_attr "mode" "TI")])
13804
;; Zero-extended extraction of element operands[2] (0..3) of a V4SI
;; register into a DImode general register on 64-bit SSE4.1 targets.
;; Emitted as pextrd with the destination printed through the %k
;; modifier.  The second alternative is limited to AVX512DQ.
13805(define_insn "*vec_extractv4si_zext"
13806  [(set (match_operand:DI 0 "register_operand" "=r,r")
13807	(zero_extend:DI
13808	  (vec_select:SI
13809	    (match_operand:V4SI 1 "register_operand" "x,v")
13810	    (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
13811  "TARGET_64BIT && TARGET_SSE4_1"
13812  "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
13813  [(set_attr "isa" "*,avx512dq")
13814   (set_attr "type" "sselog1")
13815   (set_attr "prefix_extra" "1")
13816   (set_attr "length_immediate" "1")
13817   (set_attr "prefix" "maybe_vex")
13818   (set_attr "mode" "TI")])
13819
;; Extraction of element operands[2] (0..3) from a V4SI value in
;; offsettable memory ("o") into a vector or general register.  The "#"
;; template means this insn is never output directly and must be split.
;; NOTE(review): presumably split by the generic VI_128 memory
;; define_split further down in this file -- confirm.
13820(define_insn "*vec_extractv4si_mem"
13821  [(set (match_operand:SI 0 "register_operand" "=x,r")
13822	(vec_select:SI
13823	  (match_operand:V4SI 1 "memory_operand" "o,o")
13824	  (parallel [(match_operand 2 "const_0_to_3_operand")])))]
13825  "TARGET_SSE"
13826  "#")
13827
;; Zero-extended extraction of element operands[2] (0..3) from a V4SI
;; value in offsettable memory, 64-bit targets only.  After reload it is
;; split into a zero_extend of the SImode memory word located
;; index * 4 bytes into the original operand.
13828(define_insn_and_split "*vec_extractv4si_zext_mem"
13829  [(set (match_operand:DI 0 "register_operand" "=x,r")
13830	(zero_extend:DI
13831	  (vec_select:SI
13832	    (match_operand:V4SI 1 "memory_operand" "o,o")
13833	    (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
13834  "TARGET_64BIT && TARGET_SSE"
13835  "#"
13836  "&& reload_completed"
13837  [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
13838{
13839  operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
13840})
13841
;; Extract element 1 (the upper DImode half) of a V2DI operand.  At most
;; one of the two operands may be in memory.
;;   alternatives 0,1: pextrq $1 to a general register or memory (64-bit);
;;   alternative  2  : movhps store of the upper half to memory;
;;   alternatives 3-5: psrldq/vpsrldq by 8 bytes within vector registers;
;;   alternative  6  : movhlps (non-AVX);
;;   alternatives 7,8: source in offsettable memory; template "#", so
;;                     these are split rather than output directly.
13842(define_insn "*vec_extractv2di_1"
13843  [(set (match_operand:DI 0 "nonimmediate_operand"     "=rm,rm,m,x,x,Yv,x,v,r")
13844	(vec_select:DI
13845	  (match_operand:V2DI 1 "nonimmediate_operand"  "x ,v ,v,0,x, v,x,o,o")
13846	  (parallel [(const_int 1)])))]
13847  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13848  "@
13849   %vpextrq\t{$1, %1, %0|%0, %1, 1}
13850   vpextrq\t{$1, %1, %0|%0, %1, 1}
13851   %vmovhps\t{%1, %0|%0, %1}
13852   psrldq\t{$8, %0|%0, 8}
13853   vpsrldq\t{$8, %1, %0|%0, %1, 8}
13854   vpsrldq\t{$8, %1, %0|%0, %1, 8}
13855   movhlps\t{%1, %0|%0, %1}
13856   #
13857   #"
13858  [(set (attr "isa")
13859     (cond [(eq_attr "alternative" "0")
13860	      (const_string "x64_sse4")
13861	    (eq_attr "alternative" "1")
13862	      (const_string "x64_avx512dq")
13863	    (eq_attr "alternative" "3")
13864	      (const_string "sse2_noavx")
13865	    (eq_attr "alternative" "4")
13866	      (const_string "avx")
13867	    (eq_attr "alternative" "5")
13868	      (const_string "avx512bw")
13869	    (eq_attr "alternative" "6")
13870	      (const_string "noavx")
13871	    (eq_attr "alternative" "8")
13872	      (const_string "x64")
13873	   ]
13874	   (const_string "*")))
13875   (set (attr "type")
13876     (cond [(eq_attr "alternative" "2,6,7")
13877	      (const_string "ssemov")
13878	    (eq_attr "alternative" "3,4,5")
13879	      (const_string "sseishft1")
13880	    (eq_attr "alternative" "8")
13881	      (const_string "imov")
13882	   ]
13883	   (const_string "sselog1")))
13884   (set (attr "length_immediate")
13885     (if_then_else (eq_attr "alternative" "0,1,3,4,5")
13886		   (const_string "1")
13887		   (const_string "*")))
13888   (set (attr "prefix_rex")
13889     (if_then_else (eq_attr "alternative" "0,1")
13890		   (const_string "1")
13891		   (const_string "*")))
13892   (set (attr "prefix_extra")
13893     (if_then_else (eq_attr "alternative" "0,1")
13894		   (const_string "1")
13895		   (const_string "*")))
13896   (set_attr "prefix" "maybe_vex,evex,maybe_vex,orig,vex,evex,orig,*,*")
13897   (set_attr "mode" "TI,TI,V2SF,TI,TI,TI,V4SF,DI,DI")])
13898
;; After reload, extracting a constant-indexed element from a 128-bit
;; integer vector in memory becomes a plain scalar load: the memory
;; operand is re-addressed in the element mode at byte offset
;; index * element size.
13899(define_split
13900  [(set (match_operand:<ssescalarmode> 0 "register_operand")
13901	(vec_select:<ssescalarmode>
13902	  (match_operand:VI_128 1 "memory_operand")
13903	  (parallel
13904	    [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
13905  "TARGET_SSE && reload_completed"
13906  [(set (match_dup 0) (match_dup 1))]
13907{
13908  int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
13909
13910  operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
13911})
13912
;; Extract TImode element operands[2] (0 or 1) of a V2TI register into a
;; vector register or memory.  Alternative 0 uses the VEX vextract*128
;; form; alternative 1 uses vextracti32x4 with the source printed through
;; the %g modifier.
13913(define_insn "*vec_extractv2ti"
13914  [(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
13915	(vec_select:TI
13916	  (match_operand:V2TI 1 "register_operand" "x,v")
13917	  (parallel
13918	    [(match_operand:SI 2 "const_0_to_1_operand")])))]
13919  "TARGET_AVX"
13920  "@
13921   vextract%~128\t{%2, %1, %0|%0, %1, %2}
13922   vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
13923  [(set_attr "type" "sselog")
13924   (set_attr "prefix_extra" "1")
13925   (set_attr "length_immediate" "1")
13926   (set_attr "prefix" "vex,evex")
13927   (set_attr "mode" "OI")])
13928
;; Extract TImode element operands[2] (0..3) of a V4TI register into a
;; vector register or memory using vextracti32x4 (AVX512F).
13929(define_insn "*vec_extractv4ti"
13930  [(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
13931	(vec_select:TI
13932	  (match_operand:V4TI 1 "register_operand" "v")
13933	  (parallel
13934	    [(match_operand:SI 2 "const_0_to_3_operand")])))]
13935  "TARGET_AVX512F"
13936  "vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
13937  [(set_attr "type" "sselog")
13938   (set_attr "prefix_extra" "1")
13939   (set_attr "length_immediate" "1")
13940   (set_attr "prefix" "evex")
13941   (set_attr "mode" "XI")])
13942
;; Source vector modes for the low-TImode extraction split below:
;; V2TI unconditionally, V4TI only when AVX512F is enabled.
13943(define_mode_iterator VEXTRACTI128_MODE
13944  [(V4TI "TARGET_AVX512F") V2TI])
13945
;; After reload, extracting TImode element 0 of a V2TI/V4TI register is
;; rewritten as a move of the register's TImode low part.  The split is
;; only done when AVX512VL is available or the source is not one of the
;; extended (EXT_REX) SSE registers.
13946(define_split
13947  [(set (match_operand:TI 0 "nonimmediate_operand")
13948	(vec_select:TI
13949	  (match_operand:VEXTRACTI128_MODE 1 "register_operand")
13950	  (parallel [(const_int 0)])))]
13951  "TARGET_AVX
13952   && reload_completed
13953   && (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
13954  [(set (match_dup 0) (match_dup 1))]
13955  "operands[1] = gen_lowpart (TImode, operands[1]);")
13956
13957;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
13958;; vector modes into vec_extract*.
;; The split runs only while pseudos can still be created.  The source is
;; narrowed step by step to a 128-bit <ssevecmode> value:
;;   64-byte source: take the low 256 bits, then fall through;
;;   32-byte source: take the low 128 bits;
;;   16-byte source: just reinterpret via gen_lowpart.
;; The final insn is a vec_select of element 0 from that 128-bit value.
;; DImode extraction into a register is only handled on 64-bit targets;
;; otherwise the destination must be memory.
13959(define_split
13960  [(set (match_operand:SWI48x 0 "nonimmediate_operand")
13961	(subreg:SWI48x (match_operand 1 "register_operand") 0))]
13962  "can_create_pseudo_p ()
13963   && REG_P (operands[1])
13964   && VECTOR_MODE_P (GET_MODE (operands[1]))
13965   && ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
13966       || (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
13967       || (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
13968   && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
13969  [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
13970					 (parallel [(const_int 0)])))]
13971{
13972  rtx tmp;
13973
13974  switch (GET_MODE_SIZE (GET_MODE (operands[1])))
13975    {
13976    case 64:
13977      if (<MODE>mode == SImode)
13978	{
13979	  tmp = gen_reg_rtx (V8SImode);
13980	  emit_insn (gen_vec_extract_lo_v16si (tmp,
13981					       gen_lowpart (V16SImode,
13982							    operands[1])));
13983	}
13984      else
13985	{
13986	  tmp = gen_reg_rtx (V4DImode);
13987	  emit_insn (gen_vec_extract_lo_v8di (tmp,
13988					      gen_lowpart (V8DImode,
13989							   operands[1])));
13990	}
13991      operands[1] = tmp;
13992      /* FALLTHRU */
13993    case 32:
13994      tmp = gen_reg_rtx (<ssevecmode>mode);
13995      if (<MODE>mode == SImode)
13996	emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
13997							      operands[1])));
13998      else
13999	emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
14000							      operands[1])));
14001      operands[1] = tmp;
14002      break;
14003    case 16:
14004      operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
14005      break;
14006    }
14007})
14008
;; Build a V2SI from two SImode values (operand 1 = element 0,
;; operand 2 = element 1) on SSE4.1 targets.  The operands may not both
;; be in memory.
;;   alternatives 0-3: pinsrd/vpinsrd inserting operand 2 at index 1;
;;   alternatives 4-6: punpckldq/vpunpckldq of two vector registers;
;;   alternative  7  : movd of operand 1 when operand 2 is constant zero (C);
;;   alternatives 8,9: MMX register forms (punpckldq / movd).
14009(define_insn "*vec_concatv2si_sse4_1"
14010  [(set (match_operand:V2SI 0 "register_operand"
14011	  "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
14012	(vec_concat:V2SI
14013	  (match_operand:SI 1 "nonimmediate_operand"
14014	  "  0, 0, x,Yv, 0, 0,Yv,rm,  0,rm")
14015	  (match_operand:SI 2 "vector_move_operand"
14016	  " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
14017  "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14018  "@
14019   pinsrd\t{$1, %2, %0|%0, %2, 1}
14020   pinsrd\t{$1, %2, %0|%0, %2, 1}
14021   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
14022   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
14023   punpckldq\t{%2, %0|%0, %2}
14024   punpckldq\t{%2, %0|%0, %2}
14025   vpunpckldq\t{%2, %1, %0|%0, %1, %2}
14026   %vmovd\t{%1, %0|%0, %1}
14027   punpckldq\t{%2, %0|%0, %2}
14028   movd\t{%1, %0|%0, %1}"
14029  [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
14030   (set (attr "type")
14031     (cond [(eq_attr "alternative" "7")
14032	      (const_string "ssemov")
14033	    (eq_attr "alternative" "8")
14034	      (const_string "mmxcvt")
14035	    (eq_attr "alternative" "9")
14036	      (const_string "mmxmov")
14037	   ]
14038	   (const_string "sselog")))
14039   (set (attr "prefix_extra")
14040     (if_then_else (eq_attr "alternative" "0,1,2,3")
14041		   (const_string "1")
14042		   (const_string "*")))
14043   (set (attr "length_immediate")
14044     (if_then_else (eq_attr "alternative" "0,1,2,3")
14045		   (const_string "1")
14046		   (const_string "*")))
14047   (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
14048   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
14049
;; Build a V2SI from two SImode values on SSE targets without SSE4.1.
;; Operand 2 is either a register or constant zero (reg_or_0_operand);
;; the zero alternatives reduce to a movd/movss of operand 1.
;; Alternatives 0-2 require SSE2 ("isa" attribute); 2, 5 and 6 use MMX
;; registers.
14050;; ??? In theory we can match memory for the MMX alternative, but allowing
14051;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
14052;; alternatives pretty much forces the MMX alternative to be chosen.
14053(define_insn "*vec_concatv2si"
14054  [(set (match_operand:V2SI 0 "register_operand"     "=x,x ,*y,x,x,*y,*y")
14055	(vec_concat:V2SI
14056	  (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
14057	  (match_operand:SI 2 "reg_or_0_operand"     " x,C ,C, x,C,*y,C")))]
14058  "TARGET_SSE && !TARGET_SSE4_1"
14059  "@
14060   punpckldq\t{%2, %0|%0, %2}
14061   movd\t{%1, %0|%0, %1}
14062   movd\t{%1, %0|%0, %1}
14063   unpcklps\t{%2, %0|%0, %2}
14064   movss\t{%1, %0|%0, %1}
14065   punpckldq\t{%2, %0|%0, %2}
14066   movd\t{%1, %0|%0, %1}"
14067  [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
14068   (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
14069   (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
14070
;; Build a V4SI from two V2SI halves (operand 1 = low half, operand 2 =
;; high half).  Register sources use punpcklqdq/vpunpcklqdq or movlhps;
;; a memory operand 2 is loaded into the upper half with movhps/vmovhps.
14071(define_insn "*vec_concatv4si"
14072  [(set (match_operand:V4SI 0 "register_operand"       "=x,v,x,x,v")
14073	(vec_concat:V4SI
14074	  (match_operand:V2SI 1 "register_operand"     " 0,v,0,0,v")
14075	  (match_operand:V2SI 2 "nonimmediate_operand" " x,v,x,m,m")))]
14076  "TARGET_SSE"
14077  "@
14078   punpcklqdq\t{%2, %0|%0, %2}
14079   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
14080   movlhps\t{%2, %0|%0, %2}
14081   movhps\t{%2, %0|%0, %q2}
14082   vmovhps\t{%2, %1, %0|%0, %1, %q2}"
14083  [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
14084   (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
14085   (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
14086   (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
14087
;; Build a V2DI from two DImode values (operand 1 = element 0,
;; operand 2 = element 1).
;;   alternatives 0-3 : pinsrq/vpinsrq inserting operand 2 at index 1
;;                      (64-bit only);
;;   alternative  4   : operand 2 is zero and operand 1 is in a general
;;                      register; movq, or movd when the assembler lacks
;;                      inter-unit movq (HAVE_AS_IX86_INTERUNIT_MOVQ);
;;   alternative  5   : operand 2 is zero; movq from vector reg/memory;
;;   alternative  6   : operand 2 is zero; movq2dq from an MMX register;
;;   alternatives 7,8 : punpcklqdq/vpunpcklqdq of two vector registers;
;;   alternative  9   : movlhps;
;;   alternatives 10,11: movhps/vmovhps loading operand 2 from memory.
14088;; movd instead of movq is required to handle broken assemblers.
14089(define_insn "vec_concatv2di"
14090  [(set (match_operand:V2DI 0 "register_operand"
14091	  "=Yr,*x,x ,v ,Yi,v ,x    ,x,v ,x,x,v")
14092	(vec_concat:V2DI
14093	  (match_operand:DI 1 "nonimmediate_operand"
14094	  "  0, 0,x ,Yv,r ,vm,?!*Yn,0,Yv,0,0,v")
14095	  (match_operand:DI 2 "vector_move_operand"
14096	  " rm,rm,rm,rm,C ,C ,C ,x,Yv,x,m,m")))]
14097  "TARGET_SSE"
14098  "@
14099   pinsrq\t{$1, %2, %0|%0, %2, 1}
14100   pinsrq\t{$1, %2, %0|%0, %2, 1}
14101   vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
14102   vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
14103   * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
14104   %vmovq\t{%1, %0|%0, %1}
14105   movq2dq\t{%1, %0|%0, %1}
14106   punpcklqdq\t{%2, %0|%0, %2}
14107   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
14108   movlhps\t{%2, %0|%0, %2}
14109   movhps\t{%2, %0|%0, %2}
14110   vmovhps\t{%2, %1, %0|%0, %1, %2}"
14111  [(set (attr "isa")
14112     (cond [(eq_attr "alternative" "0,1")
14113	      (const_string "x64_sse4_noavx")
14114	    (eq_attr "alternative" "2")
14115	      (const_string "x64_avx")
14116	    (eq_attr "alternative" "3")
14117	      (const_string "x64_avx512dq")
14118	    (eq_attr "alternative" "4")
14119	      (const_string "x64")
14120	    (eq_attr "alternative" "5,6")
14121	      (const_string "sse2")
14122	    (eq_attr "alternative" "7")
14123	      (const_string "sse2_noavx")
14124	    (eq_attr "alternative" "8,11")
14125	      (const_string "avx")
14126	   ]
14127	   (const_string "noavx")))
14128   (set (attr "type")
14129     (if_then_else
14130       (eq_attr "alternative" "0,1,2,3,7,8")
14131       (const_string "sselog")
14132       (const_string "ssemov")))
14133   (set (attr "prefix_rex")
14134     (if_then_else (eq_attr "alternative" "0,1,2,3,4")
14135		   (const_string "1")
14136		   (const_string "*")))
14137   (set (attr "prefix_extra")
14138     (if_then_else (eq_attr "alternative" "0,1,2,3")
14139		   (const_string "1")
14140		   (const_string "*")))
14141   (set (attr "length_immediate")
14142     (if_then_else (eq_attr "alternative" "0,1,2,3")
14143		   (const_string "1")
14144		   (const_string "*")))
14145   (set (attr "prefix")
14146     (cond [(eq_attr "alternative" "2")
14147	      (const_string "vex")
14148	    (eq_attr "alternative" "3")
14149	      (const_string "evex")
14150	    (eq_attr "alternative" "4,5")
14151	      (const_string "maybe_vex")
14152	    (eq_attr "alternative" "8,11")
14153	      (const_string "maybe_evex")
14154	   ]
14155	   (const_string "orig")))
14156   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
14157
;; Set element 0 of a 64-bit-element integer vector to scalar operand 2
;; while every other element comes from operand 1, which must be the
;; all-zero constant.  A single vmovq into the low 128 bits (%x0) is
;; enough because:
14158;; vmovq clears also the higher bits.
14159(define_insn "vec_set<mode>_0"
14160  [(set (match_operand:VI8_AVX_AVX512F 0 "register_operand" "=Yi,v")
14161	(vec_merge:VI8_AVX_AVX512F
14162	  (vec_duplicate:VI8_AVX_AVX512F
14163	    (match_operand:<ssescalarmode> 2 "general_operand" "r,vm"))
14164	  (match_operand:VI8_AVX_AVX512F 1 "const0_operand" "C,C")
14165	  (const_int 1)))]
14166  "TARGET_AVX"
14167  "vmovq\t{%2, %x0|%x0, %2}"
14168  [(set_attr "isa" "x64,*")
14169   (set_attr "type" "ssemov")
14170   (set_attr "prefix_rex" "1,*")
14171   (set_attr "prefix" "maybe_evex")
14172   (set_attr "mode" "TI")])
14173
;; Signed unpack of the low half of operand 1 into the wider-element
;; vector operand 0.  Delegates to ix86_expand_sse_unpack with
;; (unsigned_p, high_p) = (false, false).
14174(define_expand "vec_unpacks_lo_<mode>"
14175  [(match_operand:<sseunpackmode> 0 "register_operand")
14176   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
14177  "TARGET_SSE2"
14178  "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
14179
;; Signed unpack of the high half of operand 1 into the wider-element
;; vector operand 0.  Delegates to ix86_expand_sse_unpack with
;; (unsigned_p, high_p) = (false, true).
14180(define_expand "vec_unpacks_hi_<mode>"
14181  [(match_operand:<sseunpackmode> 0 "register_operand")
14182   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
14183  "TARGET_SSE2"
14184  "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
14185
;; Unsigned unpack of the low half of operand 1 into the wider-element
;; vector operand 0.  Delegates to ix86_expand_sse_unpack with
;; (unsigned_p, high_p) = (true, false).
14186(define_expand "vec_unpacku_lo_<mode>"
14187  [(match_operand:<sseunpackmode> 0 "register_operand")
14188   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
14189  "TARGET_SSE2"
14190  "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
14191
;; Scalar-mode unpack: the HImode view (subreg at offset 0) of the QImode
;; destination is set to the HImode source.  AVX512F only.
;; NOTE(review): presumably these scalar modes stand for AVX-512 mask
;; registers -- confirm against the callers.
14192(define_expand "vec_unpacks_lo_hi"
14193  [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
14194        (match_operand:HI 1 "register_operand"))]
14195  "TARGET_AVX512F")
14196
;; Scalar-mode unpack: the HImode destination receives the low HImode
;; part (subreg at offset 0) of the SImode source.  AVX512F only.
14197(define_expand "vec_unpacks_lo_si"
14198  [(set (match_operand:HI 0 "register_operand")
14199        (subreg:HI (match_operand:SI 1 "register_operand") 0))]
14200  "TARGET_AVX512F")
14201
;; Scalar-mode unpack: the SImode destination receives the low SImode
;; part (subreg at offset 0) of the DImode source.  AVX512BW only.
14202(define_expand "vec_unpacks_lo_di"
14203  [(set (match_operand:SI 0 "register_operand")
14204        (subreg:SI (match_operand:DI 1 "register_operand") 0))]
14205  "TARGET_AVX512BW")
14206
;; Unsigned unpack of the high half of operand 1 into the wider-element
;; vector operand 0.  Delegates to ix86_expand_sse_unpack with
;; (unsigned_p, high_p) = (true, true).
14207(define_expand "vec_unpacku_hi_<mode>"
14208  [(match_operand:<sseunpackmode> 0 "register_operand")
14209   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
14210  "TARGET_SSE2"
14211  "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
14212
;; Scalar-mode unpack of the high half: the HImode source is shifted
;; right logically by 8 and stored through the HImode view of the QImode
;; destination.  The UNSPEC_MASKOP element is part of the emitted
;; parallel.  AVX512F only.
14213(define_expand "vec_unpacks_hi_hi"
14214  [(parallel
14215     [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
14216	   (lshiftrt:HI (match_operand:HI 1 "register_operand")
14217			(const_int 8)))
14218      (unspec [(const_int 0)] UNSPEC_MASKOP)])]
14219  "TARGET_AVX512F")
14220
;; Scalar-mode unpack of the high half for SImode/DImode sources: the
;; source is shifted right logically by the bit size of <HALFMASKMODE>
;; (computed into operands[2]) and stored through the wide view of the
;; half-width destination.  AVX512BW only.
14221(define_expand "vec_unpacks_hi_<mode>"
14222  [(parallel
14223     [(set (subreg:SWI48x
14224	     (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
14225	   (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
14226			    (match_dup 2)))
14227      (unspec [(const_int 0)] UNSPEC_MASKOP)])]
14228  "TARGET_AVX512BW"
14229  "operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));")
14230
14231;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14232;;
14233;; Miscellaneous
14234;;
14235;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14236
;; Unsigned rounding average of byte/word vectors (pavgb/pavgw):
;;   result = truncate ((zext (op1) + zext (op2) + 1) >> 1)
;; The additions and the shift are carried out in <ssedoublemode>, so the
;; rounding constant 1 added by the outer PLUS must itself be a
;; <ssedoublemode> vector of ones.  Creating or matching it in the narrow
;; <MODE> would give the PLUS operands of two different modes, which is
;; invalid RTL.  The expander and the insn below must agree on this mode.
;;
;; With masking applied, operands[3] is temporarily borrowed for the
;; constant while the binary operands are fixed up; the constant is then
;; moved to operands[5] and the original operands[3] is restored.
14237(define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
14238  [(set (match_operand:VI12_AVX2 0 "register_operand")
14239	(truncate:VI12_AVX2
14240	  (lshiftrt:<ssedoublemode>
14241	    (plus:<ssedoublemode>
14242	      (plus:<ssedoublemode>
14243		(zero_extend:<ssedoublemode>
14244		  (match_operand:VI12_AVX2 1 "vector_operand"))
14245		(zero_extend:<ssedoublemode>
14246		  (match_operand:VI12_AVX2 2 "vector_operand")))
14247	      (match_dup <mask_expand_op3>))
14248	    (const_int 1))))]
14249  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14250{
14251  rtx tmp;
14252  if (<mask_applied>)
14253    tmp = operands[3];
14254  operands[3] = CONST1_RTX(<ssedoublemode>mode);
14255  ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
14256
14257  if (<mask_applied>)
14258    {
14259      operands[5] = operands[3];
14260      operands[3] = tmp;
14261    }
14262})
14263
;; Matching insn for the expander above.  Operand 1 is commutative ("%")
;; and the two inputs may not both be in memory.  Alternative 0 is the
;; two-operand SSE2 form, alternative 1 the three-operand AVX form with
;; optional masking.
14264(define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
14265  [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
14266	(truncate:VI12_AVX2
14267	  (lshiftrt:<ssedoublemode>
14268	    (plus:<ssedoublemode>
14269	      (plus:<ssedoublemode>
14270		(zero_extend:<ssedoublemode>
14271		  (match_operand:VI12_AVX2 1 "vector_operand" "%0,v"))
14272		(zero_extend:<ssedoublemode>
14273		  (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))
14274	      (match_operand:<ssedoublemode> <mask_expand_op3> "const1_operand"))
14275	    (const_int 1))))]
14276  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
14277   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14278  "@
14279   pavg<ssemodesuffix>\t{%2, %0|%0, %2}
14280   vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14281  [(set_attr "isa" "noavx,avx")
14282   (set_attr "type" "sseiadd")
14283   (set_attr "prefix_data16" "1,*")
14284   (set_attr "prefix" "orig,<mask_prefix>")
14285   (set_attr "mode" "<sseinsnmode>")])
14286
;; psadbw/vpsadbw on byte vectors, producing a vector of 64-bit elements.
;; The operation is kept opaque as UNSPEC_PSADBW because:
14287;; The correct representation for this is absolutely enormous, and
14288;; surely not generally useful.
14289(define_insn "<sse2_avx2>_psadbw"
14290  [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
14291	(unspec:VI8_AVX2_AVX512BW
14292	  [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
14293	   (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")]
14294	  UNSPEC_PSADBW))]
14295  "TARGET_SSE2"
14296  "@
14297   psadbw\t{%2, %0|%0, %2}
14298   vpsadbw\t{%2, %1, %0|%0, %1, %2}"
14299  [(set_attr "isa" "noavx,avx")
14300   (set_attr "type" "sseiadd")
14301   (set_attr "atom_unit" "simul")
14302   (set_attr "prefix_data16" "1,*")
14303   (set_attr "prefix" "orig,maybe_evex")
14304   (set_attr "mode" "<sseinsnmode>")])
14305
;; movmskps/movmskpd (and VEX forms) from a 128- or 256-bit float vector
;; register into an SImode general register, represented as
;; UNSPEC_MOVMSK.
14306(define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
14307  [(set (match_operand:SI 0 "register_operand" "=r")
14308	(unspec:SI
14309	  [(match_operand:VF_128_256 1 "register_operand" "x")]
14310	  UNSPEC_MOVMSK))]
14311  "TARGET_SSE"
14312  "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
14313  [(set_attr "type" "ssemov")
14314   (set_attr "prefix" "maybe_vex")
14315   (set_attr "mode" "<MODE>")])
14316
;; Same movmsk operation with the SImode result zero-extended to DImode
;; (64-bit targets only).  The destination is printed with the %k
;; modifier, so the emitted instruction is the same as the SImode form.
14317(define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext"
14318  [(set (match_operand:DI 0 "register_operand" "=r")
14319	(zero_extend:DI
14320	  (unspec:SI
14321	    [(match_operand:VF_128_256 1 "register_operand" "x")]
14322	    UNSPEC_MOVMSK)))]
14323  "TARGET_64BIT && TARGET_SSE"
14324  "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
14325  [(set_attr "type" "ssemov")
14326   (set_attr "prefix" "maybe_vex")
14327   (set_attr "mode" "<MODE>")])
14328
;; pmovmskb/vpmovmskb from a byte vector register into an SImode general
;; register, represented as UNSPEC_MOVMSK.  The data16 prefix is counted
;; only when AVX is not enabled.
14329(define_insn "<sse2_avx2>_pmovmskb"
14330  [(set (match_operand:SI 0 "register_operand" "=r")
14331	(unspec:SI
14332	  [(match_operand:VI1_AVX2 1 "register_operand" "x")]
14333	  UNSPEC_MOVMSK))]
14334  "TARGET_SSE2"
14335  "%vpmovmskb\t{%1, %0|%0, %1}"
14336  [(set_attr "type" "ssemov")
14337   (set (attr "prefix_data16")
14338     (if_then_else
14339       (match_test "TARGET_AVX")
14340     (const_string "*")
14341     (const_string "1")))
14342   (set_attr "prefix" "maybe_vex")
14343   (set_attr "mode" "SI")])
14344
;; Same pmovmskb operation with the SImode result zero-extended to DImode
;; (64-bit targets only).  The destination is printed with the %k
;; modifier, so the emitted instruction is the same as the SImode form.
14345(define_insn "*<sse2_avx2>_pmovmskb_zext"
14346  [(set (match_operand:DI 0 "register_operand" "=r")
14347	(zero_extend:DI
14348	  (unspec:SI
14349	    [(match_operand:VI1_AVX2 1 "register_operand" "x")]
14350	    UNSPEC_MOVMSK)))]
14351  "TARGET_64BIT && TARGET_SSE2"
14352  "%vpmovmskb\t{%1, %k0|%k0, %1}"
14353  [(set_attr "type" "ssemov")
14354   (set (attr "prefix_data16")
14355     (if_then_else
14356       (match_test "TARGET_AVX")
14357     (const_string "*")
14358     (const_string "1")))
14359   (set_attr "prefix" "maybe_vex")
14360   (set_attr "mode" "SI")])
14361
;; Expander for the masked byte store maskmovdqu.  Operand 0 is the
;; V16QI memory destination, operands 1 and 2 are vector registers.  The
;; old memory contents (match_dup 0) are an input of the unspec, so the
;; store is modelled as depending on what was there before.
14362(define_expand "sse2_maskmovdqu"
14363  [(set (match_operand:V16QI 0 "memory_operand")
14364	(unspec:V16QI [(match_operand:V16QI 1 "register_operand")
14365		       (match_operand:V16QI 2 "register_operand")
14366		       (match_dup 0)]
14367		      UNSPEC_MASKMOV))]
14368  "TARGET_SSE2")
14369
;; Insn for maskmovdqu.  The destination address is operand 0, a Pmode
;; register forced into the "D" register class; it is implicit in the
;; instruction and therefore absent from the assembly template.  When
;; Pmode differs from word_mode, an "addr32" prefix is written to the
;; assembly output by hand before the opcode, and length_address accounts
;; for it.
14370(define_insn "*sse2_maskmovdqu"
14371  [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
14372	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
14373		       (match_operand:V16QI 2 "register_operand" "x")
14374		       (mem:V16QI (match_dup 0))]
14375		      UNSPEC_MASKMOV))]
14376  "TARGET_SSE2"
14377{
14378  /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
14379     that requires %v to be at the beginning of the opcode name.  */
14380  if (Pmode != word_mode)
14381    fputs ("\taddr32", asm_out_file);
14382  return "%vmaskmovdqu\t{%2, %1|%1, %2}";
14383}
14384  [(set_attr "type" "ssemov")
14385   (set_attr "prefix_data16" "1")
14386   (set (attr "length_address")
14387     (symbol_ref ("Pmode != word_mode")))
14388   ;; The implicit %rdi operand confuses default length_vex computation.
14389   (set (attr "length_vex")
14390     (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
14391   (set_attr "prefix" "maybe_vex")
14392   (set_attr "znver1_decode" "vector")
14393   (set_attr "mode" "TI")])
14394
;; ldmxcsr/vldmxcsr: loads from the SImode memory operand.  Modelled as
;; a volatile unspec (UNSPECV_LDMXCSR) with memory attribute "load".
14395(define_insn "sse_ldmxcsr"
14396  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
14397		    UNSPECV_LDMXCSR)]
14398  "TARGET_SSE"
14399  "%vldmxcsr\t%0"
14400  [(set_attr "type" "sse")
14401   (set_attr "atom_sse_attr" "mxcsr")
14402   (set_attr "prefix" "maybe_vex")
14403   (set_attr "memory" "load")])
14404
;; stmxcsr/vstmxcsr: stores to the SImode memory operand.  Modelled as a
;; set from a volatile unspec (UNSPECV_STMXCSR) with memory attribute
;; "store".
14405(define_insn "sse_stmxcsr"
14406  [(set (match_operand:SI 0 "memory_operand" "=m")
14407	(unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
14408  "TARGET_SSE"
14409  "%vstmxcsr\t%0"
14410  [(set_attr "type" "sse")
14411   (set_attr "atom_sse_attr" "mxcsr")
14412   (set_attr "prefix" "maybe_vex")
14413   (set_attr "memory" "store")])
14414
;; clflush on the address given by operand 0 (printed as an address via
;; %a0).  Volatile unspec; the memory effect is declared "unknown".
14415(define_insn "sse2_clflush"
14416  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
14417		    UNSPECV_CLFLUSH)]
14418  "TARGET_SSE2"
14419  "clflush\t%a0"
14420  [(set_attr "type" "sse")
14421   (set_attr "atom_sse_attr" "fence")
14422   (set_attr "memory" "unknown")])
14423
;; mwait.  Both operands are implicit in the instruction: the constraints
;; force operand 0 into the "c" register and operand 1 into the "a"
;; register, and the template prints the bare mnemonic.
14424;; As per AMD and Intel ISA manuals, the first operand is extensions
14425;; and it goes to %ecx. The second operand received is hints and it goes
14426;; to %eax.
14427(define_insn "sse3_mwait"
14428  [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
14429		     (match_operand:SI 1 "register_operand" "a")]
14430		    UNSPECV_MWAIT)]
14431  "TARGET_SSE3"
14432;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
14433;; Since 32bit register operands are implicitly zero extended to 64bit,
14434;; we only need to set up 32bit registers.
14435  "mwait"
14436  [(set_attr "length" "3")])
14437
;; monitor.  All three operands are implicit: operand 0 (a Pmode value)
;; is forced into the "a" register, operands 1 and 2 into the "c" and "d"
;; registers.  The template is prefixed with %^ and the length is 3 bytes
;; plus one more when Pmode differs from word_mode.
14438(define_insn "sse3_monitor_<mode>"
14439  [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
14440		     (match_operand:SI 1 "register_operand" "c")
14441		     (match_operand:SI 2 "register_operand" "d")]
14442		    UNSPECV_MONITOR)]
14443  "TARGET_SSE3"
14444;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
14445;; RCX and RDX are used.  Since 32bit register operands are implicitly
14446;; zero extended to 64bit, we only need to set up 32bit registers.
14447  "%^monitor"
14448  [(set (attr "length")
14449     (symbol_ref ("(Pmode != word_mode) + 3")))])
14450
14451;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14452;;
14453;; SSSE3 instructions
14454;;
14455;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14456
;; RTL codes covered by the horizontal add/subtract word patterns below:
;; plain and signed-saturating addition and subtraction.
14457(define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
14458
;; 256-bit horizontal add/subtract of adjacent 16-bit elements
;; (vphaddw, vphsubw and their saturating forms).
;;
;; The instruction works independently on each 128-bit lane: the low lane
;; of the result holds the four pair results of operand 1 elements 0..7
;; followed by the four pair results of operand 2 elements 0..7; the high
;; lane holds the pair results of operand 1 elements 8..15 followed by
;; those of operand 2 elements 8..15.  The vec_concat tree below is laid
;; out in exactly that order so the RTL describes what the hardware
;; computes (putting all of operand 1 first and all of operand 2 second
;; would describe a cross-lane operation that the instruction does not
;; perform).
14459(define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
14460  [(set (match_operand:V16HI 0 "register_operand" "=x")
14461	(vec_concat:V16HI
;; Result low 128-bit lane.
14462	  (vec_concat:V8HI
;; Operand 1, elements 0..7.
14463	    (vec_concat:V4HI
14464	      (vec_concat:V2HI
14465		(ssse3_plusminus:HI
14466		  (vec_select:HI
14467		    (match_operand:V16HI 1 "register_operand" "x")
14468		    (parallel [(const_int 0)]))
14469		  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
14470		(ssse3_plusminus:HI
14471		  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
14472		  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
14473	      (vec_concat:V2HI
14474		(ssse3_plusminus:HI
14475		  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
14476		  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
14477		(ssse3_plusminus:HI
14478		  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
14479		  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
;; Operand 2, elements 0..7.
14480	    (vec_concat:V4HI
14481	      (vec_concat:V2HI
14482		(ssse3_plusminus:HI
14483		  (vec_select:HI
14484		    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
14485		    (parallel [(const_int 0)]))
14486		  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
14487		(ssse3_plusminus:HI
14488		  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
14489		  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
14490	      (vec_concat:V2HI
14491		(ssse3_plusminus:HI
14492		  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
14493		  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
14494		(ssse3_plusminus:HI
14495		  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
14496		  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
;; Result high 128-bit lane.
14497	  (vec_concat:V8HI
;; Operand 1, elements 8..15.
14498	    (vec_concat:V4HI
14499	      (vec_concat:V2HI
14500		(ssse3_plusminus:HI
14501		  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
14502		  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
14503		(ssse3_plusminus:HI
14504		  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
14505		  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
14506	      (vec_concat:V2HI
14507		(ssse3_plusminus:HI
14508		  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
14509		  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
14510		(ssse3_plusminus:HI
14511		  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
14512		  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
;; Operand 2, elements 8..15.
14513	    (vec_concat:V4HI
14514	      (vec_concat:V2HI
14515		(ssse3_plusminus:HI
14516		  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
14517		  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
14518		(ssse3_plusminus:HI
14519		  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
14520		  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
14521	      (vec_concat:V2HI
14522		(ssse3_plusminus:HI
14523		  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
14524		  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
14525		(ssse3_plusminus:HI
14526		  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
14527		  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
14528  "TARGET_AVX2"
14529  "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
14530  [(set_attr "type" "sseiadd")
14531   (set_attr "prefix_extra" "1")
14532   (set_attr "prefix" "vex")
14533   (set_attr "mode" "OI")])
14534
;; 128-bit horizontal add/subtract of adjacent 16-bit elements.  The low
;; four result elements are the pair results of operand 1 (elements
;; 0..7), the high four those of operand 2.  Alternative 0 is the
;; two-operand SSSE3 form with operand 1 tied to the destination;
;; alternative 1 is the three-operand AVX form.
14535(define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
14536  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
14537	(vec_concat:V8HI
14538	  (vec_concat:V4HI
14539	    (vec_concat:V2HI
14540	      (ssse3_plusminus:HI
14541		(vec_select:HI
14542		  (match_operand:V8HI 1 "register_operand" "0,x")
14543		  (parallel [(const_int 0)]))
14544		(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
14545	      (ssse3_plusminus:HI
14546		(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
14547		(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
14548	    (vec_concat:V2HI
14549	      (ssse3_plusminus:HI
14550		(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
14551		(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
14552	      (ssse3_plusminus:HI
14553		(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
14554		(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
14555	  (vec_concat:V4HI
14556	    (vec_concat:V2HI
14557	      (ssse3_plusminus:HI
14558		(vec_select:HI
14559		  (match_operand:V8HI 2 "vector_operand" "xBm,xm")
14560		  (parallel [(const_int 0)]))
14561		(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
14562	      (ssse3_plusminus:HI
14563		(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
14564		(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
14565	    (vec_concat:V2HI
14566	      (ssse3_plusminus:HI
14567		(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
14568		(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
14569	      (ssse3_plusminus:HI
14570		(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
14571		(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
14572  "TARGET_SSSE3"
14573  "@
14574   ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
14575   vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
14576  [(set_attr "isa" "noavx,avx")
14577   (set_attr "type" "sseiadd")
14578   (set_attr "atom_unit" "complex")
14579   (set_attr "prefix_data16" "1,*")
14580   (set_attr "prefix_extra" "1")
14581   (set_attr "prefix" "orig,vex")
14582   (set_attr "mode" "TI")])
14583
;; 64-bit (MMX register, "y") horizontal add/subtract of adjacent 16-bit
;; elements.  The low two result elements are the pair results of
;; operand 1, the high two those of operand 2.  Operand 1 is tied to the
;; destination.
14584(define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
14585  [(set (match_operand:V4HI 0 "register_operand" "=y")
14586	(vec_concat:V4HI
14587	  (vec_concat:V2HI
14588	    (ssse3_plusminus:HI
14589	      (vec_select:HI
14590		(match_operand:V4HI 1 "register_operand" "0")
14591		(parallel [(const_int 0)]))
14592	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
14593	    (ssse3_plusminus:HI
14594	      (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
14595	      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
14596	  (vec_concat:V2HI
14597	    (ssse3_plusminus:HI
14598	      (vec_select:HI
14599		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
14600		(parallel [(const_int 0)]))
14601	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
14602	    (ssse3_plusminus:HI
14603	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
14604	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
14605  "TARGET_SSSE3"
14606  "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
14607  [(set_attr "type" "sseiadd")
14608   (set_attr "atom_unit" "complex")
14609   (set_attr "prefix_extra" "1")
14610   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14611   (set_attr "mode" "DI")])
14612
;; 256-bit horizontal add/subtract of adjacent SImode pairs
;; (vphaddd/vphsubd on ymm registers).
;;
;; The instruction works independently on each 128-bit lane: the low
;; lane of the result holds the pair results of the low lane of
;; operand 1 followed by those of the low lane of operand 2, and the
;; high lane does the same with the high lanes.  In element terms:
;;   { a0.a1, a2.a3, b0.b1, b2.b3, a4.a5, a6.a7, b4.b5, b6.b7 }
;; The RTL must list the elements in exactly this order; describing the
;; result as "all of operand 1, then all of operand 2" would let the
;; RTL optimizers simplify the insn to a value the hardware does not
;; produce.
14613(define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
14614  [(set (match_operand:V8SI 0 "register_operand" "=x")
14615	(vec_concat:V8SI
14616	  (vec_concat:V4SI
14617	    (vec_concat:V2SI
14618	      (plusminus:SI
14619		(vec_select:SI
14620		  (match_operand:V8SI 1 "register_operand" "x")
14621		  (parallel [(const_int 0)]))
14622		(vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
14623	      (plusminus:SI
14624		(vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
14625		(vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
14626	    (vec_concat:V2SI
14627	      (plusminus:SI
14628		(vec_select:SI
14629		  (match_operand:V8SI 2 "nonimmediate_operand" "xm")
14630		  (parallel [(const_int 0)]))
14631		(vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
14632	      (plusminus:SI
14633		(vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
14634		(vec_select:SI (match_dup 2) (parallel [(const_int 3)])))))
14635	  (vec_concat:V4SI
14636	    (vec_concat:V2SI
14637	      (plusminus:SI
14638		(vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
14639		(vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
14640	      (plusminus:SI
14641		(vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
14642		(vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))
14643	    (vec_concat:V2SI
14644	      (plusminus:SI
14645		(vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
14646		(vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
14647	      (plusminus:SI
14648		(vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
14649		(vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
14650  "TARGET_AVX2"
14651  "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
14652  [(set_attr "type" "sseiadd")
14653   (set_attr "prefix_extra" "1")
14654   (set_attr "prefix" "vex")
14655   (set_attr "mode" "OI")])
14656
;; 128-bit horizontal add/subtract of adjacent SImode pairs: result
;; elements 0-1 come from pairs (0,1) and (2,3) of operand 1, elements
;; 2-3 from the same pairs of operand 2.  Alternative 0 is the
;; two-operand form with operand 1 tied to the destination (isa noavx);
;; alternative 1 is the three-operand "v"-prefixed form (isa avx).
14657(define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
14658  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
14659	(vec_concat:V4SI
14660	  (vec_concat:V2SI
14661	    (plusminus:SI
14662	      (vec_select:SI
14663		(match_operand:V4SI 1 "register_operand" "0,x")
14664		(parallel [(const_int 0)]))
14665	      (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
14666	    (plusminus:SI
14667	      (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
14668	      (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
14669	  (vec_concat:V2SI
14670	    (plusminus:SI
14671	      (vec_select:SI
14672		(match_operand:V4SI 2 "vector_operand" "xBm,xm")
14673		(parallel [(const_int 0)]))
14674	      (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
14675	    (plusminus:SI
14676	      (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
14677	      (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
14678  "TARGET_SSSE3"
14679  "@
14680   ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
14681   vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
14682  [(set_attr "isa" "noavx,avx")
14683   (set_attr "type" "sseiadd")
14684   (set_attr "atom_unit" "complex")
14685   (set_attr "prefix_data16" "1,*")
14686   (set_attr "prefix_extra" "1")
14687   (set_attr "prefix" "orig,vex")
14688   (set_attr "mode" "TI")])
14689
;; 64-bit (V2SI, "y" = MMX register) horizontal add/subtract: result
;; element 0 combines the two elements of operand 1, element 1 the two
;; elements of operand 2.  Operand 1 is tied to the destination.
14690(define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
14691  [(set (match_operand:V2SI 0 "register_operand" "=y")
14692	(vec_concat:V2SI
14693	  (plusminus:SI
14694	    (vec_select:SI
14695	      (match_operand:V2SI 1 "register_operand" "0")
14696	      (parallel [(const_int 0)]))
14697	    (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
14698	  (plusminus:SI
14699	    (vec_select:SI
14700	      (match_operand:V2SI 2 "nonimmediate_operand" "ym")
14701	      (parallel [(const_int 0)]))
14702	    (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
14703  "TARGET_SSSE3"
14704  "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
14705  [(set_attr "type" "sseiadd")
14706   (set_attr "atom_unit" "complex")
14707   (set_attr "prefix_extra" "1")
14708   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14709   (set_attr "mode" "DI")])
14710
;; 256-bit vpmaddubsw.  For each result element i the pattern computes
;;   ss_plus (zext (op1[2i]) * sext (op2[2i]),
;;            zext (op1[2i+1]) * sext (op2[2i+1]))
;; i.e. bytes of operand 1 are zero-extended, bytes of operand 2 are
;; sign-extended, and the two adjacent products are added with signed
;; saturation.  The first mult selects the even byte indices, the second
;; the odd ones.  Alternative 1 uses "v" registers and is restricted to
;; isa avx512bw.
14711(define_insn "avx2_pmaddubsw256"
14712  [(set (match_operand:V16HI 0 "register_operand" "=x,v")
14713	(ss_plus:V16HI
14714	  (mult:V16HI
14715	    (zero_extend:V16HI
14716	      (vec_select:V16QI
14717		(match_operand:V32QI 1 "register_operand" "x,v")
14718		(parallel [(const_int 0) (const_int 2)
14719			   (const_int 4) (const_int 6)
14720			   (const_int 8) (const_int 10)
14721			   (const_int 12) (const_int 14)
14722			   (const_int 16) (const_int 18)
14723			   (const_int 20) (const_int 22)
14724			   (const_int 24) (const_int 26)
14725			   (const_int 28) (const_int 30)])))
14726	    (sign_extend:V16HI
14727	      (vec_select:V16QI
14728		(match_operand:V32QI 2 "nonimmediate_operand" "xm,vm")
14729		(parallel [(const_int 0) (const_int 2)
14730			   (const_int 4) (const_int 6)
14731			   (const_int 8) (const_int 10)
14732			   (const_int 12) (const_int 14)
14733			   (const_int 16) (const_int 18)
14734			   (const_int 20) (const_int 22)
14735			   (const_int 24) (const_int 26)
14736			   (const_int 28) (const_int 30)]))))
14737	  (mult:V16HI
14738	    (zero_extend:V16HI
14739	      (vec_select:V16QI (match_dup 1)
14740		(parallel [(const_int 1) (const_int 3)
14741			   (const_int 5) (const_int 7)
14742			   (const_int 9) (const_int 11)
14743			   (const_int 13) (const_int 15)
14744			   (const_int 17) (const_int 19)
14745			   (const_int 21) (const_int 23)
14746			   (const_int 25) (const_int 27)
14747			   (const_int 29) (const_int 31)])))
14748	    (sign_extend:V16HI
14749	      (vec_select:V16QI (match_dup 2)
14750		(parallel [(const_int 1) (const_int 3)
14751			   (const_int 5) (const_int 7)
14752			   (const_int 9) (const_int 11)
14753			   (const_int 13) (const_int 15)
14754			   (const_int 17) (const_int 19)
14755			   (const_int 21) (const_int 23)
14756			   (const_int 25) (const_int 27)
14757			   (const_int 29) (const_int 31)]))))))]
14758  "TARGET_AVX2"
14759  "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
14760  [(set_attr "isa" "*,avx512bw")
14761   (set_attr "type" "sseiadd")
14762   (set_attr "prefix_extra" "1")
14763   (set_attr "prefix" "vex,evex")
14764   (set_attr "mode" "OI")])
14765
14766;; The correct representation for this is absolutely enormous, and
14767;; surely not generally useful.
;; The operation is therefore kept opaque as UNSPEC_PMADDUBSW512 instead
;; of being spelled out element by element.  The inputs use the
;; <dbpsadbwmode> mode attribute of the result mode; <mask_name> and
;; <mask_operand3> are substitution attributes defined elsewhere in this
;; file.
14768(define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
14769  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
14770          (unspec:VI2_AVX512VL
14771            [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
14772             (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
14773             UNSPEC_PMADDUBSW512))]
14774   "TARGET_AVX512BW"
14775   "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14776  [(set_attr "type" "sseiadd")
14777   (set_attr "prefix" "evex")
14778   (set_attr "mode" "XI")])
14779
;; 512-bit vpmulhrsw.  Per element the pattern is
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1)
;; with both shifts written as lshiftrt.  The "+ 1" rounding term is an
;; explicit 32-element const_vector of ones.
;; NOTE(review): that const_vector is written in V32HI mode although it
;; is an operand of the V32SI plus -- confirm this is intentional before
;; relying on the modes here.
14780(define_insn "avx512bw_umulhrswv32hi3<mask_name>"
14781  [(set (match_operand:V32HI 0 "register_operand" "=v")
14782	(truncate:V32HI
14783	  (lshiftrt:V32SI
14784	    (plus:V32SI
14785	      (lshiftrt:V32SI
14786		(mult:V32SI
14787		  (sign_extend:V32SI
14788		    (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
14789		  (sign_extend:V32SI
14790		    (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
14791		(const_int 14))
14792	      (const_vector:V32HI [(const_int 1) (const_int 1)
14793				   (const_int 1) (const_int 1)
14794				   (const_int 1) (const_int 1)
14795				   (const_int 1) (const_int 1)
14796				   (const_int 1) (const_int 1)
14797				   (const_int 1) (const_int 1)
14798				   (const_int 1) (const_int 1)
14799				   (const_int 1) (const_int 1)
14800				   (const_int 1) (const_int 1)
14801				   (const_int 1) (const_int 1)
14802				   (const_int 1) (const_int 1)
14803				   (const_int 1) (const_int 1)
14804				   (const_int 1) (const_int 1)
14805				   (const_int 1) (const_int 1)
14806				   (const_int 1) (const_int 1)
14807				   (const_int 1) (const_int 1)]))
14808	    (const_int 1))))]
14809  "TARGET_AVX512BW"
14810  "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14811  [(set_attr "type" "sseimul")
14812   (set_attr "prefix" "evex")
14813   (set_attr "mode" "XI")])
14814
;; 128-bit pmaddubsw/vpmaddubsw.  Same shape as the 256-bit pattern:
;; even-indexed and odd-indexed bytes are multiplied separately
;; (operand 1 zero-extended, operand 2 sign-extended) and the two
;; products are added with signed saturation (ss_plus).
;; Alternatives: 0 = two-operand form, operand 1 tied to the destination
;; (noavx); 1 = three-operand form (avx); 2 = "v" registers (avx512bw).
14815(define_insn "ssse3_pmaddubsw128"
14816  [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
14817	(ss_plus:V8HI
14818	  (mult:V8HI
14819	    (zero_extend:V8HI
14820	      (vec_select:V8QI
14821		(match_operand:V16QI 1 "register_operand" "0,x,v")
14822		(parallel [(const_int 0) (const_int 2)
14823			   (const_int 4) (const_int 6)
14824			   (const_int 8) (const_int 10)
14825			   (const_int 12) (const_int 14)])))
14826	    (sign_extend:V8HI
14827	      (vec_select:V8QI
14828		(match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")
14829		(parallel [(const_int 0) (const_int 2)
14830			   (const_int 4) (const_int 6)
14831			   (const_int 8) (const_int 10)
14832			   (const_int 12) (const_int 14)]))))
14833	  (mult:V8HI
14834	    (zero_extend:V8HI
14835	      (vec_select:V8QI (match_dup 1)
14836		(parallel [(const_int 1) (const_int 3)
14837			   (const_int 5) (const_int 7)
14838			   (const_int 9) (const_int 11)
14839			   (const_int 13) (const_int 15)])))
14840	    (sign_extend:V8HI
14841	      (vec_select:V8QI (match_dup 2)
14842		(parallel [(const_int 1) (const_int 3)
14843			   (const_int 5) (const_int 7)
14844			   (const_int 9) (const_int 11)
14845			   (const_int 13) (const_int 15)]))))))]
14846  "TARGET_SSSE3"
14847  "@
14848   pmaddubsw\t{%2, %0|%0, %2}
14849   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}
14850   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
14851  [(set_attr "isa" "noavx,avx,avx512bw")
14852   (set_attr "type" "sseiadd")
14853   (set_attr "atom_unit" "simul")
14854   (set_attr "prefix_data16" "1,*,*")
14855   (set_attr "prefix_extra" "1")
14856   (set_attr "prefix" "orig,vex,evex")
14857   (set_attr "mode" "TI")])
14858
;; 64-bit ("y" = MMX register) pmaddubsw: V8QI inputs, V4HI result.
;; Even and odd bytes are multiplied separately (operand 1
;; zero-extended, operand 2 sign-extended) and the products are added
;; with signed saturation.  Operand 1 is tied to the destination.
14859(define_insn "ssse3_pmaddubsw"
14860  [(set (match_operand:V4HI 0 "register_operand" "=y")
14861	(ss_plus:V4HI
14862	  (mult:V4HI
14863	    (zero_extend:V4HI
14864	      (vec_select:V4QI
14865		(match_operand:V8QI 1 "register_operand" "0")
14866		(parallel [(const_int 0) (const_int 2)
14867			   (const_int 4) (const_int 6)])))
14868	    (sign_extend:V4HI
14869	      (vec_select:V4QI
14870		(match_operand:V8QI 2 "nonimmediate_operand" "ym")
14871		(parallel [(const_int 0) (const_int 2)
14872			   (const_int 4) (const_int 6)]))))
14873	  (mult:V4HI
14874	    (zero_extend:V4HI
14875	      (vec_select:V4QI (match_dup 1)
14876		(parallel [(const_int 1) (const_int 3)
14877			   (const_int 5) (const_int 7)])))
14878	    (sign_extend:V4HI
14879	      (vec_select:V4QI (match_dup 2)
14880		(parallel [(const_int 1) (const_int 3)
14881			   (const_int 5) (const_int 7)]))))))]
14882  "TARGET_SSSE3"
14883  "pmaddubsw\t{%2, %0|%0, %2}"
14884  [(set_attr "type" "sseiadd")
14885   (set_attr "atom_unit" "simul")
14886   (set_attr "prefix_extra" "1")
14887   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14888   (set_attr "mode" "DI")])
14889
;; Modes handled by the pmulhrsw expanders below: V4HI and V8HI are
;; always present, V16HI only when TARGET_AVX2 holds.
14890(define_mode_iterator PMULHRSW
14891  [V4HI V8HI (V16HI "TARGET_AVX2")])
14892
;; Masked pmulhrsw expander.  The unmasked value is
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1)
;; and is merged via vec_merge with operand 3 under mask operand 4.
;; Operand 5 (the "+ 1" vector) is not supplied by the caller: the
;; preparation code sets it to CONST1_RTX of the element mode, then
;; lets ix86_fixup_binary_operands_no_copy adjust the MULT operands.
14893(define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
14894  [(set (match_operand:PMULHRSW 0 "register_operand")
14895	(vec_merge:PMULHRSW
14896	  (truncate:PMULHRSW
14897	    (lshiftrt:<ssedoublemode>
14898	      (plus:<ssedoublemode>
14899	        (lshiftrt:<ssedoublemode>
14900		  (mult:<ssedoublemode>
14901		    (sign_extend:<ssedoublemode>
14902		      (match_operand:PMULHRSW 1 "nonimmediate_operand"))
14903		    (sign_extend:<ssedoublemode>
14904		      (match_operand:PMULHRSW 2 "nonimmediate_operand")))
14905		  (const_int 14))
14906	        (match_dup 5))
14907	      (const_int 1)))
14908	  (match_operand:PMULHRSW 3 "register_operand")
14909	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
14910  "TARGET_AVX512BW && TARGET_AVX512VL"
14911{
14912  operands[5] = CONST1_RTX(<MODE>mode);
14913  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
14914})
14915
;; Unmasked pmulhrsw expander:
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1)
;; Operand 3 (the "+ 1" vector) is filled in by the preparation code
;; with CONST1_RTX of the element mode; the MULT operands are then
;; adjusted by ix86_fixup_binary_operands_no_copy.
;;
;; The condition is TARGET_SSSE3, not TARGET_AVX2: the V4HI and V8HI
;; forms are SSSE3 instructions (the insns that match this RTL are
;; conditioned on TARGET_SSSE3), and the PMULHRSW iterator already
;; restricts the V16HI form to TARGET_AVX2.
14916(define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
14917  [(set (match_operand:PMULHRSW 0 "register_operand")
14918	(truncate:PMULHRSW
14919	  (lshiftrt:<ssedoublemode>
14920	    (plus:<ssedoublemode>
14921	      (lshiftrt:<ssedoublemode>
14922		(mult:<ssedoublemode>
14923		  (sign_extend:<ssedoublemode>
14924		    (match_operand:PMULHRSW 1 "nonimmediate_operand"))
14925		  (sign_extend:<ssedoublemode>
14926		    (match_operand:PMULHRSW 2 "nonimmediate_operand")))
14927		(const_int 14))
14928	      (match_dup 3))
14929	    (const_int 1))))]
14930  "TARGET_SSSE3"
14931{
14932  operands[3] = CONST1_RTX(<MODE>mode);
14933  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
14934})
14935
;; Matching insn for the pmulhrsw RTL on SSE/AVX vector modes.
;; Operand 3 must satisfy const1_operand (the "+ 1" rounding term).
;; Operands 1 and 2 are commutative ("%"); the condition rejects the
;; case where both are in memory.
;; Alternatives: 0 = two-operand form (noavx), 1 = three-operand form
;; (avx), 2 = "v" registers (avx512bw).
14936(define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
14937  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
14938	(truncate:VI2_AVX2
14939	  (lshiftrt:<ssedoublemode>
14940	    (plus:<ssedoublemode>
14941	      (lshiftrt:<ssedoublemode>
14942		(mult:<ssedoublemode>
14943		  (sign_extend:<ssedoublemode>
14944		    (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v"))
14945		  (sign_extend:<ssedoublemode>
14946		    (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm")))
14947		(const_int 14))
14948	      (match_operand:VI2_AVX2 3 "const1_operand"))
14949	    (const_int 1))))]
14950  "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
14951   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14952  "@
14953   pmulhrsw\t{%2, %0|%0, %2}
14954   vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}
14955   vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
14956  [(set_attr "isa" "noavx,avx,avx512bw")
14957   (set_attr "type" "sseimul")
14958   (set_attr "prefix_data16" "1,*,*")
14959   (set_attr "prefix_extra" "1")
14960   (set_attr "prefix" "orig,maybe_evex,evex")
14961   (set_attr "mode" "<sseinsnmode>")])
14962
;; 64-bit ("y" = MMX register) pmulhrsw.  Same RTL shape as the vector
;; pattern above, fixed to V4HI/V4SI.  Operands 1 and 2 are commutative
;; ("%") and may not both be memory; operand 3 must be the all-ones
;; constant accepted by const1_operand.
14963(define_insn "*ssse3_pmulhrswv4hi3"
14964  [(set (match_operand:V4HI 0 "register_operand" "=y")
14965	(truncate:V4HI
14966	  (lshiftrt:V4SI
14967	    (plus:V4SI
14968	      (lshiftrt:V4SI
14969		(mult:V4SI
14970		  (sign_extend:V4SI
14971		    (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
14972		  (sign_extend:V4SI
14973		    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
14974		(const_int 14))
14975	      (match_operand:V4HI 3 "const1_operand"))
14976	    (const_int 1))))]
14977  "TARGET_SSSE3 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14978  "pmulhrsw\t{%2, %0|%0, %2}"
14979  [(set_attr "type" "sseimul")
14980   (set_attr "prefix_extra" "1")
14981   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14982   (set_attr "mode" "DI")])
14983
;; pshufb/vpshufb on byte vectors, kept opaque as UNSPEC_PSHUFB with
;; operand 1 and operand 2 as its two inputs.
;; Alternatives: 0 = two-operand form with operand 1 tied to the
;; destination (noavx), 1 = three-operand form (avx), 2 = "v" registers
;; (avx512bw).
14984(define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
14985  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
14986	(unspec:VI1_AVX512
14987	  [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v")
14988	   (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")]
14989	  UNSPEC_PSHUFB))]
14990  "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14991  "@
14992   pshufb\t{%2, %0|%0, %2}
14993   vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
14994   vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14995  [(set_attr "isa" "noavx,avx,avx512bw")
14996   (set_attr "type" "sselog1")
14997   (set_attr "prefix_data16" "1,*,*")
14998   (set_attr "prefix_extra" "1")
14999   (set_attr "prefix" "orig,maybe_evex,evex")
15000   (set_attr "btver2_decode" "vector")
15001   (set_attr "mode" "<sseinsnmode>")])
15002
;; 64-bit ("y" = MMX register) pshufb on V8QI, kept opaque as
;; UNSPEC_PSHUFB.  Operand 1 is tied to the destination; operand 2 may
;; be a register or memory.
15003(define_insn "ssse3_pshufbv8qi3"
15004  [(set (match_operand:V8QI 0 "register_operand" "=y")
15005	(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
15006		      (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
15007		     UNSPEC_PSHUFB))]
15008  "TARGET_SSSE3"
15009  "pshufb\t{%2, %0|%0, %2}"
15010  [(set_attr "type" "sselog1")
15011   (set_attr "prefix_extra" "1")
15012   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
15013   (set_attr "mode" "DI")])
15014
;; psign{b,w,d} / vpsign{b,w,d} over the VI124_AVX2 modes, kept opaque
;; as UNSPEC_PSIGN; <ssemodesuffix> supplies the element-size suffix.
;; Alternative 0 is the two-operand form with operand 1 tied to the
;; destination (noavx); alternative 1 is the three-operand form (avx).
15015(define_insn "<ssse3_avx2>_psign<mode>3"
15016  [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
15017	(unspec:VI124_AVX2
15018	  [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
15019	   (match_operand:VI124_AVX2 2 "vector_operand" "xBm,xm")]
15020	  UNSPEC_PSIGN))]
15021  "TARGET_SSSE3"
15022  "@
15023   psign<ssemodesuffix>\t{%2, %0|%0, %2}
15024   vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15025  [(set_attr "isa" "noavx,avx")
15026   (set_attr "type" "sselog1")
15027   (set_attr "prefix_data16" "1,*")
15028   (set_attr "prefix_extra" "1")
15029   (set_attr "prefix" "orig,vex")
15030   (set_attr "mode" "<sseinsnmode>")])
15031
;; 64-bit ("y" = MMX register) psign over the MMXMODEI modes, kept
;; opaque as UNSPEC_PSIGN; <mmxvecsize> supplies the element-size
;; suffix.  Operand 1 is tied to the destination.
15032(define_insn "ssse3_psign<mode>3"
15033  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
15034	(unspec:MMXMODEI
15035	  [(match_operand:MMXMODEI 1 "register_operand" "0")
15036	   (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
15037	  UNSPEC_PSIGN))]
15038  "TARGET_SSSE3"
15039  "psign<mmxvecsize>\t{%2, %0|%0, %2}"
15040  [(set_attr "type" "sselog1")
15041   (set_attr "prefix_extra" "1")
15042   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
15043   (set_attr "mode" "DI")])
15044
;; Masked vpalignr.  The UNSPEC_PALIGNR result is merged (vec_merge)
;; with operand 4 under mask register operand 5 ("Yk").  Operand 4 is
;; either tied to the destination or constraint "C".
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing, so the RTL immediate is eight times
;; the immediate that appears in the assembly.
15045(define_insn "<ssse3_avx2>_palignr<mode>_mask"
15046  [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
15047        (vec_merge:VI1_AVX512
15048	  (unspec:VI1_AVX512
15049	    [(match_operand:VI1_AVX512 1 "register_operand" "v")
15050	     (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
15051	     (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
15052	    UNSPEC_PALIGNR)
15053	(match_operand:VI1_AVX512 4 "vector_move_operand" "0C")
15054	(match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
15055  "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
15056{
15057  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
15058  return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
15059}
15060  [(set_attr "type" "sseishft")
15061   (set_attr "atom_unit" "sishuf")
15062   (set_attr "prefix_extra" "1")
15063   (set_attr "length_immediate" "1")
15064   (set_attr "prefix" "evex")
15065   (set_attr "mode" "<sseinsnmode>")])
15066
;; Unmasked palignr/vpalignr over the SSESCALARMODE modes, kept opaque
;; as UNSPEC_PALIGNR.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing.
;; Alternative 0 emits the two-operand "palignr" (operand 1 tied to the
;; destination); alternatives 1 and 2 emit the three-operand "vpalignr".
15067(define_insn "<ssse3_avx2>_palignr<mode>"
15068  [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v")
15069	(unspec:SSESCALARMODE
15070	  [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v")
15071	   (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm")
15072	   (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
15073	  UNSPEC_PALIGNR))]
15074  "TARGET_SSSE3"
15075{
15076  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
15077
15078  switch (which_alternative)
15079    {
15080    case 0:
15081      return "palignr\t{%3, %2, %0|%0, %2, %3}";
15082    case 1:
15083    case 2:
15084      return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
15085    default:
15086      gcc_unreachable ();
15087    }
15088}
15089  [(set_attr "isa" "noavx,avx,avx512bw")
15090   (set_attr "type" "sseishft")
15091   (set_attr "atom_unit" "sishuf")
15092   (set_attr "prefix_data16" "1,*,*")
15093   (set_attr "prefix_extra" "1")
15094   (set_attr "length_immediate" "1")
15095   (set_attr "prefix" "orig,vex,evex")
15096   (set_attr "mode" "<sseinsnmode>")])
15097
;; 64-bit ("y" = MMX register) palignr on DImode, kept opaque as
;; UNSPEC_PALIGNR.  Operand 1 is tied to the destination.  As in the
;; vector patterns, operand 3 is divided by 8 before it is printed.
15098(define_insn "ssse3_palignrdi"
15099  [(set (match_operand:DI 0 "register_operand" "=y")
15100	(unspec:DI [(match_operand:DI 1 "register_operand" "0")
15101		    (match_operand:DI 2 "nonimmediate_operand" "ym")
15102		    (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
15103		   UNSPEC_PALIGNR))]
15104  "TARGET_SSSE3"
15105{
15106  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
15107  return "palignr\t{%3, %2, %0|%0, %2, %3}";
15108}
15109  [(set_attr "type" "sseishft")
15110   (set_attr "atom_unit" "sishuf")
15111   (set_attr "prefix_extra" "1")
15112   (set_attr "length_immediate" "1")
15113   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
15114   (set_attr "mode" "DI")])
15115
15116;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
15117;; modes for abs instruction on pre AVX-512 targets.
;; In other words: the 128-bit QI/HI/SI modes are unconditional, the
;; 256-bit QI/HI/SI modes need AVX2, the 512-bit modes need AVX512BW
;; (QI/HI) or AVX512F (SI/DI), and the V2DI/V4DI modes are enabled only
;; under AVX512VL.
15118(define_mode_iterator VI1248_AVX512VL_AVX512BW
15119  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
15120   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
15121   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
15122   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
15123
;; Vector integer absolute value (pabs*/vpabs*) for every mode of
;; VI1248_AVX512VL_AVX512BW.  The "%v" in the template together with
;; prefix "maybe_vex" lets one template serve both the legacy and the
;; "v"-prefixed encoding.
15124(define_insn "*abs<mode>2"
15125  [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
15126	(abs:VI1248_AVX512VL_AVX512BW
15127	  (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))]
15128  "TARGET_SSSE3"
15129  "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
15130  [(set_attr "type" "sselog1")
15131   (set_attr "prefix_data16" "1")
15132   (set_attr "prefix_extra" "1")
15133   (set_attr "prefix" "maybe_vex")
15134   (set_attr "mode" "<sseinsnmode>")])
15135
;; Masked vpabs for the VI48_AVX512VL modes: the abs result is merged
;; (vec_merge) with operand 2 under mask register operand 3 ("Yk").
;; Operand 2 is either tied to the destination or constraint "C".
15136(define_insn "abs<mode>2_mask"
15137  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
15138	(vec_merge:VI48_AVX512VL
15139	  (abs:VI48_AVX512VL
15140	    (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
15141	  (match_operand:VI48_AVX512VL 2 "vector_move_operand" "0C")
15142	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
15143  "TARGET_AVX512F"
15144  "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
15145  [(set_attr "type" "sselog1")
15146   (set_attr "prefix" "evex")
15147   (set_attr "mode" "<sseinsnmode>")])
15148
;; Masked vpabs for the VI12_AVX512VL modes; same shape as the
;; VI48_AVX512VL pattern but conditioned on TARGET_AVX512BW.
15149(define_insn "abs<mode>2_mask"
15150  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
15151	(vec_merge:VI12_AVX512VL
15152	  (abs:VI12_AVX512VL
15153	    (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
15154	  (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C")
15155	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
15156  "TARGET_AVX512BW"
15157  "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
15158  [(set_attr "type" "sselog1")
15159   (set_attr "prefix" "evex")
15160   (set_attr "mode" "<sseinsnmode>")])
15161
;; Expander for vector integer abs.  It is available from SSE2 on:
;; without SSSE3 the expansion is delegated to ix86_expand_sse2_abs and
;; the expander finishes with DONE; with SSSE3 the RTL template itself
;; is emitted.
15162(define_expand "abs<mode>2"
15163  [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand")
15164	(abs:VI1248_AVX512VL_AVX512BW
15165	  (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand")))]
15166  "TARGET_SSE2"
15167{
15168  if (!TARGET_SSSE3)
15169    {
15170      ix86_expand_sse2_abs (operands[0], operands[1]);
15171      DONE;
15172    }
15173})
15174
;; 64-bit ("y" = MMX register) integer abs over the MMXMODEI modes;
;; <mmxvecsize> supplies the element-size suffix of the pabs mnemonic.
;; The source may be a register or memory.
15175(define_insn "abs<mode>2"
15176  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
15177	(abs:MMXMODEI
15178	  (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
15179  "TARGET_SSSE3"
15180  "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
15181  [(set_attr "type" "sselog1")
15182   (set_attr "prefix_rep" "0")
15183   (set_attr "prefix_extra" "1")
15184   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
15185   (set_attr "mode" "DI")])
15186
15187;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15188;;
15189;; AMD SSE4A instructions
15190;;
15191;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15192
;; SSE4A scalar store (movnt<ssemodesuffix>) of a MODEF value from an
;; SSE register ("x") to memory, wrapped in UNSPEC_MOVNT.
15193(define_insn "sse4a_movnt<mode>"
15194  [(set (match_operand:MODEF 0 "memory_operand" "=m")
15195	(unspec:MODEF
15196	  [(match_operand:MODEF 1 "register_operand" "x")]
15197	  UNSPEC_MOVNT))]
15198  "TARGET_SSE4A"
15199  "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
15200  [(set_attr "type" "ssemov")
15201   (set_attr "mode" "<MODE>")])
15202
;; Variant of the SSE4A movnt store whose source is element 0 of a
;; VF_128 vector register (vec_select with index 0) rather than a
;; scalar register; the stored value has the vector's scalar mode.
15203(define_insn "sse4a_vmmovnt<mode>"
15204  [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
15205	(unspec:<ssescalarmode>
15206	  [(vec_select:<ssescalarmode>
15207	     (match_operand:VF_128 1 "register_operand" "x")
15208	     (parallel [(const_int 0)]))]
15209	  UNSPEC_MOVNT))]
15210  "TARGET_SSE4A"
15211  "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
15212  [(set_attr "type" "ssemov")
15213   (set_attr "mode" "<ssescalarmode>")])
15214
;; SSE4A extrq, immediate form: operand 1 is tied to the destination and
;; operands 2 and 3 are two immediates in the range 0..255
;; (length_immediate is 2).  Kept opaque as UNSPEC_EXTRQI.
15215(define_insn "sse4a_extrqi"
15216  [(set (match_operand:V2DI 0 "register_operand" "=x")
15217	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15218		      (match_operand 2 "const_0_to_255_operand")
15219		      (match_operand 3 "const_0_to_255_operand")]
15220		     UNSPEC_EXTRQI))]
15221  "TARGET_SSE4A"
15222  "extrq\t{%3, %2, %0|%0, %2, %3}"
15223  [(set_attr "type" "sse")
15224   (set_attr "prefix_data16" "1")
15225   (set_attr "length_immediate" "2")
15226   (set_attr "mode" "TI")])
15227
;; SSE4A extrq, register form: operand 1 is tied to the destination and
;; operand 2 is a V16QI register.  Kept opaque as UNSPEC_EXTRQ.
15228(define_insn "sse4a_extrq"
15229  [(set (match_operand:V2DI 0 "register_operand" "=x")
15230	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15231		      (match_operand:V16QI 2 "register_operand" "x")]
15232		     UNSPEC_EXTRQ))]
15233  "TARGET_SSE4A"
15234  "extrq\t{%2, %0|%0, %2}"
15235  [(set_attr "type" "sse")
15236   (set_attr "prefix_data16" "1")
15237   (set_attr "mode" "TI")])
15238
;; SSE4A insertq, immediate form: operand 1 is tied to the destination,
;; operand 2 is the source register, and operands 3 and 4 are two
;; immediates in the range 0..255 (length_immediate is 2).  Kept opaque
;; as UNSPEC_INSERTQI.
15239(define_insn "sse4a_insertqi"
15240  [(set (match_operand:V2DI 0 "register_operand" "=x")
15241	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15242		      (match_operand:V2DI 2 "register_operand" "x")
15243		      (match_operand 3 "const_0_to_255_operand")
15244		      (match_operand 4 "const_0_to_255_operand")]
15245		     UNSPEC_INSERTQI))]
15246  "TARGET_SSE4A"
15247  "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
15248  [(set_attr "type" "sseins")
15249   (set_attr "prefix_data16" "0")
15250   (set_attr "prefix_rep" "1")
15251   (set_attr "length_immediate" "2")
15252   (set_attr "mode" "TI")])
15253
;; SSE4A insertq, register form: operand 1 is tied to the destination
;; and operand 2 is the source register.  Kept opaque as UNSPEC_INSERTQ.
15254(define_insn "sse4a_insertq"
15255  [(set (match_operand:V2DI 0 "register_operand" "=x")
15256	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15257		      (match_operand:V2DI 2 "register_operand" "x")]
15258		     UNSPEC_INSERTQ))]
15259  "TARGET_SSE4A"
15260  "insertq\t{%2, %0|%0, %2}"
15261  [(set_attr "type" "sseins")
15262   (set_attr "prefix_data16" "0")
15263   (set_attr "prefix_rep" "1")
15264   (set_attr "mode" "TI")])
15265
15266;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15267;;
15268;; Intel SSE4.1 instructions
15269;;
15270;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15271
15272;; Mapping of immediate bits for blend instructions
;; Each value is 2^N - 1 for an N-element vector (one selector bit per
;; element); it forms the name of the const_0_to_<blendbits>_operand
;; predicate used by the blend pattern.
15273(define_mode_attr blendbits
15274  [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
15275
;; Immediate-controlled blend of float vectors, expressed as vec_merge.
;; Note the operand order inside vec_merge: operand 2 is the first
;; (selected-when-bit-set) input and operand 1 the second, with the
;; immediate operand 3 as the selector.
;; Alternatives 0 and 1 are the two-operand legacy form (operand 1 tied
;; to the destination, isa noavx); alternative 2 is the three-operand
;; "v"-prefixed form (isa avx).
15276(define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
15277  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15278	(vec_merge:VF_128_256
15279	  (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
15280	  (match_operand:VF_128_256 1 "register_operand" "0,0,x")
15281	  (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
15282  "TARGET_SSE4_1"
15283  "@
15284   blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15285   blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15286   vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15287  [(set_attr "isa" "noavx,noavx,avx")
15288   (set_attr "type" "ssemov")
15289   (set_attr "length_immediate" "1")
15290   (set_attr "prefix_data16" "1,1,*")
15291   (set_attr "prefix_extra" "1")
15292   (set_attr "prefix" "orig,orig,vex")
15293   (set_attr "mode" "<MODE>")])
15294
;; Variable blend of float vectors, kept opaque as UNSPEC_BLENDV with
;; operand 3 as the register selector.  In the legacy alternatives
;; (0 and 1) operand 1 is tied to the destination and operand 3 uses the
;; "Yz" constraint; the AVX alternative (2) takes any "x" register.
15295(define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
15296  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15297	(unspec:VF_128_256
15298	  [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
15299	   (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
15300	   (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
15301	  UNSPEC_BLENDV))]
15302  "TARGET_SSE4_1"
15303  "@
15304   blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15305   blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15306   vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15307  [(set_attr "isa" "noavx,noavx,avx")
15308   (set_attr "type" "ssemov")
15309   (set_attr "length_immediate" "1")
15310   (set_attr "prefix_data16" "1,1,*")
15311   (set_attr "prefix_extra" "1")
15312   (set_attr "prefix" "orig,orig,vex")
15313   (set_attr "btver2_decode" "vector,vector,vector")
15314   (set_attr "mode" "<MODE>")])
15315
;; dp<suffix>/vdp<suffix> on float vectors, kept opaque as UNSPEC_DP
;; with an 8-bit immediate (operand 3).  Operand 1 is marked commutative
;; with operand 2 ("%") in the first alternative.  Alternatives 0 and 1
;; are the two-operand legacy form (noavx); alternative 2 is the
;; three-operand "v"-prefixed form (avx).
15316(define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
15317  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15318	(unspec:VF_128_256
15319	  [(match_operand:VF_128_256 1 "vector_operand" "%0,0,x")
15320	   (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
15321	   (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
15322	  UNSPEC_DP))]
15323  "TARGET_SSE4_1"
15324  "@
15325   dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15326   dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15327   vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15328  [(set_attr "isa" "noavx,noavx,avx")
15329   (set_attr "type" "ssemul")
15330   (set_attr "length_immediate" "1")
15331   (set_attr "prefix_data16" "1,1,*")
15332   (set_attr "prefix_extra" "1")
15333   (set_attr "prefix" "orig,orig,vex")
15334   (set_attr "btver2_decode" "vector,vector,vector")
15335   (set_attr "znver1_decode" "vector,vector,vector")
15336   (set_attr "mode" "<MODE>")])
15337
15338;; Mode attribute used by `vmovntdqa' pattern
;; It maps the vector mode to the ISA prefix that forms the insn name:
;; V2DI -> sse4_1, V4DI -> avx2, V8DI -> avx512f.
15339(define_mode_attr vi8_sse4_1_avx2_avx512
15340   [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
15341
;; movntdqa/vmovntdqa load: the source must be memory and the result is
;; a register; the load is wrapped in UNSPEC_MOVNTDQA.  The "%v" prefix
;; in the template selects the legacy or "v"-prefixed mnemonic.
;; Alternatives 0 and 1 are noavx, alternative 2 ("v" register) is avx.
15342(define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
15343  [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x,v")
15344	(unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m,m,m")]
15345		     UNSPEC_MOVNTDQA))]
15346  "TARGET_SSE4_1"
15347  "%vmovntdqa\t{%1, %0|%0, %1}"
15348  [(set_attr "isa" "noavx,noavx,avx")
15349   (set_attr "type" "ssemov")
15350   (set_attr "prefix_extra" "1,1,*")
15351   (set_attr "prefix" "orig,orig,maybe_evex")
15352   (set_attr "mode" "<sseinsnmode>")])
15353
;; mpsadbw/vmpsadbw on byte vectors, kept opaque as UNSPEC_MPSADBW with
;; an 8-bit immediate (operand 3).  Alternatives 0 and 1 are the
;; two-operand legacy form with operand 1 tied to the destination
;; (noavx); alternative 2 is the three-operand "v"-prefixed form (avx).
15354(define_insn "<sse4_1_avx2>_mpsadbw"
15355  [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
15356	(unspec:VI1_AVX2
15357	  [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
15358	   (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
15359	   (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
15360	  UNSPEC_MPSADBW))]
15361  "TARGET_SSE4_1"
15362  "@
15363   mpsadbw\t{%3, %2, %0|%0, %2, %3}
15364   mpsadbw\t{%3, %2, %0|%0, %2, %3}
15365   vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15366  [(set_attr "isa" "noavx,noavx,avx")
15367   (set_attr "type" "sselog1")
15368   (set_attr "length_immediate" "1")
15369   (set_attr "prefix_extra" "1")
15370   (set_attr "prefix" "orig,orig,vex")
15371   (set_attr "btver2_decode" "vector,vector,vector")
15372   (set_attr "znver1_decode" "vector,vector,vector")
15373   (set_attr "mode" "<sseinsnmode>")])
15374
;; packusdw: operands 1 and 2 are each narrowed with unsigned saturation
;; (us_truncate) to <ssehalfvecmode> and the two halves are concatenated
;; into the destination.  Alternatives 0-1 are non-AVX (operand 1 tied to
;; the destination), alternative 2 is the AVX form and alternative 3 is
;; the AVX512BW form; the last two print <mask_operand3> after the
;; destination for the masked variants generated by <mask_name>.
15375(define_insn "<sse4_1_avx2>_packusdw<mask_name>"
15376  [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v")
15377	(vec_concat:VI2_AVX2
15378	  (us_truncate:<ssehalfvecmode>
15379	    (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v"))
15380	  (us_truncate:<ssehalfvecmode>
15381	    (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,xm,vm"))))]
15382  "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
15383  "@
15384   packusdw\t{%2, %0|%0, %2}
15385   packusdw\t{%2, %0|%0, %2}
15386   vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
15387   vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15388  [(set_attr "isa" "noavx,noavx,avx,avx512bw")
15389   (set_attr "type" "sselog")
15390   (set_attr "prefix_extra" "1")
15391   (set_attr "prefix" "orig,orig,<mask_prefix>,evex")
15392   (set_attr "mode" "<sseinsnmode>")])
15393
;; pblendvb: variable byte blend, modelled as UNSPEC_BLENDV over
;; operands 1 and 2 with register operand 3 as the selector.  In the
;; non-AVX alternatives operand 1 is tied to the destination and operand
;; 3 is restricted by the "Yz" constraint (presumably xmm0 -- confirm in
;; the i386 constraint definitions).  Only the AVX alternative sets
;; length_immediate to 1; the non-AVX ones leave it as "*".
15394(define_insn "<sse4_1_avx2>_pblendvb"
15395  [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
15396	(unspec:VI1_AVX2
15397	  [(match_operand:VI1_AVX2 1 "register_operand"  "0,0,x")
15398	   (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
15399	   (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
15400	  UNSPEC_BLENDV))]
15401  "TARGET_SSE4_1"
15402  "@
15403   pblendvb\t{%3, %2, %0|%0, %2, %3}
15404   pblendvb\t{%3, %2, %0|%0, %2, %3}
15405   vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15406  [(set_attr "isa" "noavx,noavx,avx")
15407   (set_attr "type" "ssemov")
15408   (set_attr "prefix_extra" "1")
15409   (set_attr "length_immediate" "*,*,1")
15410   (set_attr "prefix" "orig,orig,vex")
15411   (set_attr "btver2_decode" "vector,vector,vector")
15412   (set_attr "mode" "<sseinsnmode>")])
15413
;; 128-bit pblendw: a vec_merge of operand 2 and operand 1 on V8HI,
;; selected per element by the 8-bit immediate operand 3.  The non-AVX
;; alternatives tie operand 1 to the destination.
15414(define_insn "sse4_1_pblendw"
15415  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
15416	(vec_merge:V8HI
15417	  (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm")
15418	  (match_operand:V8HI 1 "register_operand" "0,0,x")
15419	  (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
15420  "TARGET_SSE4_1"
15421  "@
15422   pblendw\t{%3, %2, %0|%0, %2, %3}
15423   pblendw\t{%3, %2, %0|%0, %2, %3}
15424   vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15425  [(set_attr "isa" "noavx,noavx,avx")
15426   (set_attr "type" "ssemov")
15427   (set_attr "prefix_extra" "1")
15428   (set_attr "length_immediate" "1")
15429   (set_attr "prefix" "orig,orig,vex")
15430   (set_attr "mode" "TI")])
15431
15432;; The builtin supplies an 8-bit immediate, but the V16HI vec_merge needs
;; one mask bit per element (16 bits).  The preparation code copies the
;; low byte into bits 8-15, so elements 8-15 are selected by the same
;; bits as elements 0-7.
15433(define_expand "avx2_pblendw"
15434  [(set (match_operand:V16HI 0 "register_operand")
15435	(vec_merge:V16HI
15436	  (match_operand:V16HI 2 "nonimmediate_operand")
15437	  (match_operand:V16HI 1 "register_operand")
15438	  (match_operand:SI 3 "const_0_to_255_operand")))]
15439  "TARGET_AVX2"
15440{
15441  HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
15442  operands[3] = GEN_INT (val << 8 | val);
15443})
15444
;; 256-bit vpblendw.  Operand 3 must satisfy avx2_pblendw_operand; the
;; output code reduces it to its low 8 bits before printing, since the
;; instruction itself takes an 8-bit immediate.
15445(define_insn "*avx2_pblendw"
15446  [(set (match_operand:V16HI 0 "register_operand" "=x")
15447	(vec_merge:V16HI
15448	  (match_operand:V16HI 2 "nonimmediate_operand" "xm")
15449	  (match_operand:V16HI 1 "register_operand" "x")
15450	  (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
15451  "TARGET_AVX2"
15452{
15453  operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
15454  return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
15455}
15456  [(set_attr "type" "ssemov")
15457   (set_attr "prefix_extra" "1")
15458   (set_attr "length_immediate" "1")
15459   (set_attr "prefix" "vex")
15460   (set_attr "mode" "OI")])
15461
;; vpblendd: a vec_merge of operand 2 and operand 1 over the VI4_AVX2
;; modes, selected by the 8-bit immediate operand 3.  AVX2 only, with a
;; single VEX-encoded alternative.
15462(define_insn "avx2_pblendd<mode>"
15463  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
15464	(vec_merge:VI4_AVX2
15465	  (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
15466	  (match_operand:VI4_AVX2 1 "register_operand" "x")
15467	  (match_operand:SI 3 "const_0_to_255_operand" "n")))]
15468  "TARGET_AVX2"
15469  "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15470  [(set_attr "type" "ssemov")
15471   (set_attr "prefix_extra" "1")
15472   (set_attr "length_immediate" "1")
15473   (set_attr "prefix" "vex")
15474   (set_attr "mode" "<sseinsnmode>")])
15475
;; phminposuw on V8HI, modelled as UNSPEC_PHMINPOSUW of the single
;; source operand 1 (register or memory).  One template serves all three
;; alternatives; the "isa" attribute marks the first two as non-AVX and
;; the third as AVX.
15476(define_insn "sse4_1_phminposuw"
15477  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
15478	(unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")]
15479		     UNSPEC_PHMINPOSUW))]
15480  "TARGET_SSE4_1"
15481  "%vphminposuw\t{%1, %0|%0, %1}"
15482  [(set_attr "isa" "noavx,noavx,avx")
15483   (set_attr "type" "sselog1")
15484   (set_attr "prefix_extra" "1")
15485   (set_attr "prefix" "orig,orig,vex")
15486   (set_attr "mode" "TI")])
15487
;; Byte -> word extension, 256-bit result: any_extend of the whole V16QI
;; operand 1 into V16HI (vpmov<extsuffix>bw).  The masked variants
;; generated by <mask_name> additionally require the AVX512BW and
;; AVX512VL mask conditions.
15488(define_insn "avx2_<code>v16qiv16hi2<mask_name>"
15489  [(set (match_operand:V16HI 0 "register_operand" "=v")
15490	(any_extend:V16HI
15491	  (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
15492  "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
15493  "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15494  [(set_attr "type" "ssemov")
15495   (set_attr "prefix_extra" "1")
15496   (set_attr "prefix" "maybe_evex")
15497   (set_attr "mode" "OI")])
15498
;; Byte -> word extension, 512-bit result: any_extend of the whole V32QI
;; operand 1 into V32HI (vpmov<extsuffix>bw).  Requires AVX512BW and is
;; always EVEX-encoded.
15499(define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
15500  [(set (match_operand:V32HI 0 "register_operand" "=v")
15501	(any_extend:V32HI
15502	  (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
15503  "TARGET_AVX512BW"
15504  "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15505  [(set_attr "type" "ssemov")
15506   (set_attr "prefix_extra" "1")
15507   (set_attr "prefix" "evex")
15508   (set_attr "mode" "XI")])
15509
;; Byte -> word extension, 128-bit result: the low 8 bytes of V16QI
;; operand 1 (vec_select of elements 0-7) are extended into V8HI.  Only
;; 8 bytes of the source are used, so the Intel-syntax template prints
;; operand 1 with the %q (64-bit) modifier.
15510(define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
15511  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
15512	(any_extend:V8HI
15513	  (vec_select:V8QI
15514	    (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15515	    (parallel [(const_int 0) (const_int 1)
15516		       (const_int 2) (const_int 3)
15517		       (const_int 4) (const_int 5)
15518		       (const_int 6) (const_int 7)]))))]
15519  "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
15520  "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15521  [(set_attr "isa" "noavx,noavx,avx")
15522   (set_attr "type" "ssemov")
15523   (set_attr "prefix_extra" "1")
15524   (set_attr "prefix" "orig,orig,maybe_evex")
15525   (set_attr "mode" "TI")])
15526
;; Byte -> dword extension, 512-bit result: any_extend of the whole V16QI
;; operand 1 into V16SI (vpmov<extsuffix>bd).  All 16 source bytes are
;; consumed, so the source is a full 128-bit operand.  The Intel-syntax
;; template must therefore print it unmodified (%1); a %q modifier would
;; describe a memory source as only 64 bits wide.
15527(define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
15528  [(set (match_operand:V16SI 0 "register_operand" "=v")
15529	(any_extend:V16SI
15530	  (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
15531  "TARGET_AVX512F"
15532  "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15533  [(set_attr "type" "ssemov")
15534   (set_attr "prefix" "evex")
15535   (set_attr "mode" "XI")])
15536
;; Byte -> dword extension, 256-bit result: the low 8 bytes of V16QI
;; operand 1 (vec_select of elements 0-7) are extended into V8SI.  The
;; source is 8 bytes, hence the %q (64-bit) modifier on operand 1 in the
;; Intel-syntax template.
15537(define_insn "avx2_<code>v8qiv8si2<mask_name>"
15538  [(set (match_operand:V8SI 0 "register_operand" "=v")
15539	(any_extend:V8SI
15540	  (vec_select:V8QI
15541	    (match_operand:V16QI 1 "nonimmediate_operand" "vm")
15542	    (parallel [(const_int 0) (const_int 1)
15543		       (const_int 2) (const_int 3)
15544		       (const_int 4) (const_int 5)
15545		       (const_int 6) (const_int 7)]))))]
15546  "TARGET_AVX2 && <mask_avx512vl_condition>"
15547  "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15548  [(set_attr "type" "ssemov")
15549   (set_attr "prefix_extra" "1")
15550   (set_attr "prefix" "maybe_evex")
15551   (set_attr "mode" "OI")])
15552
;; Byte -> dword extension, 128-bit result: the low 4 bytes of V16QI
;; operand 1 (vec_select of elements 0-3) are extended into V4SI.  The
;; source is 4 bytes, hence the %k (32-bit) modifier on operand 1 in the
;; Intel-syntax template.
15553(define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
15554  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
15555	(any_extend:V4SI
15556	  (vec_select:V4QI
15557	    (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15558	    (parallel [(const_int 0) (const_int 1)
15559		       (const_int 2) (const_int 3)]))))]
15560  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15561  "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15562  [(set_attr "isa" "noavx,noavx,avx")
15563   (set_attr "type" "ssemov")
15564   (set_attr "prefix_extra" "1")
15565   (set_attr "prefix" "orig,orig,maybe_evex")
15566   (set_attr "mode" "TI")])
15567
;; Word -> dword extension, 512-bit result: any_extend of the whole
;; V16HI operand 1 into V16SI (vpmov<extsuffix>wd).  AVX512F only and
;; always EVEX-encoded.
15568(define_insn "avx512f_<code>v16hiv16si2<mask_name>"
15569  [(set (match_operand:V16SI 0 "register_operand" "=v")
15570	(any_extend:V16SI
15571	  (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
15572  "TARGET_AVX512F"
15573  "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15574  [(set_attr "type" "ssemov")
15575   (set_attr "prefix" "evex")
15576   (set_attr "mode" "XI")])
15577
;; Word -> dword extension, 256-bit result: any_extend of the whole V8HI
;; operand 1 into V8SI (vpmov<extsuffix>wd).  Masked variants also need
;; the AVX512VL mask condition.
15578(define_insn "avx2_<code>v8hiv8si2<mask_name>"
15579  [(set (match_operand:V8SI 0 "register_operand" "=v")
15580	(any_extend:V8SI
15581	    (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
15582  "TARGET_AVX2 && <mask_avx512vl_condition>"
15583  "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15584  [(set_attr "type" "ssemov")
15585   (set_attr "prefix_extra" "1")
15586   (set_attr "prefix" "maybe_evex")
15587   (set_attr "mode" "OI")])
15588
;; Word -> dword extension, 128-bit result: the low 4 words of V8HI
;; operand 1 (vec_select of elements 0-3) are extended into V4SI.  The
;; source is 8 bytes, hence the %q (64-bit) modifier on operand 1 in the
;; Intel-syntax template.
15589(define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
15590  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
15591	(any_extend:V4SI
15592	  (vec_select:V4HI
15593	    (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15594	    (parallel [(const_int 0) (const_int 1)
15595		       (const_int 2) (const_int 3)]))))]
15596  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15597  "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15598  [(set_attr "isa" "noavx,noavx,avx")
15599   (set_attr "type" "ssemov")
15600   (set_attr "prefix_extra" "1")
15601   (set_attr "prefix" "orig,orig,maybe_evex")
15602   (set_attr "mode" "TI")])
15603
;; Byte -> qword extension, 512-bit result: the low 8 bytes of V16QI
;; operand 1 (vec_select of elements 0-7) are extended into V8DI
;; (vpmov<extsuffix>bq).  The source is 8 bytes wide, so the Intel-syntax
;; template must print operand 1 with the %q (64-bit) modifier; the %k
;; (32-bit) modifier belongs to the 4-element AVX2 sibling and would
;; describe a memory source as only 32 bits wide.
15604(define_insn "avx512f_<code>v8qiv8di2<mask_name>"
15605  [(set (match_operand:V8DI 0 "register_operand" "=v")
15606	(any_extend:V8DI
15607	  (vec_select:V8QI
15608	    (match_operand:V16QI 1 "nonimmediate_operand" "vm")
15609	    (parallel [(const_int 0) (const_int 1)
15610		       (const_int 2) (const_int 3)
15611		       (const_int 4) (const_int 5)
15612		       (const_int 6) (const_int 7)]))))]
15613  "TARGET_AVX512F"
15614  "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15615  [(set_attr "type" "ssemov")
15616   (set_attr "prefix" "evex")
15617   (set_attr "mode" "XI")])
15618
;; Byte -> qword extension, 256-bit result: the low 4 bytes of V16QI
;; operand 1 (vec_select of elements 0-3) are extended into V4DI.  The
;; source is 4 bytes, hence the %k (32-bit) modifier on operand 1 in the
;; Intel-syntax template.
15619(define_insn "avx2_<code>v4qiv4di2<mask_name>"
15620  [(set (match_operand:V4DI 0 "register_operand" "=v")
15621	(any_extend:V4DI
15622	  (vec_select:V4QI
15623	    (match_operand:V16QI 1 "nonimmediate_operand" "vm")
15624	    (parallel [(const_int 0) (const_int 1)
15625		       (const_int 2) (const_int 3)]))))]
15626  "TARGET_AVX2 && <mask_avx512vl_condition>"
15627  "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15628  [(set_attr "type" "ssemov")
15629   (set_attr "prefix_extra" "1")
15630   (set_attr "prefix" "maybe_evex")
15631   (set_attr "mode" "OI")])
15632
;; Byte -> qword extension, 128-bit result: the low 2 bytes of V16QI
;; operand 1 (vec_select of elements 0-1) are extended into V2DI.  The
;; source is 2 bytes, hence the %w (16-bit) modifier on operand 1 in the
;; Intel-syntax template.
15633(define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
15634  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
15635	(any_extend:V2DI
15636	  (vec_select:V2QI
15637	    (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15638	    (parallel [(const_int 0) (const_int 1)]))))]
15639  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15640  "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
15641  [(set_attr "isa" "noavx,noavx,avx")
15642   (set_attr "type" "ssemov")
15643   (set_attr "prefix_extra" "1")
15644   (set_attr "prefix" "orig,orig,maybe_evex")
15645   (set_attr "mode" "TI")])
15646
;; Word -> qword extension, 512-bit result: any_extend of the whole V8HI
;; operand 1 into V8DI (vpmov<extsuffix>wq).  All 8 source words
;; (16 bytes) are consumed, so the source is a full 128-bit operand.  The
;; Intel-syntax template must therefore print it unmodified (%1); the %q
;; modifier belongs to the 4-element AVX2 sibling and would describe a
;; memory source as only 64 bits wide.
15647(define_insn "avx512f_<code>v8hiv8di2<mask_name>"
15648  [(set (match_operand:V8DI 0 "register_operand" "=v")
15649	(any_extend:V8DI
15650	  (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
15651  "TARGET_AVX512F"
15652  "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15653  [(set_attr "type" "ssemov")
15654   (set_attr "prefix" "evex")
15655   (set_attr "mode" "XI")])
15656
;; Word -> qword extension, 256-bit result: the low 4 words of V8HI
;; operand 1 (vec_select of elements 0-3) are extended into V4DI.  The
;; source is 8 bytes, hence the %q (64-bit) modifier on operand 1 in the
;; Intel-syntax template.
15657(define_insn "avx2_<code>v4hiv4di2<mask_name>"
15658  [(set (match_operand:V4DI 0 "register_operand" "=v")
15659	(any_extend:V4DI
15660	  (vec_select:V4HI
15661	    (match_operand:V8HI 1 "nonimmediate_operand" "vm")
15662	    (parallel [(const_int 0) (const_int 1)
15663		       (const_int 2) (const_int 3)]))))]
15664  "TARGET_AVX2 && <mask_avx512vl_condition>"
15665  "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15666  [(set_attr "type" "ssemov")
15667   (set_attr "prefix_extra" "1")
15668   (set_attr "prefix" "maybe_evex")
15669   (set_attr "mode" "OI")])
15670
;; Word -> qword extension, 128-bit result: the low 2 words of V8HI
;; operand 1 (vec_select of elements 0-1) are extended into V2DI.  The
;; source is 4 bytes, hence the %k (32-bit) modifier on operand 1 in the
;; Intel-syntax template.
15671(define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
15672  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
15673	(any_extend:V2DI
15674	  (vec_select:V2HI
15675	    (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15676	    (parallel [(const_int 0) (const_int 1)]))))]
15677  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15678  "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15679  [(set_attr "isa" "noavx,noavx,avx")
15680   (set_attr "type" "ssemov")
15681   (set_attr "prefix_extra" "1")
15682   (set_attr "prefix" "orig,orig,maybe_evex")
15683   (set_attr "mode" "TI")])
15684
;; Dword -> qword extension, 512-bit result: any_extend of the whole
;; V8SI operand 1 into V8DI (vpmov<extsuffix>dq).  AVX512F only and
;; always EVEX-encoded.
15685(define_insn "avx512f_<code>v8siv8di2<mask_name>"
15686  [(set (match_operand:V8DI 0 "register_operand" "=v")
15687	(any_extend:V8DI
15688	  (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
15689  "TARGET_AVX512F"
15690  "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15691  [(set_attr "type" "ssemov")
15692   (set_attr "prefix" "evex")
15693   (set_attr "mode" "XI")])
15694
;; Dword -> qword extension, 256-bit result: any_extend of the whole
;; V4SI operand 1 into V4DI (vpmov<extsuffix>dq).  Masked variants also
;; need the AVX512VL mask condition.
15695(define_insn "avx2_<code>v4siv4di2<mask_name>"
15696  [(set (match_operand:V4DI 0 "register_operand" "=v")
15697	(any_extend:V4DI
15698	    (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
15699  "TARGET_AVX2 && <mask_avx512vl_condition>"
15700  "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15701  [(set_attr "type" "ssemov")
15702   (set_attr "prefix" "maybe_evex")
15703   (set_attr "prefix_extra" "1")
15704   (set_attr "mode" "OI")])
15705
;; Dword -> qword extension, 128-bit result: the low 2 dwords of V4SI
;; operand 1 (vec_select of elements 0-1) are extended into V2DI.  The
;; source is 8 bytes, hence the %q (64-bit) modifier on operand 1 in the
;; Intel-syntax template.
15706(define_insn "sse4_1_<code>v2siv2di2<mask_name>"
15707  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
15708	(any_extend:V2DI
15709	  (vec_select:V2SI
15710	    (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15711	    (parallel [(const_int 0) (const_int 1)]))))]
15712  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15713  "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15714  [(set_attr "isa" "noavx,noavx,avx")
15715   (set_attr "type" "ssemov")
15716   (set_attr "prefix_extra" "1")
15717   (set_attr "prefix" "orig,orig,maybe_evex")
15718   (set_attr "mode" "TI")])
15719
15720;; vtestps/vtestpd (printed below as vtest<ssemodesuffix>) resemble comiss
15721;; and ucomiss in that they set FLAGS_REG, but they are not real compare
;; instructions.  The result is therefore modelled as an unspec
;; (UNSPEC_VTESTP) of the two operands stored into the CC-mode flags
;; register.
15722(define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
15723  [(set (reg:CC FLAGS_REG)
15724	(unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
15725		    (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
15726		   UNSPEC_VTESTP))]
15727  "TARGET_AVX"
15728  "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
15729  [(set_attr "type" "ssecomi")
15730   (set_attr "prefix_extra" "1")
15731   (set_attr "prefix" "vex")
15732   (set_attr "mode" "<MODE>")])
15733
15734;; ptest resembles comiss and ucomiss in that it sets FLAGS_REG, but it
15735;; is not a real compare instruction, so the result is modelled as an
;; unspec (UNSPEC_PTEST) of the two operands.  The btver2_decode
;; attribute is "vector" only when the insn mode is OImode, otherwise "*".
15736(define_insn "<sse4_1>_ptest<mode>"
15737  [(set (reg:CC FLAGS_REG)
15738	(unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
15739		    (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
15740		   UNSPEC_PTEST))]
15741  "TARGET_SSE4_1"
15742  "%vptest\t{%1, %0|%0, %1}"
15743  [(set_attr "isa" "noavx,noavx,avx")
15744   (set_attr "type" "ssecomi")
15745   (set_attr "prefix_extra" "1")
15746   (set_attr "prefix" "orig,orig,vex")
15747   (set (attr "btver2_decode")
15748     (if_then_else
15749       (match_test "<sseinsnmode>mode==OImode")
15750     (const_string "vector")
15751     (const_string "*")))
15752   (set_attr "mode" "<sseinsnmode>")])
15753
;; ptest on two TFmode operands: same UNSPEC_PTEST shape, constraints
;; and output template as the vector ptest pattern, with insn mode TI.
15754(define_insn "ptesttf2"
15755  [(set (reg:CC FLAGS_REG)
15756	(unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
15757		    (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
15758		   UNSPEC_PTEST))]
15759  "TARGET_SSE4_1"
15760  "%vptest\t{%1, %0|%0, %1}"
15761  [(set_attr "isa" "noavx,noavx,avx")
15762   (set_attr "type" "ssecomi")
15763   (set_attr "prefix_extra" "1")
15764   (set_attr "prefix" "orig,orig,vex")
15765   (set_attr "mode" "TI")])
15766
;; Packed round (round<ssemodesuffix>) for the 128/256-bit float modes:
;; UNSPEC_ROUND of source operand 1 and the rounding-control immediate
;; operand 2, which must be in the range 0..15.
15767(define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
15768  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15769	(unspec:VF_128_256
15770	  [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm")
15771	   (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")]
15772	  UNSPEC_ROUND))]
15773  "TARGET_SSE4_1"
15774  "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15775  [(set_attr "isa" "noavx,noavx,avx")
15776   (set_attr "type" "ssecvt")
15777   (set_attr "prefix_data16" "1,1,*")
15778   (set_attr "prefix_extra" "1")
15779   (set_attr "length_immediate" "1")
15780   (set_attr "prefix" "orig,orig,vex")
15781   (set_attr "mode" "<MODE>")])
15782
;; Round, then convert to integer.  Operand 1 is rounded with the
;; rounding-control immediate operand 2 into a fresh temporary of the
;; float vector mode; the temporary is then converted with fix_trunc into
;; the integer vector operand 0.
15783(define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
15784  [(match_operand:<sseintvecmode> 0 "register_operand")
15785   (match_operand:VF1_128_256 1 "vector_operand")
15786   (match_operand:SI 2 "const_0_to_15_operand")]
15787  "TARGET_SSE4_1"
15788{
15789  rtx tmp = gen_reg_rtx (<MODE>mode);
15790
15791  emit_insn
15792    (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
15793						       operands[2]));
15794  emit_insn
15795    (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
15796  DONE;
15797})
15798
;; 512-bit round expander: forwards all three operands unchanged to the
;; avx512f_rndscale<mode> pattern.
15799(define_expand "avx512f_round<castmode>512"
15800  [(match_operand:VF_512 0 "register_operand")
15801   (match_operand:VF_512 1 "nonimmediate_operand")
15802   (match_operand:SI 2 "const_0_to_15_operand")]
15803  "TARGET_AVX512F"
15804{
15805  emit_insn (gen_avx512f_rndscale<mode> (operands[0], operands[1], operands[2]));
15806  DONE;
15807})
15808
;; 512-bit round, then convert to integer.  The V16SF operand 1 is
;; rounded via avx512f_rndscalev16sf into a temporary, which is then
;; converted with fix_trunc into the V16SI operand 0.
15809(define_expand "avx512f_roundps512_sfix"
15810  [(match_operand:V16SI 0 "register_operand")
15811   (match_operand:V16SF 1 "nonimmediate_operand")
15812   (match_operand:SI 2 "const_0_to_15_operand")]
15813  "TARGET_AVX512F"
15814{
15815  rtx tmp = gen_reg_rtx (V16SFmode);
15816  emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
15817  emit_insn (gen_fix_truncv16sfv16si2 (operands[0], tmp));
15818  DONE;
15819})
15820
;; Round two double-precision vectors (operands 1 and 2) with the
;; rounding-control immediate operand 3, convert them to integers and
;; pack both results into operand 0.
;;
;; Two strategies:
;;  - V2DF inputs with AVX enabled, 128-bit AVX not preferred, and the
;;    insn optimized for speed: concatenate both inputs into one V4DF,
;;    round it once with the 256-bit round, and convert with a single
;;    V4DF -> V4SI fix_trunc.
;;  - otherwise: round each input separately into its own temporary and
;;    combine them with vec_pack_sfix_trunc_<mode>.
15821(define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
15822  [(match_operand:<ssepackfltmode> 0 "register_operand")
15823   (match_operand:VF2 1 "vector_operand")
15824   (match_operand:VF2 2 "vector_operand")
15825   (match_operand:SI 3 "const_0_to_15_operand")]
15826  "TARGET_SSE4_1"
15827{
15828  rtx tmp0, tmp1;
15829
15830  if (<MODE>mode == V2DFmode
15831      && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
15832    {
15833      rtx tmp2 = gen_reg_rtx (V4DFmode);
15834
15835      tmp0 = gen_reg_rtx (V4DFmode);
15836      tmp1 = force_reg (V2DFmode, operands[1]);
15837
15838      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
15839      emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
15840      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
15841    }
15842  else
15843    {
15844      tmp0 = gen_reg_rtx (<MODE>mode);
15845      tmp1 = gen_reg_rtx (<MODE>mode);
15846
15847      emit_insn
15848       (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
15849							  operands[3]));
15850      emit_insn
15851       (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
15852							  operands[3]));
15853      emit_insn
15854       (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
15855    }
15856  DONE;
15857})
15858
;; Scalar round.  The vec_merge with mask (const_int 1) takes element 0
;; from the rounded operand 2 (UNSPEC_ROUND with immediate operand 3) and
;; the remaining elements from operand 1.  Alternatives 0-1 are non-AVX
;; (operand 1 tied to the destination), alternative 2 prints the AVX
;; vround form, and alternative 3 (isa avx512f, "v" registers) prints
;; vrndscale instead.
15859(define_insn "sse4_1_round<ssescalarmodesuffix>"
15860  [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
15861	(vec_merge:VF_128
15862	  (unspec:VF_128
15863	    [(match_operand:VF_128 2 "register_operand" "Yr,*x,x,v")
15864	     (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
15865	    UNSPEC_ROUND)
15866	  (match_operand:VF_128 1 "register_operand" "0,0,x,v")
15867	  (const_int 1)))]
15868  "TARGET_SSE4_1"
15869  "@
15870   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
15871   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
15872   vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
15873   vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15874  [(set_attr "isa" "noavx,noavx,avx,avx512f")
15875   (set_attr "type" "ssecvt")
15876   (set_attr "length_immediate" "1")
15877   (set_attr "prefix_data16" "1,1,*,*")
15878   (set_attr "prefix_extra" "1")
15879   (set_attr "prefix" "orig,orig,vex,evex")
15880   (set_attr "mode" "<MODE>")])
15881
;; Vector round<mode>2, available only when !flag_trapping_math.
;; It is built from two steps:
;;   operands[3] = operands[1] + operands[2]
;;   operands[0] = UNSPEC_ROUND (operands[3], ROUND_TRUNC)
;; The preparation code sets operands[2] to the constant pred_half with
;; the sign of operands[1] copied onto it (copysign), where
;; pred_half = 0.5 - 2**(-p - 1) and p is the precision of the scalar
;; format (the value the in-body comment calls nextafter (0.5, 0.0)).
;; operands[3] is a fresh register for the sum and operands[4] is the
;; ROUND_TRUNC rounding-control immediate.
15882(define_expand "round<mode>2"
15883  [(set (match_dup 3)
15884	(plus:VF
15885	  (match_operand:VF 1 "register_operand")
15886	  (match_dup 2)))
15887   (set (match_operand:VF 0 "register_operand")
15888	(unspec:VF
15889	  [(match_dup 3) (match_dup 4)]
15890	  UNSPEC_ROUND))]
15891  "TARGET_SSE4_1 && !flag_trapping_math"
15892{
15893  machine_mode scalar_mode;
15894  const struct real_format *fmt;
15895  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
15896  rtx half, vec_half;
15897
15898  scalar_mode = GET_MODE_INNER (<MODE>mode);
15899
15900  /* load nextafter (0.5, 0.0) */
15901  fmt = REAL_MODE_FORMAT (scalar_mode);
15902  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
15903  real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
15904  half = const_double_from_real_value (pred_half, scalar_mode);
15905
15906  vec_half = ix86_build_const_vector (<MODE>mode, true, half);
15907  vec_half = force_reg (<MODE>mode, vec_half);
15908
15909  operands[2] = gen_reg_rtx (<MODE>mode);
15910  emit_insn (gen_copysign<mode>3 (operands[2], vec_half, operands[1]));
15911
15912  operands[3] = gen_reg_rtx (<MODE>mode);
15913  operands[4] = GEN_INT (ROUND_TRUNC);
15914})
15915
;; round<mode>2 followed by conversion to integer.  Operand 1 is rounded
;; with round<mode>2 into a temporary, which is then converted with
;; fix_trunc into the integer vector operand 0.  It carries the same
;; enabling condition as round<mode>2.
15916(define_expand "round<mode>2_sfix"
15917  [(match_operand:<sseintvecmode> 0 "register_operand")
15918   (match_operand:VF1 1 "register_operand")]
15919  "TARGET_SSE4_1 && !flag_trapping_math"
15920{
15921  rtx tmp = gen_reg_rtx (<MODE>mode);
15922
15923  emit_insn (gen_round<mode>2 (tmp, operands[1]));
15924
15925  emit_insn
15926    (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
15927  DONE;
15928})
15929
;; Round two double-precision vectors (operands 1 and 2) with
;; round<mode>2, convert them to integers and pack both results into
;; operand 0.
;;
;; Two strategies:
;;  - V2DF inputs with AVX enabled, 128-bit AVX not preferred, and the
;;    insn optimized for speed: concatenate both inputs into one V4DF,
;;    round it once with roundv4df2, and convert with a single
;;    V4DF -> V4SI fix_trunc.
;;  - otherwise: round each input separately into its own temporary and
;;    combine them with vec_pack_sfix_trunc_<mode>.
15930(define_expand "round<mode>2_vec_pack_sfix"
15931  [(match_operand:<ssepackfltmode> 0 "register_operand")
15932   (match_operand:VF2 1 "register_operand")
15933   (match_operand:VF2 2 "register_operand")]
15934  "TARGET_SSE4_1 && !flag_trapping_math"
15935{
15936  rtx tmp0, tmp1;
15937
15938  if (<MODE>mode == V2DFmode
15939      && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
15940    {
15941      rtx tmp2 = gen_reg_rtx (V4DFmode);
15942
15943      tmp0 = gen_reg_rtx (V4DFmode);
15944      tmp1 = force_reg (V2DFmode, operands[1]);
15945
15946      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
15947      emit_insn (gen_roundv4df2 (tmp2, tmp0));
15948      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
15949    }
15950  else
15951    {
15952      tmp0 = gen_reg_rtx (<MODE>mode);
15953      tmp1 = gen_reg_rtx (<MODE>mode);
15954
15955      emit_insn (gen_round<mode>2 (tmp0, operands[1]));
15956      emit_insn (gen_round<mode>2 (tmp1, operands[2]));
15957
15958      emit_insn
15959       (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
15960    }
15961  DONE;
15962})
15963
15964;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15965;;
15966;; Intel SSE4.2 string/text processing instructions
15967;;
15968;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15969
;; Combined pcmpestr pattern describing all three results at once: the
;; SImode result in operand 0 (constraint "c"), the V16QI result in
;; operand 1 (constraint "Yz") and FLAGS_REG.  It is only valid while
;; pseudos can still be created, and its "#" template means it is never
;; output directly but always split.
;;
;; The split checks the REG_UNUSED notes on the insn to see which results
;; are live and emits only what is needed:
;;  - pcmpestri if operand 0 is used;
;;  - pcmpestrm if operand 1 is used;
;;  - the flags-only pattern if only FLAGS_REG is used;
;;  - a NOTE_INSN_DELETED if nothing is used.
15970(define_insn_and_split "sse4_2_pcmpestr"
15971  [(set (match_operand:SI 0 "register_operand" "=c,c")
15972	(unspec:SI
15973	  [(match_operand:V16QI 2 "register_operand" "x,x")
15974	   (match_operand:SI 3 "register_operand" "a,a")
15975	   (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
15976	   (match_operand:SI 5 "register_operand" "d,d")
15977	   (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
15978	  UNSPEC_PCMPESTR))
15979   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
15980	(unspec:V16QI
15981	  [(match_dup 2)
15982	   (match_dup 3)
15983	   (match_dup 4)
15984	   (match_dup 5)
15985	   (match_dup 6)]
15986	  UNSPEC_PCMPESTR))
15987   (set (reg:CC FLAGS_REG)
15988	(unspec:CC
15989	  [(match_dup 2)
15990	   (match_dup 3)
15991	   (match_dup 4)
15992	   (match_dup 5)
15993	   (match_dup 6)]
15994	  UNSPEC_PCMPESTR))]
15995  "TARGET_SSE4_2
15996   && can_create_pseudo_p ()"
15997  "#"
15998  "&& 1"
15999  [(const_int 0)]
16000{
16001  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
16002  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
16003  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
16004
16005  if (ecx)
16006    emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
16007				     operands[3], operands[4],
16008				     operands[5], operands[6]));
16009  if (xmm0)
16010    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
16011				     operands[3], operands[4],
16012				     operands[5], operands[6]));
16013  if (flags && !(ecx || xmm0))
16014    emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
16015					   operands[2], operands[3],
16016					   operands[4], operands[5],
16017					   operands[6]));
16018  if (!(flags || ecx || xmm0))
16019    emit_note (NOTE_INSN_DELETED);
16020
16021  DONE;
16022}
16023  [(set_attr "type" "sselog")
16024   (set_attr "prefix_data16" "1")
16025   (set_attr "prefix_extra" "1")
16026   (set_attr "length_immediate" "1")
16027   (set_attr "memory" "none,load")
16028   (set_attr "mode" "TI")])
16029
;; pcmpestri: produces the SImode result in operand 0 (constraint "c")
;; and sets FLAGS_REG.  The template prints only operands 1 and 3 and the
;; immediate operand 5; operands 0, 2 and 4 are fixed by their register
;; constraints ("c", "a", "d") and are implicit in the instruction.
;; Alternative 1 takes operand 3 from memory, so "memory" is "load".
16030(define_insn "sse4_2_pcmpestri"
16031  [(set (match_operand:SI 0 "register_operand" "=c,c")
16032	(unspec:SI
16033	  [(match_operand:V16QI 1 "register_operand" "x,x")
16034	   (match_operand:SI 2 "register_operand" "a,a")
16035	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
16036	   (match_operand:SI 4 "register_operand" "d,d")
16037	   (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
16038	  UNSPEC_PCMPESTR))
16039   (set (reg:CC FLAGS_REG)
16040	(unspec:CC
16041	  [(match_dup 1)
16042	   (match_dup 2)
16043	   (match_dup 3)
16044	   (match_dup 4)
16045	   (match_dup 5)]
16046	  UNSPEC_PCMPESTR))]
16047  "TARGET_SSE4_2"
16048  "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
16049  [(set_attr "type" "sselog")
16050   (set_attr "prefix_data16" "1")
16051   (set_attr "prefix_extra" "1")
16052   (set_attr "prefix" "maybe_vex")
16053   (set_attr "length_immediate" "1")
16054   (set_attr "btver2_decode" "vector")
16055   (set_attr "memory" "none,load")
16056   (set_attr "mode" "TI")])
16057
;; pcmpestrm: produces the V16QI result in operand 0 (constraint "Yz")
;; and sets FLAGS_REG.  As with pcmpestri, only operands 1 and 3 and the
;; immediate operand 5 are printed; operands 0, 2 and 4 are pinned by
;; their constraints and are implicit in the instruction.
16058(define_insn "sse4_2_pcmpestrm"
16059  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
16060	(unspec:V16QI
16061	  [(match_operand:V16QI 1 "register_operand" "x,x")
16062	   (match_operand:SI 2 "register_operand" "a,a")
16063	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
16064	   (match_operand:SI 4 "register_operand" "d,d")
16065	   (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
16066	  UNSPEC_PCMPESTR))
16067   (set (reg:CC FLAGS_REG)
16068	(unspec:CC
16069	  [(match_dup 1)
16070	   (match_dup 2)
16071	   (match_dup 3)
16072	   (match_dup 4)
16073	   (match_dup 5)]
16074	  UNSPEC_PCMPESTR))]
16075  "TARGET_SSE4_2"
16076  "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
16077  [(set_attr "type" "sselog")
16078   (set_attr "prefix_data16" "1")
16079   (set_attr "prefix_extra" "1")
16080   (set_attr "length_immediate" "1")
16081   (set_attr "prefix" "maybe_vex")
16082   (set_attr "btver2_decode" "vector")
16083   (set_attr "memory" "none,load")
16084   (set_attr "mode" "TI")])
16085
;; Flags-only pcmpestr: only FLAGS_REG is set and the data result is
;; clobbered.  Alternatives 0-1 emit pcmpestrm and clobber the V16QI
;; scratch operand 0 ("Yz"); alternatives 2-3 emit pcmpestri and clobber
;; the SImode scratch operand 1 ("c").  Within each pair, the second
;; alternative takes operand 4 from memory.
16086(define_insn "sse4_2_pcmpestr_cconly"
16087  [(set (reg:CC FLAGS_REG)
16088	(unspec:CC
16089	  [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
16090	   (match_operand:SI 3 "register_operand" "a,a,a,a")
16091	   (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
16092	   (match_operand:SI 5 "register_operand" "d,d,d,d")
16093	   (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
16094	  UNSPEC_PCMPESTR))
16095   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
16096   (clobber (match_scratch:SI    1 "= X, X,c,c"))]
16097  "TARGET_SSE4_2"
16098  "@
16099   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
16100   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
16101   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
16102   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
16103  [(set_attr "type" "sselog")
16104   (set_attr "prefix_data16" "1")
16105   (set_attr "prefix_extra" "1")
16106   (set_attr "length_immediate" "1")
16107   (set_attr "memory" "none,load,none,load")
16108   (set_attr "btver2_decode" "vector,vector,vector,vector")
16109   (set_attr "prefix" "maybe_vex")
16110   (set_attr "mode" "TI")])
16111
;; Combined pcmpistr pattern describing all three results at once: the
;; SImode result in operand 0 (constraint "c"), the V16QI result in
;; operand 1 (constraint "Yz") and FLAGS_REG.  It is only valid while
;; pseudos can still be created, and its "#" template means it is never
;; output directly but always split.
;;
;; The split checks the REG_UNUSED notes on the insn to see which results
;; are live and emits only what is needed:
;;  - pcmpistri if operand 0 is used;
;;  - pcmpistrm if operand 1 is used;
;;  - the flags-only pattern if only FLAGS_REG is used;
;;  - a NOTE_INSN_DELETED if nothing is used.
16112(define_insn_and_split "sse4_2_pcmpistr"
16113  [(set (match_operand:SI 0 "register_operand" "=c,c")
16114	(unspec:SI
16115	  [(match_operand:V16QI 2 "register_operand" "x,x")
16116	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
16117	   (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
16118	  UNSPEC_PCMPISTR))
16119   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
16120	(unspec:V16QI
16121	  [(match_dup 2)
16122	   (match_dup 3)
16123	   (match_dup 4)]
16124	  UNSPEC_PCMPISTR))
16125   (set (reg:CC FLAGS_REG)
16126	(unspec:CC
16127	  [(match_dup 2)
16128	   (match_dup 3)
16129	   (match_dup 4)]
16130	  UNSPEC_PCMPISTR))]
16131  "TARGET_SSE4_2
16132   && can_create_pseudo_p ()"
16133  "#"
16134  "&& 1"
16135  [(const_int 0)]
16136{
16137  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
16138  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
16139  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
16140
16141  if (ecx)
16142    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
16143				     operands[3], operands[4]));
16144  if (xmm0)
16145    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
16146				     operands[3], operands[4]));
16147  if (flags && !(ecx || xmm0))
16148    emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
16149					   operands[2], operands[3],
16150					   operands[4]));
16151  if (!(flags || ecx || xmm0))
16152    emit_note (NOTE_INSN_DELETED);
16153
16154  DONE;
16155}
16156  [(set_attr "type" "sselog")
16157   (set_attr "prefix_data16" "1")
16158   (set_attr "prefix_extra" "1")
16159   (set_attr "length_immediate" "1")
16160   (set_attr "memory" "none,load")
16161   (set_attr "mode" "TI")])
16162
;; pcmpistri: produces the SImode result in operand 0 (constraint "c")
;; and sets FLAGS_REG.  The template prints operands 1 and 2 and the
;; immediate operand 3; operand 0 is implicit.  Alternative 1 takes
;; operand 2 from memory, so "memory" is "load".
16163(define_insn "sse4_2_pcmpistri"
16164  [(set (match_operand:SI 0 "register_operand" "=c,c")
16165	(unspec:SI
16166	  [(match_operand:V16QI 1 "register_operand" "x,x")
16167	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
16168	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
16169	  UNSPEC_PCMPISTR))
16170   (set (reg:CC FLAGS_REG)
16171	(unspec:CC
16172	  [(match_dup 1)
16173	   (match_dup 2)
16174	   (match_dup 3)]
16175	  UNSPEC_PCMPISTR))]
16176  "TARGET_SSE4_2"
16177  "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
16178  [(set_attr "type" "sselog")
16179   (set_attr "prefix_data16" "1")
16180   (set_attr "prefix_extra" "1")
16181   (set_attr "length_immediate" "1")
16182   (set_attr "prefix" "maybe_vex")
16183   (set_attr "memory" "none,load")
16184   (set_attr "btver2_decode" "vector")
16185   (set_attr "mode" "TI")])
16186
;; pcmpistrm: produces the V16QI result in operand 0 (constraint "Yz")
;; and sets FLAGS_REG.  The template prints operands 1 and 2 and the
;; immediate operand 3; operand 0 is implicit.
16187(define_insn "sse4_2_pcmpistrm"
16188  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
16189	(unspec:V16QI
16190	  [(match_operand:V16QI 1 "register_operand" "x,x")
16191	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
16192	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
16193	  UNSPEC_PCMPISTR))
16194   (set (reg:CC FLAGS_REG)
16195	(unspec:CC
16196	  [(match_dup 1)
16197	   (match_dup 2)
16198	   (match_dup 3)]
16199	  UNSPEC_PCMPISTR))]
16200  "TARGET_SSE4_2"
16201  "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
16202  [(set_attr "type" "sselog")
16203   (set_attr "prefix_data16" "1")
16204   (set_attr "prefix_extra" "1")
16205   (set_attr "length_immediate" "1")
16206   (set_attr "prefix" "maybe_vex")
16207   (set_attr "memory" "none,load")
16208   (set_attr "btver2_decode" "vector")
16209   (set_attr "mode" "TI")])
16210
;; Flags-only form: only FLAGS_REG is set; the register result is a
;; clobbered scratch.  Alternatives 0-1 clobber a "Yz" vector scratch
;; (operand 0) and emit pcmpistrm; alternatives 2-3 clobber a "c" SImode
;; scratch (operand 1) and emit pcmpistri.  Within each pair, operand 3 is
;; a register in the first alternative and memory in the second.  Note the
;; compared operands are numbered 2-4 here, not 1-3.
16211(define_insn "sse4_2_pcmpistr_cconly"
16212  [(set (reg:CC FLAGS_REG)
16213	(unspec:CC
16214	  [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
16215	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
16216	   (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
16217	  UNSPEC_PCMPISTR))
16218   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
16219   (clobber (match_scratch:SI    1 "= X, X,c,c"))]
16220  "TARGET_SSE4_2"
16221  "@
16222   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
16223   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
16224   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
16225   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
16226  [(set_attr "type" "sselog")
16227   (set_attr "prefix_data16" "1")
16228   (set_attr "prefix_extra" "1")
16229   (set_attr "length_immediate" "1")
16230   (set_attr "memory" "none,load,none,load")
16231   (set_attr "prefix" "maybe_vex")
16232   (set_attr "btver2_decode" "vector,vector,vector,vector")
16233   (set_attr "mode" "TI")])
16234
16235;; Packed float variants
;; Maps the index-vector mode of the single-precision prefetch patterns to
;; the mode given to their (mem ...) operand: V8DI -> V8SF, V16SI -> V16SF.
16236(define_mode_attr GATHER_SCATTER_SF_MEM_MODE
16237		      [(V8DI "V8SF") (V16SI "V16SF")])
16238
;; Expander for the single-precision gather prefetch.  Operand 0 is the
;; mask register, operand 2 the VSIB base address, operand 1 the index
;; vector, operand 3 a const1248_operand (presumably the scale) and
;; operand 4 a hint restricted to 2..3.  The preparation code builds
;; operand 5, a Pmode UNSPEC_VSIBADDR over (operand 2, operand 1,
;; operand 3), which is substituted for the match_par_dup address.
16239(define_expand "avx512pf_gatherpf<mode>sf"
16240  [(unspec
16241     [(match_operand:<avx512fmaskmode> 0 "register_operand")
16242      (mem:<GATHER_SCATTER_SF_MEM_MODE>
16243	(match_par_dup 5
16244	  [(match_operand 2 "vsib_address_operand")
16245	   (match_operand:VI48_512 1 "register_operand")
16246	   (match_operand:SI 3 "const1248_operand")]))
16247      (match_operand:SI 4 "const_2_to_3_operand")]
16248     UNSPEC_GATHER_PREFETCH)]
16249  "TARGET_AVX512PF"
16250{
16251  operands[5]
16252    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
16253					operands[3]), UNSPEC_VSIBADDR);
16254})
16255
;; Matching insn for the single-precision gather prefetch.  Operand 4
;; selects the mnemonic: 3 emits vgatherpf0...ps, 2 emits vgatherpf1...ps.
;; Any other value is rejected by const_2_to_3_operand, hence the
;; gcc_unreachable default.  Operand 0 is printed as the {%k} mask.
16256(define_insn "*avx512pf_gatherpf<mode>sf_mask"
16257  [(unspec
16258     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
16259      (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
16260	[(unspec:P
16261	   [(match_operand:P 2 "vsib_address_operand" "Tv")
16262	    (match_operand:VI48_512 1 "register_operand" "v")
16263	    (match_operand:SI 3 "const1248_operand" "n")]
16264	   UNSPEC_VSIBADDR)])
16265      (match_operand:SI 4 "const_2_to_3_operand" "n")]
16266     UNSPEC_GATHER_PREFETCH)]
16267  "TARGET_AVX512PF"
16268{
16269  switch (INTVAL (operands[4]))
16270    {
16271    case 3:
16272      /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
16273	 gas changed what it requires incompatibly.  */
16274      return "%M2vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
16275    case 2:
16276      return "%M2vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
16277    default:
16278      gcc_unreachable ();
16279    }
16280}
16281  [(set_attr "type" "sse")
16282   (set_attr "prefix" "evex")
16283   (set_attr "mode" "XI")])
16284
16285;; Packed double variants
;; Expander for the double-precision gather prefetch.  Same operand layout
;; as the single-precision expander, but the memory operand is always
;; V8DF and the index vector iterates over VI4_256_8_512.  The preparation
;; code builds operand 5 as a Pmode UNSPEC_VSIBADDR over
;; (operand 2, operand 1, operand 3).
16286(define_expand "avx512pf_gatherpf<mode>df"
16287  [(unspec
16288     [(match_operand:<avx512fmaskmode> 0 "register_operand")
16289      (mem:V8DF
16290	(match_par_dup 5
16291	  [(match_operand 2 "vsib_address_operand")
16292	   (match_operand:VI4_256_8_512 1 "register_operand")
16293	   (match_operand:SI 3 "const1248_operand")]))
16294      (match_operand:SI 4 "const_2_to_3_operand")]
16295     UNSPEC_GATHER_PREFETCH)]
16296  "TARGET_AVX512PF"
16297{
16298  operands[5]
16299    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
16300					operands[3]), UNSPEC_VSIBADDR);
16301})
16302
;; Matching insn for the double-precision gather prefetch.  Operand 4
;; selects the mnemonic: 3 emits vgatherpf0...pd, 2 emits vgatherpf1...pd.
;; Other values cannot pass const_2_to_3_operand.
16303(define_insn "*avx512pf_gatherpf<mode>df_mask"
16304  [(unspec
16305     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
16306      (match_operator:V8DF 5 "vsib_mem_operator"
16307	[(unspec:P
16308	   [(match_operand:P 2 "vsib_address_operand" "Tv")
16309	    (match_operand:VI4_256_8_512 1 "register_operand" "v")
16310	    (match_operand:SI 3 "const1248_operand" "n")]
16311	   UNSPEC_VSIBADDR)])
16312      (match_operand:SI 4 "const_2_to_3_operand" "n")]
16313     UNSPEC_GATHER_PREFETCH)]
16314  "TARGET_AVX512PF"
16315{
16316  switch (INTVAL (operands[4]))
16317    {
16318    case 3:
16319      /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
16320	 gas changed what it requires incompatibly.  */
16321      return "%M2vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
16322    case 2:
16323      return "%M2vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
16324    default:
16325      gcc_unreachable ();
16326    }
16327}
16328  [(set_attr "type" "sse")
16329   (set_attr "prefix" "evex")
16330   (set_attr "mode" "XI")])
16331
16332;; Packed float variants
;; Expander for the single-precision scatter prefetch.  Same operand
;; layout as the gather prefetch expanders, except that the hint
;; (operand 4) must satisfy const2367_operand and the unspec is
;; UNSPEC_SCATTER_PREFETCH.  Operand 5 is built as a Pmode UNSPEC_VSIBADDR
;; over (operand 2, operand 1, operand 3).
16333(define_expand "avx512pf_scatterpf<mode>sf"
16334  [(unspec
16335     [(match_operand:<avx512fmaskmode> 0 "register_operand")
16336      (mem:<GATHER_SCATTER_SF_MEM_MODE>
16337	(match_par_dup 5
16338	  [(match_operand 2 "vsib_address_operand")
16339	   (match_operand:VI48_512 1 "register_operand")
16340	   (match_operand:SI 3 "const1248_operand")]))
16341      (match_operand:SI 4 "const2367_operand")]
16342     UNSPEC_SCATTER_PREFETCH)]
16343  "TARGET_AVX512PF"
16344{
16345  operands[5]
16346    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
16347					operands[3]), UNSPEC_VSIBADDR);
16348})
16349
;; Matching insn for the single-precision scatter prefetch.  Operand 4
;; selects the mnemonic: 3 or 7 emits vscatterpf0...ps, 2 or 6 emits
;; vscatterpf1...ps.  The default case is unreachable for values accepted
;; by const2367_operand.
16350(define_insn "*avx512pf_scatterpf<mode>sf_mask"
16351  [(unspec
16352     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
16353      (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
16354	[(unspec:P
16355	   [(match_operand:P 2 "vsib_address_operand" "Tv")
16356	    (match_operand:VI48_512 1 "register_operand" "v")
16357	    (match_operand:SI 3 "const1248_operand" "n")]
16358	   UNSPEC_VSIBADDR)])
16359      (match_operand:SI 4 "const2367_operand" "n")]
16360     UNSPEC_SCATTER_PREFETCH)]
16361  "TARGET_AVX512PF"
16362{
16363  switch (INTVAL (operands[4]))
16364    {
16365    case 3:
16366    case 7:
16367      /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
16368	 gas changed what it requires incompatibly.  */
16369      return "%M2vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
16370    case 2:
16371    case 6:
16372      return "%M2vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
16373    default:
16374      gcc_unreachable ();
16375    }
16376}
16377  [(set_attr "type" "sse")
16378   (set_attr "prefix" "evex")
16379   (set_attr "mode" "XI")])
16380
16381;; Packed double variants
;; Expander for the double-precision scatter prefetch: V8DF memory
;; operand, index vector over VI4_256_8_512, hint accepted by
;; const2367_operand.  Operand 5 is built as a Pmode UNSPEC_VSIBADDR over
;; (operand 2, operand 1, operand 3).
16382(define_expand "avx512pf_scatterpf<mode>df"
16383  [(unspec
16384     [(match_operand:<avx512fmaskmode> 0 "register_operand")
16385      (mem:V8DF
16386	(match_par_dup 5
16387	  [(match_operand 2 "vsib_address_operand")
16388	   (match_operand:VI4_256_8_512 1 "register_operand")
16389	   (match_operand:SI 3 "const1248_operand")]))
16390      (match_operand:SI 4 "const2367_operand")]
16391     UNSPEC_SCATTER_PREFETCH)]
16392  "TARGET_AVX512PF"
16393{
16394  operands[5]
16395    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
16396					operands[3]), UNSPEC_VSIBADDR);
16397})
16398
;; Matching insn for the double-precision scatter prefetch.  Operand 4
;; selects the mnemonic: 3 or 7 emits vscatterpf0...pd, 2 or 6 emits
;; vscatterpf1...pd.
16399(define_insn "*avx512pf_scatterpf<mode>df_mask"
16400  [(unspec
16401     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
16402      (match_operator:V8DF 5 "vsib_mem_operator"
16403	[(unspec:P
16404	   [(match_operand:P 2 "vsib_address_operand" "Tv")
16405	    (match_operand:VI4_256_8_512 1 "register_operand" "v")
16406	    (match_operand:SI 3 "const1248_operand" "n")]
16407	   UNSPEC_VSIBADDR)])
16408      (match_operand:SI 4 "const2367_operand" "n")]
16409     UNSPEC_SCATTER_PREFETCH)]
16410  "TARGET_AVX512PF"
16411{
16412  switch (INTVAL (operands[4]))
16413    {
16414    case 3:
16415    case 7:
16416      /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
16417	 gas changed what it requires incompatibly.  */
16418      return "%M2vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
16419    case 2:
16420    case 6:
16421      return "%M2vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
16422    default:
16423      gcc_unreachable ();
16424    }
16425}
16426  [(set_attr "type" "sse")
16427   (set_attr "prefix" "evex")
16428   (set_attr "mode" "XI")])
16429
;; vexp2: operand 0 = UNSPEC_EXP2 of operand 1, over the VF_512 modes,
;; under TARGET_AVX512ER.  The <mask_name> and <round_saeonly_name> parts
;; of the name come from substitutions defined outside this chunk
;; (presumably producing the masked and SAE variants -- confirm in
;; subst.md).
16430(define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
16431  [(set (match_operand:VF_512 0 "register_operand" "=v")
16432	(unspec:VF_512
16433	  [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16434	  UNSPEC_EXP2))]
16435  "TARGET_AVX512ER"
16436  "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
16437  [(set_attr "prefix" "evex")
16438   (set_attr "type" "sse")
16439   (set_attr "mode" "<MODE>")])
16440
;; vrcp28 (packed): operand 0 = UNSPEC_RCP28 of operand 1, over the VF_512
;; modes, under TARGET_AVX512ER.  Name and template are parameterised by
;; the mask and round/SAE substitutions defined outside this chunk.
16441(define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
16442  [(set (match_operand:VF_512 0 "register_operand" "=v")
16443	(unspec:VF_512
16444	  [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16445	  UNSPEC_RCP28))]
16446  "TARGET_AVX512ER"
16447  "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
16448  [(set_attr "prefix" "evex")
16449   (set_attr "type" "sse")
16450   (set_attr "mode" "<MODE>")])
16451
;; vrcp28 (scalar): vec_merge with mask (const_int 1) takes element 0 from
;; UNSPEC_RCP28 of operand 1 and the remaining elements from operand 2.
;; NOTE(review): "length_immediate" is set to 1 although the output
;; template prints no immediate operand -- confirm this is intended.
16452(define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
16453  [(set (match_operand:VF_128 0 "register_operand" "=v")
16454	(vec_merge:VF_128
16455	  (unspec:VF_128
16456	    [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16457	    UNSPEC_RCP28)
16458	  (match_operand:VF_128 2 "register_operand" "v")
16459	  (const_int 1)))]
16460  "TARGET_AVX512ER"
16461  "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %<iptr>1<round_saeonly_op3>}"
16462  [(set_attr "length_immediate" "1")
16463   (set_attr "prefix" "evex")
16464   (set_attr "type" "sse")
16465   (set_attr "mode" "<MODE>")])
16466
;; vrsqrt28 (packed): operand 0 = UNSPEC_RSQRT28 of operand 1, over the
;; VF_512 modes, under TARGET_AVX512ER.  Name and template are
;; parameterised by the mask and round/SAE substitutions defined outside
;; this chunk.
16467(define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
16468  [(set (match_operand:VF_512 0 "register_operand" "=v")
16469	(unspec:VF_512
16470	  [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16471	  UNSPEC_RSQRT28))]
16472  "TARGET_AVX512ER"
16473  "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
16474  [(set_attr "prefix" "evex")
16475   (set_attr "type" "sse")
16476   (set_attr "mode" "<MODE>")])
16477
;; vrsqrt28 (scalar): vec_merge with mask (const_int 1) takes element 0
;; from UNSPEC_RSQRT28 of operand 1 and the remaining elements from
;; operand 2.
;; NOTE(review): "length_immediate" is set to 1 although the output
;; template prints no immediate operand -- confirm this is intended.
16478(define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
16479  [(set (match_operand:VF_128 0 "register_operand" "=v")
16480	(vec_merge:VF_128
16481	  (unspec:VF_128
16482	    [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16483	    UNSPEC_RSQRT28)
16484	  (match_operand:VF_128 2 "register_operand" "v")
16485	  (const_int 1)))]
16486  "TARGET_AVX512ER"
16487  "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %<iptr>1<round_saeonly_op3>}"
16488  [(set_attr "length_immediate" "1")
16489   (set_attr "type" "sse")
16490   (set_attr "prefix" "evex")
16491   (set_attr "mode" "<MODE>")])
16492
16493;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16494;;
16495;; XOP instructions
16496;;
16497;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16498
;; xop_plus iterates over plain and signed-saturating addition, so each
;; multiply/accumulate pattern below is instantiated once per code.
16499(define_code_iterator xop_plus [plus ss_plus])
16500
;; Mnemonic fragments per code: the ss_plus variants get a trailing "s".
16501(define_code_attr macs [(plus "macs") (ss_plus "macss")])
16502(define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
16503
16504;; XOP parallel integer multiply/add instructions.
16505
;; Element-wise multiply/accumulate over the VI24_128 modes:
;; operand 0 = (operand 1 * operand 2) <xop_plus> operand 3.
;; Operands 1 and 2 are commutative ("%"); only operand 2 may be memory.
16506(define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
16507  [(set (match_operand:VI24_128 0 "register_operand" "=x")
16508	(xop_plus:VI24_128
16509	 (mult:VI24_128
16510	  (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
16511	  (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
16512	 (match_operand:VI24_128 3 "register_operand" "x")))]
16513  "TARGET_XOP"
16514  "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16515  [(set_attr "type" "ssemuladd")
16516   (set_attr "mode" "TI")])
16517
;; Widening multiply/accumulate, "low" form: elements 0 and 2 of each V4SI
;; input are sign-extended to V2DI, multiplied, and combined with the V2DI
;; accumulator (operand 3) using <xop_plus>.
16518(define_insn "xop_p<macs>dql"
16519  [(set (match_operand:V2DI 0 "register_operand" "=x")
16520	(xop_plus:V2DI
16521	 (mult:V2DI
16522	  (sign_extend:V2DI
16523	   (vec_select:V2SI
16524	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
16525	    (parallel [(const_int 0) (const_int 2)])))
16526	  (sign_extend:V2DI
16527	   (vec_select:V2SI
16528	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
16529	    (parallel [(const_int 0) (const_int 2)]))))
16530	 (match_operand:V2DI 3 "register_operand" "x")))]
16531  "TARGET_XOP"
16532  "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16533  [(set_attr "type" "ssemuladd")
16534   (set_attr "mode" "TI")])
16535
;; Widening multiply/accumulate, "high" form: identical to xop_p<macs>dql
;; except that elements 1 and 3 of each V4SI input are selected.
16536(define_insn "xop_p<macs>dqh"
16537  [(set (match_operand:V2DI 0 "register_operand" "=x")
16538	(xop_plus:V2DI
16539	 (mult:V2DI
16540	  (sign_extend:V2DI
16541	   (vec_select:V2SI
16542	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
16543	    (parallel [(const_int 1) (const_int 3)])))
16544	  (sign_extend:V2DI
16545	   (vec_select:V2SI
16546	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
16547	    (parallel [(const_int 1) (const_int 3)]))))
16548	 (match_operand:V2DI 3 "register_operand" "x")))]
16549  "TARGET_XOP"
16550  "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16551  [(set_attr "type" "ssemuladd")
16552   (set_attr "mode" "TI")])
16553
16554;; XOP parallel integer multiply/add instructions for the intrinsics
;; Word-to-dword multiply/accumulate: the odd elements (1, 3, 5, 7) of
;; each V8HI input are sign-extended to V4SI, multiplied, and combined
;; with the V4SI accumulator (operand 3) using <xop_plus>.
16555(define_insn "xop_p<macs>wd"
16556  [(set (match_operand:V4SI 0 "register_operand" "=x")
16557	(xop_plus:V4SI
16558	 (mult:V4SI
16559	  (sign_extend:V4SI
16560	   (vec_select:V4HI
16561	    (match_operand:V8HI 1 "nonimmediate_operand" "%x")
16562	    (parallel [(const_int 1) (const_int 3)
16563		       (const_int 5) (const_int 7)])))
16564	  (sign_extend:V4SI
16565	   (vec_select:V4HI
16566	    (match_operand:V8HI 2 "nonimmediate_operand" "xm")
16567	    (parallel [(const_int 1) (const_int 3)
16568		       (const_int 5) (const_int 7)]))))
16569	 (match_operand:V4SI 3 "register_operand" "x")))]
16570  "TARGET_XOP"
16571  "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16572  [(set_attr "type" "ssemuladd")
16573   (set_attr "mode" "TI")])
16574
;; Multiply-add-accumulate on words: the product of the even elements
;; (0, 2, 4, 6) and the product of the odd elements (1, 3, 5, 7) of the
;; two V8HI inputs, each sign-extended to V4SI, are added with plain plus;
;; that sum is then combined with operand 3 using <xop_plus>.
16575(define_insn "xop_p<madcs>wd"
16576  [(set (match_operand:V4SI 0 "register_operand" "=x")
16577	(xop_plus:V4SI
16578	 (plus:V4SI
16579	  (mult:V4SI
16580	   (sign_extend:V4SI
16581	    (vec_select:V4HI
16582	     (match_operand:V8HI 1 "nonimmediate_operand" "%x")
16583	     (parallel [(const_int 0) (const_int 2)
16584			(const_int 4) (const_int 6)])))
16585	   (sign_extend:V4SI
16586	    (vec_select:V4HI
16587	     (match_operand:V8HI 2 "nonimmediate_operand" "xm")
16588	     (parallel [(const_int 0) (const_int 2)
16589			(const_int 4) (const_int 6)]))))
16590	  (mult:V4SI
16591	   (sign_extend:V4SI
16592	    (vec_select:V4HI
16593	     (match_dup 1)
16594	     (parallel [(const_int 1) (const_int 3)
16595			(const_int 5) (const_int 7)])))
16596	   (sign_extend:V4SI
16597	    (vec_select:V4HI
16598	     (match_dup 2)
16599	     (parallel [(const_int 1) (const_int 3)
16600			(const_int 5) (const_int 7)])))))
16601	 (match_operand:V4SI 3 "register_operand" "x")))]
16602  "TARGET_XOP"
16603  "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16604  [(set_attr "type" "ssemuladd")
16605   (set_attr "mode" "TI")])
16606
16607;; XOP parallel XMM conditional moves
;; vpcmov: if_then_else with operand 3 as the selector, operand 1 as the
;; "then" value and operand 2 as the "else" value, over the V_128_256
;; modes.  The two alternatives allow a memory operand in either the
;; selector (operand 3) or operand 2, never both.  No "mode" attribute is
;; set here.
16608(define_insn "xop_pcmov_<mode><avxsizesuffix>"
16609  [(set (match_operand:V_128_256 0 "register_operand" "=x,x")
16610	(if_then_else:V_128_256
16611	  (match_operand:V_128_256 3 "nonimmediate_operand" "x,m")
16612	  (match_operand:V_128_256 1 "register_operand" "x,x")
16613	  (match_operand:V_128_256 2 "nonimmediate_operand" "xm,x")))]
16614  "TARGET_XOP"
16615  "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16616  [(set_attr "type" "sse4arg")])
16617
16618;; XOP horizontal add/subtract instructions
;; Horizontal add, byte -> word: each V8HI result element is the sum of an
;; even-indexed byte and the following odd-indexed byte of operand 1, each
;; widened with any_extend (the code iterator and the <u> attribute are
;; defined outside this chunk).
16619(define_insn "xop_phadd<u>bw"
16620  [(set (match_operand:V8HI 0 "register_operand" "=x")
16621	(plus:V8HI
16622	 (any_extend:V8HI
16623	  (vec_select:V8QI
16624	   (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16625	   (parallel [(const_int 0) (const_int 2)
16626		      (const_int 4) (const_int 6)
16627		      (const_int 8) (const_int 10)
16628		      (const_int 12) (const_int 14)])))
16629	 (any_extend:V8HI
16630	  (vec_select:V8QI
16631	   (match_dup 1)
16632	   (parallel [(const_int 1) (const_int 3)
16633		      (const_int 5) (const_int 7)
16634		      (const_int 9) (const_int 11)
16635		      (const_int 13) (const_int 15)])))))]
16636  "TARGET_XOP"
16637  "vphadd<u>bw\t{%1, %0|%0, %1}"
16638  [(set_attr "type" "sseiadd1")])
16639
;; Horizontal add, byte -> dword: result element k is the sum of the four
;; consecutive bytes 4k .. 4k+3 of operand 1, each widened with
;; any_extend.  The sum is expressed as (b0 + b1) + (b2 + b3).
16640(define_insn "xop_phadd<u>bd"
16641  [(set (match_operand:V4SI 0 "register_operand" "=x")
16642	(plus:V4SI
16643	 (plus:V4SI
16644	  (any_extend:V4SI
16645	   (vec_select:V4QI
16646	    (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16647	    (parallel [(const_int 0) (const_int 4)
16648		       (const_int 8) (const_int 12)])))
16649	  (any_extend:V4SI
16650	   (vec_select:V4QI
16651	    (match_dup 1)
16652	    (parallel [(const_int 1) (const_int 5)
16653		       (const_int 9) (const_int 13)]))))
16654	 (plus:V4SI
16655	  (any_extend:V4SI
16656	   (vec_select:V4QI
16657	    (match_dup 1)
16658	    (parallel [(const_int 2) (const_int 6)
16659		       (const_int 10) (const_int 14)])))
16660	  (any_extend:V4SI
16661	   (vec_select:V4QI
16662	    (match_dup 1)
16663	    (parallel [(const_int 3) (const_int 7)
16664		       (const_int 11) (const_int 15)]))))))]
16665  "TARGET_XOP"
16666  "vphadd<u>bd\t{%1, %0|%0, %1}"
16667  [(set_attr "type" "sseiadd1")])
16668
;; Horizontal add, byte -> qword: result element k is the sum of the eight
;; consecutive bytes 8k .. 8k+7 of operand 1, each widened with
;; any_extend.  The sum is written as a balanced tree of pairwise adds.
16669(define_insn "xop_phadd<u>bq"
16670  [(set (match_operand:V2DI 0 "register_operand" "=x")
16671	(plus:V2DI
16672	 (plus:V2DI
16673	  (plus:V2DI
16674	   (any_extend:V2DI
16675	    (vec_select:V2QI
16676	     (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16677	     (parallel [(const_int 0) (const_int 8)])))
16678	   (any_extend:V2DI
16679	    (vec_select:V2QI
16680	     (match_dup 1)
16681	     (parallel [(const_int 1) (const_int 9)]))))
16682	  (plus:V2DI
16683	   (any_extend:V2DI
16684	    (vec_select:V2QI
16685	     (match_dup 1)
16686	     (parallel [(const_int 2) (const_int 10)])))
16687	   (any_extend:V2DI
16688	    (vec_select:V2QI
16689	     (match_dup 1)
16690	     (parallel [(const_int 3) (const_int 11)])))))
16691	 (plus:V2DI
16692	  (plus:V2DI
16693	   (any_extend:V2DI
16694	    (vec_select:V2QI
16695	     (match_dup 1)
16696	     (parallel [(const_int 4) (const_int 12)])))
16697	   (any_extend:V2DI
16698	    (vec_select:V2QI
16699	     (match_dup 1)
16700	     (parallel [(const_int 5) (const_int 13)]))))
16701	  (plus:V2DI
16702	   (any_extend:V2DI
16703	    (vec_select:V2QI
16704	     (match_dup 1)
16705	     (parallel [(const_int 6) (const_int 14)])))
16706	   (any_extend:V2DI
16707	    (vec_select:V2QI
16708	     (match_dup 1)
16709	     (parallel [(const_int 7) (const_int 15)])))))))]
16710  "TARGET_XOP"
16711  "vphadd<u>bq\t{%1, %0|%0, %1}"
16712  [(set_attr "type" "sseiadd1")])
16713
;; Horizontal add, word -> dword: each V4SI result element is the sum of
;; an even-indexed word and the following odd-indexed word of operand 1,
;; each widened with any_extend.
16714(define_insn "xop_phadd<u>wd"
16715  [(set (match_operand:V4SI 0 "register_operand" "=x")
16716	(plus:V4SI
16717	 (any_extend:V4SI
16718	  (vec_select:V4HI
16719	   (match_operand:V8HI 1 "nonimmediate_operand" "xm")
16720	   (parallel [(const_int 0) (const_int 2)
16721		      (const_int 4) (const_int 6)])))
16722	 (any_extend:V4SI
16723	  (vec_select:V4HI
16724	   (match_dup 1)
16725	   (parallel [(const_int 1) (const_int 3)
16726		      (const_int 5) (const_int 7)])))))]
16727  "TARGET_XOP"
16728  "vphadd<u>wd\t{%1, %0|%0, %1}"
16729  [(set_attr "type" "sseiadd1")])
16730
;; Horizontal add, word -> qword: result element k is the sum of the four
;; consecutive words 4k .. 4k+3 of operand 1, each widened with
;; any_extend.
16731(define_insn "xop_phadd<u>wq"
16732  [(set (match_operand:V2DI 0 "register_operand" "=x")
16733	(plus:V2DI
16734	 (plus:V2DI
16735	  (any_extend:V2DI
16736	   (vec_select:V2HI
16737	    (match_operand:V8HI 1 "nonimmediate_operand" "xm")
16738	    (parallel [(const_int 0) (const_int 4)])))
16739	  (any_extend:V2DI
16740	   (vec_select:V2HI
16741	    (match_dup 1)
16742	    (parallel [(const_int 1) (const_int 5)]))))
16743	 (plus:V2DI
16744	  (any_extend:V2DI
16745	   (vec_select:V2HI
16746	    (match_dup 1)
16747	    (parallel [(const_int 2) (const_int 6)])))
16748	  (any_extend:V2DI
16749	   (vec_select:V2HI
16750	    (match_dup 1)
16751	    (parallel [(const_int 3) (const_int 7)]))))))]
16752  "TARGET_XOP"
16753  "vphadd<u>wq\t{%1, %0|%0, %1}"
16754  [(set_attr "type" "sseiadd1")])
16755
;; Horizontal add, dword -> qword: each V2DI result element is the sum of
;; an even-indexed dword and the following odd-indexed dword of operand 1,
;; each widened with any_extend.
16756(define_insn "xop_phadd<u>dq"
16757  [(set (match_operand:V2DI 0 "register_operand" "=x")
16758	(plus:V2DI
16759	 (any_extend:V2DI
16760	  (vec_select:V2SI
16761	   (match_operand:V4SI 1 "nonimmediate_operand" "xm")
16762	   (parallel [(const_int 0) (const_int 2)])))
16763	 (any_extend:V2DI
16764	  (vec_select:V2SI
16765	   (match_dup 1)
16766	   (parallel [(const_int 1) (const_int 3)])))))]
16767  "TARGET_XOP"
16768  "vphadd<u>dq\t{%1, %0|%0, %1}"
16769  [(set_attr "type" "sseiadd1")])
16770
;; Horizontal subtract, byte -> word: each V8HI result element is the
;; sign-extended even-indexed byte minus the sign-extended following
;; odd-indexed byte of operand 1.  Only a signed form is defined.
16771(define_insn "xop_phsubbw"
16772  [(set (match_operand:V8HI 0 "register_operand" "=x")
16773	(minus:V8HI
16774	 (sign_extend:V8HI
16775	  (vec_select:V8QI
16776	   (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16777	   (parallel [(const_int 0) (const_int 2)
16778		      (const_int 4) (const_int 6)
16779		      (const_int 8) (const_int 10)
16780		      (const_int 12) (const_int 14)])))
16781	 (sign_extend:V8HI
16782	  (vec_select:V8QI
16783	   (match_dup 1)
16784	   (parallel [(const_int 1) (const_int 3)
16785		      (const_int 5) (const_int 7)
16786		      (const_int 9) (const_int 11)
16787		      (const_int 13) (const_int 15)])))))]
16788  "TARGET_XOP"
16789  "vphsubbw\t{%1, %0|%0, %1}"
16790  [(set_attr "type" "sseiadd1")])
16791
;; Horizontal subtract, word -> dword: each V4SI result element is the
;; sign-extended even-indexed word minus the sign-extended following
;; odd-indexed word of operand 1.
16792(define_insn "xop_phsubwd"
16793  [(set (match_operand:V4SI 0 "register_operand" "=x")
16794	(minus:V4SI
16795	 (sign_extend:V4SI
16796	  (vec_select:V4HI
16797	   (match_operand:V8HI 1 "nonimmediate_operand" "xm")
16798	   (parallel [(const_int 0) (const_int 2)
16799		      (const_int 4) (const_int 6)])))
16800	 (sign_extend:V4SI
16801	  (vec_select:V4HI
16802	   (match_dup 1)
16803	   (parallel [(const_int 1) (const_int 3)
16804		      (const_int 5) (const_int 7)])))))]
16805  "TARGET_XOP"
16806  "vphsubwd\t{%1, %0|%0, %1}"
16807  [(set_attr "type" "sseiadd1")])
16808
;; Horizontal subtract, dword -> qword: each V2DI result element is the
;; sign-extended even-indexed dword minus the sign-extended following
;; odd-indexed dword of operand 1.
16809(define_insn "xop_phsubdq"
16810  [(set (match_operand:V2DI 0 "register_operand" "=x")
16811	(minus:V2DI
16812	 (sign_extend:V2DI
16813	  (vec_select:V2SI
16814	   (match_operand:V4SI 1 "nonimmediate_operand" "xm")
16815	   (parallel [(const_int 0) (const_int 2)])))
16816	 (sign_extend:V2DI
16817	  (vec_select:V2SI
16818	   (match_dup 1)
16819	   (parallel [(const_int 1) (const_int 3)])))))]
16820  "TARGET_XOP"
16821  "vphsubdq\t{%1, %0|%0, %1}"
16822  [(set_attr "type" "sseiadd1")])
16823
16824;; XOP permute instructions
;; vpperm as an opaque UNSPEC_XOP_PERMUTE of three V16QI operands.
;; Either operand 2 or operand 3 may be memory, but the insn condition
;; rejects the case where both are.
16825(define_insn "xop_pperm"
16826  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
16827	(unspec:V16QI
16828	  [(match_operand:V16QI 1 "register_operand" "x,x")
16829	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
16830	   (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
16831	  UNSPEC_XOP_PERMUTE))]
16832  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16833  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16834  [(set_attr "type" "sse4arg")
16835   (set_attr "mode" "TI")])
16836
16837;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI vectors into one V4SI: the result is the concatenation of
;; the truncations of operand 1 and operand 2.  Operand 3 appears only in
;; a (use ...) and is passed to vpperm as its last source (presumably the
;; byte selector the caller prepared -- confirm at the call sites).
;; Operands 2 and 3 may not both be memory.
16838(define_insn "xop_pperm_pack_v2di_v4si"
16839  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
16840	(vec_concat:V4SI
16841	 (truncate:V2SI
16842	  (match_operand:V2DI 1 "register_operand" "x,x"))
16843	 (truncate:V2SI
16844	  (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
16845   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
16846  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16847  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16848  [(set_attr "type" "sse4arg")
16849   (set_attr "mode" "TI")])
16850
;; Pack two V4SI vectors into one V8HI: the result is the concatenation of
;; the truncations of operand 1 and operand 2.  Operand 3 appears only in
;; a (use ...) and is passed to vpperm as its last source (presumably the
;; byte selector -- confirm at the call sites).  Operands 2 and 3 may not
;; both be memory.
16851(define_insn "xop_pperm_pack_v4si_v8hi"
16852  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
16853	(vec_concat:V8HI
16854	 (truncate:V4HI
16855	  (match_operand:V4SI 1 "register_operand" "x,x"))
16856	 (truncate:V4HI
16857	  (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
16858   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
16859  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16860  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16861  [(set_attr "type" "sse4arg")
16862   (set_attr "mode" "TI")])
16863
;; Pack two V8HI vectors into one V16QI: the result is the concatenation
;; of the truncations of operand 1 and operand 2.  Operand 3 appears only
;; in a (use ...) and is passed to vpperm as its last source (presumably
;; the byte selector -- confirm at the call sites).  Operands 2 and 3 may
;; not both be memory.
16864(define_insn "xop_pperm_pack_v8hi_v16qi"
16865  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
16866	(vec_concat:V16QI
16867	 (truncate:V8QI
16868	  (match_operand:V8HI 1 "register_operand" "x,x"))
16869	 (truncate:V8QI
16870	  (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
16871   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
16872  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16873  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16874  [(set_attr "type" "sse4arg")
16875   (set_attr "mode" "TI")])
16876
16877;; XOP packed rotate instructions
;; Rotate-left expander for the VI_128 modes with an SImode count.
;; When operand 2 is a constant in 0..<sserotatemax>, the preparation code
;; does nothing and the rotate pattern from the template is emitted.
;; Otherwise the count is converted to the element mode if needed,
;; replicated into every element of a fresh vector register with
;; vec_init, and the variable rotate xop_vrotl<mode>3 is emitted instead.
16878(define_expand "rotl<mode>3"
16879  [(set (match_operand:VI_128 0 "register_operand")
16880	(rotate:VI_128
16881	 (match_operand:VI_128 1 "nonimmediate_operand")
16882	 (match_operand:SI 2 "general_operand")))]
16883  "TARGET_XOP"
16884{
16885  /* If we were given a scalar, convert it to parallel */
16886  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
16887    {
16888      rtvec vs = rtvec_alloc (<ssescalarnum>);
16889      rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
16890      rtx reg = gen_reg_rtx (<MODE>mode);
16891      rtx op2 = operands[2];
16892      int i;
16893
16894      if (GET_MODE (op2) != <ssescalarmode>mode)
16895	{
16896	  op2 = gen_reg_rtx (<ssescalarmode>mode);
16897	  convert_move (op2, operands[2], false);
16898	}
16899
16900      for (i = 0; i < <ssescalarnum>; i++)
16901	RTVEC_ELT (vs, i) = op2;
16902
16903      emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
16904      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
16905      DONE;
16906    }
16907})
16908
;; Rotate-right expander for the VI_128 modes with an SImode count.
;; A constant count in 0..<sserotatemax> falls through to the rotatert
;; template.  Otherwise the count is broadcast into a vector as in
;; rotl<mode>3, the vector is negated, and xop_vrotl<mode>3 is emitted:
;; that pattern treats a negative per-element count as a rotate right.
16909(define_expand "rotr<mode>3"
16910  [(set (match_operand:VI_128 0 "register_operand")
16911	(rotatert:VI_128
16912	 (match_operand:VI_128 1 "nonimmediate_operand")
16913	 (match_operand:SI 2 "general_operand")))]
16914  "TARGET_XOP"
16915{
16916  /* If we were given a scalar, convert it to parallel */
16917  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
16918    {
16919      rtvec vs = rtvec_alloc (<ssescalarnum>);
16920      rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
16921      rtx neg = gen_reg_rtx (<MODE>mode);
16922      rtx reg = gen_reg_rtx (<MODE>mode);
16923      rtx op2 = operands[2];
16924      int i;
16925
16926      if (GET_MODE (op2) != <ssescalarmode>mode)
16927	{
16928	  op2 = gen_reg_rtx (<ssescalarmode>mode);
16929	  convert_move (op2, operands[2], false);
16930	}
16931
16932      for (i = 0; i < <ssescalarnum>; i++)
16933	RTVEC_ELT (vs, i) = op2;
16934
16935      emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
16936      emit_insn (gen_neg<mode>2 (neg, reg));
16937      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
16938      DONE;
16939    }
16940})
16941
;; Rotate left by an immediate count in 0..<sserotatemax>, emitted as
;; vprot with the count as the immediate.  Operand 1 may be memory.
16942(define_insn "xop_rotl<mode>3"
16943  [(set (match_operand:VI_128 0 "register_operand" "=x")
16944	(rotate:VI_128
16945	 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
16946	 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
16947  "TARGET_XOP"
16948  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16949  [(set_attr "type" "sseishft")
16950   (set_attr "length_immediate" "1")
16951   (set_attr "mode" "TI")])
16952
;; Rotate right by an immediate count.  The output code computes
;; operand 3 as (element bit size - operand 2) and prints vprot with that
;; value, i.e. the right rotate is emitted as the complementary left
;; rotate.
16953(define_insn "xop_rotr<mode>3"
16954  [(set (match_operand:VI_128 0 "register_operand" "=x")
16955	(rotatert:VI_128
16956	 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
16957	 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
16958  "TARGET_XOP"
16959{
16960  operands[3]
16961    = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
16962  return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
16963}
16964  [(set_attr "type" "sseishft")
16965   (set_attr "length_immediate" "1")
16966   (set_attr "mode" "TI")])
16967
;; Per-element variable rotate right: negates the count vector
;; (operand 2) into a fresh register and emits xop_vrotl<mode>3 with it.
16968(define_expand "vrotr<mode>3"
16969  [(match_operand:VI_128 0 "register_operand")
16970   (match_operand:VI_128 1 "register_operand")
16971   (match_operand:VI_128 2 "register_operand")]
16972  "TARGET_XOP"
16973{
16974  rtx reg = gen_reg_rtx (<MODE>mode);
16975  emit_insn (gen_neg<mode>2 (reg, operands[2]));
16976  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
16977  DONE;
16978})
16979
;; Per-element variable rotate left: forwards its three operands
;; unchanged to xop_vrotl<mode>3.
16980(define_expand "vrotl<mode>3"
16981  [(match_operand:VI_128 0 "register_operand")
16982   (match_operand:VI_128 1 "register_operand")
16983   (match_operand:VI_128 2 "register_operand")]
16984  "TARGET_XOP"
16985{
16986  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
16987  DONE;
16988})
16989
;; Variable vprot with a vector count (operand 2).  The RTL models it as
;; an if_then_else: where the count is >= 0, operand 1 is rotated left by
;; it; otherwise operand 1 is rotated right by the negated count.
;; Either operand 1 or operand 2 may be memory, but not both.
16990(define_insn "xop_vrotl<mode>3"
16991  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16992	(if_then_else:VI_128
16993	 (ge:VI_128
16994	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16995	  (const_int 0))
16996	 (rotate:VI_128
16997	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16998	  (match_dup 2))
16999	 (rotatert:VI_128
17000	  (match_dup 1)
17001	  (neg:VI_128 (match_dup 2)))))]
17002  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
17003  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17004  [(set_attr "type" "sseishft")
17005   (set_attr "prefix_data16" "0")
17006   (set_attr "prefix_extra" "2")
17007   (set_attr "mode" "TI")])
17008
17009;; XOP packed shift instructions.
;; Per-element variable logical shift right for the VI12_128 modes
;; (XOP only).  The count vector is negated and passed to xop_shl<mode>3
;; (defined outside this chunk); the template RTL is never emitted
;; because the preparation code always ends in DONE.
17010(define_expand "vlshr<mode>3"
17011  [(set (match_operand:VI12_128 0 "register_operand")
17012	(lshiftrt:VI12_128
17013	  (match_operand:VI12_128 1 "register_operand")
17014	  (match_operand:VI12_128 2 "nonimmediate_operand")))]
17015  "TARGET_XOP"
17016{
17017  rtx neg = gen_reg_rtx (<MODE>mode);
17018  emit_insn (gen_neg<mode>2 (neg, operands[2]));
17019  emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
17020  DONE;
17021})
17022
;; Per-element variable logical shift right for the VI48_128 modes.
;; With TARGET_AVX2 the preparation code does nothing and the lshiftrt
;; template is emitted.  Without AVX2 (i.e. XOP only) the count is negated
;; and xop_shl<mode>3 is emitted instead.
17023(define_expand "vlshr<mode>3"
17024  [(set (match_operand:VI48_128 0 "register_operand")
17025	(lshiftrt:VI48_128
17026	  (match_operand:VI48_128 1 "register_operand")
17027	  (match_operand:VI48_128 2 "nonimmediate_operand")))]
17028  "TARGET_AVX2 || TARGET_XOP"
17029{
17030  if (!TARGET_AVX2)
17031    {
17032      rtx neg = gen_reg_rtx (<MODE>mode);
17033      emit_insn (gen_neg<mode>2 (neg, operands[2]));
17034      emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
17035      DONE;
17036    }
17037})
17038
;; Per-element variable logical shift right for the VI48_512 modes.
;; There is no preparation code: the lshiftrt template is emitted as-is
;; under TARGET_AVX512F.
17039(define_expand "vlshr<mode>3"
17040  [(set (match_operand:VI48_512 0 "register_operand")
17041	(lshiftrt:VI48_512
17042	  (match_operand:VI48_512 1 "register_operand")
17043	  (match_operand:VI48_512 2 "nonimmediate_operand")))]
17044  "TARGET_AVX512F")
17045
;; Per-element variable logical shift right for the VI48_256 modes.
;; There is no preparation code: the lshiftrt template is emitted as-is
;; under TARGET_AVX2.
17046(define_expand "vlshr<mode>3"
17047  [(set (match_operand:VI48_256 0 "register_operand")
17048	(lshiftrt:VI48_256
17049	  (match_operand:VI48_256 1 "register_operand")
17050	  (match_operand:VI48_256 2 "nonimmediate_operand")))]
17051  "TARGET_AVX2")
17052
;; Per-element variable arithmetic right shift of V8HI.
;; When TARGET_XOP is set (checked first, even if AVX512BW+VL is also
;; available) the count vector is negated and xop_shav8hi3 is emitted.
;; Otherwise the preparation code emits nothing and the ashiftrt template
;; is generated as written.
17053(define_expand "vashrv8hi3<mask_name>"
17054  [(set (match_operand:V8HI 0 "register_operand")
17055	(ashiftrt:V8HI
17056	  (match_operand:V8HI 1 "register_operand")
17057	  (match_operand:V8HI 2 "nonimmediate_operand")))]
17058  "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
17059{
17060  if (TARGET_XOP)
17061    {
17062      rtx neg = gen_reg_rtx (V8HImode);
17063      emit_insn (gen_negv8hi2 (neg, operands[2]));
17064      emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
17065      DONE;
17066    }
17067})
17068
;; Per-element variable arithmetic right shift of V16QI; XOP only.
;; Always expands to a negation of the count vector followed by
;; xop_shav16qi3, which shifts right arithmetically for negative counts.
17069(define_expand "vashrv16qi3"
17070  [(set (match_operand:V16QI 0 "register_operand")
17071	(ashiftrt:V16QI
17072	  (match_operand:V16QI 1 "register_operand")
17073	  (match_operand:V16QI 2 "nonimmediate_operand")))]
17074  "TARGET_XOP"
17075{
17076   rtx neg = gen_reg_rtx (V16QImode);
17077   emit_insn (gen_negv16qi2 (neg, operands[2]));
17078   emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
17079   DONE;
17080})
17081
;; Per-element variable arithmetic right shift of V2DI.
;; When TARGET_XOP is set the count vector is negated and xop_shav2di3 is
;; emitted; otherwise (AVX512VL without XOP) the ashiftrt template is
;; generated as written.
17082(define_expand "vashrv2di3<mask_name>"
17083  [(set (match_operand:V2DI 0 "register_operand")
17084	(ashiftrt:V2DI
17085	  (match_operand:V2DI 1 "register_operand")
17086	  (match_operand:V2DI 2 "nonimmediate_operand")))]
17087  "TARGET_XOP || TARGET_AVX512VL"
17088{
17089  if (TARGET_XOP)
17090    {
17091      rtx neg = gen_reg_rtx (V2DImode);
17092      emit_insn (gen_negv2di2 (neg, operands[2]));
17093      emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
17094      DONE;
17095    }
17096})
17097
;; Per-element variable arithmetic right shift of V4SI.
;; Unlike the V8HI/V2DI expanders above, the XOP sequence (negate count,
;; then xop_shav4si3) is used only when AVX2 is NOT available; with
;; TARGET_AVX2 the ashiftrt template is generated as written.
17098(define_expand "vashrv4si3"
17099  [(set (match_operand:V4SI 0 "register_operand")
17100	(ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
17101		       (match_operand:V4SI 2 "nonimmediate_operand")))]
17102  "TARGET_AVX2 || TARGET_XOP"
17103{
17104  if (!TARGET_AVX2)
17105    {
17106      rtx neg = gen_reg_rtx (V4SImode);
17107      emit_insn (gen_negv4si2 (neg, operands[2]));
17108      emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
17109      DONE;
17110    }
17111})
17112
;; Per-element variable arithmetic right shift of V16SI.  No preparation
;; statements: under TARGET_AVX512F the template is emitted unchanged.
17113(define_expand "vashrv16si3"
17114  [(set (match_operand:V16SI 0 "register_operand")
17115	(ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
17116		        (match_operand:V16SI 2 "nonimmediate_operand")))]
17117  "TARGET_AVX512F")
17118
;; Per-element variable arithmetic right shift of V8SI.  No preparation
;; statements: under TARGET_AVX2 the template is emitted unchanged.
17119(define_expand "vashrv8si3"
17120  [(set (match_operand:V8SI 0 "register_operand")
17121	(ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
17122		       (match_operand:V8SI 2 "nonimmediate_operand")))]
17123  "TARGET_AVX2")
17124
;; Per-element variable left shift for the VI12_128 modes; XOP only.
;; The count vector is passed unmodified to xop_sha<mode>3, whose RTL is a
;; left shift for non-negative counts, so no negation is needed here.
17125(define_expand "vashl<mode>3"
17126  [(set (match_operand:VI12_128 0 "register_operand")
17127	(ashift:VI12_128
17128	  (match_operand:VI12_128 1 "register_operand")
17129	  (match_operand:VI12_128 2 "nonimmediate_operand")))]
17130  "TARGET_XOP"
17131{
17132  emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
17133  DONE;
17134})
17135
;; Per-element variable left shift for the VI48_128 modes.
;; With TARGET_AVX2 the ashift template is generated as written.  Without
;; AVX2 (XOP only) the count vector is first forced into a register and
;; xop_sha<mode>3 is emitted with it.
17136(define_expand "vashl<mode>3"
17137  [(set (match_operand:VI48_128 0 "register_operand")
17138	(ashift:VI48_128
17139	  (match_operand:VI48_128 1 "register_operand")
17140	  (match_operand:VI48_128 2 "nonimmediate_operand")))]
17141  "TARGET_AVX2 || TARGET_XOP"
17142{
17143  if (!TARGET_AVX2)
17144    {
17145      operands[2] = force_reg (<MODE>mode, operands[2]);
17146      emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
17147      DONE;
17148    }
17149})
17150
;; Per-element variable left shift for the VI48_512 modes.  No preparation
;; statements: under TARGET_AVX512F the template is emitted unchanged.
17151(define_expand "vashl<mode>3"
17152  [(set (match_operand:VI48_512 0 "register_operand")
17153	(ashift:VI48_512
17154	  (match_operand:VI48_512 1 "register_operand")
17155	  (match_operand:VI48_512 2 "nonimmediate_operand")))]
17156  "TARGET_AVX512F")
17157
;; Per-element variable left shift for the VI48_256 modes.  No preparation
;; statements: under TARGET_AVX2 the template is emitted unchanged.
17158(define_expand "vashl<mode>3"
17159  [(set (match_operand:VI48_256 0 "register_operand")
17160	(ashift:VI48_256
17161	  (match_operand:VI48_256 1 "register_operand")
17162	  (match_operand:VI48_256 2 "nonimmediate_operand")))]
17163  "TARGET_AVX2")
17164
;; XOP vpsha: signed-count arithmetic shift for all VI_128 modes.
;; The RTL is an if_then_else on (count >= 0): left shift of operand 1 by
;; the count when true, arithmetic right shift by the negated count when
;; false.  The two alternatives let either operand 1 or operand 2 (but, per
;; the insn condition, not both) be a memory operand.
17165(define_insn "xop_sha<mode>3"
17166  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
17167	(if_then_else:VI_128
17168	 (ge:VI_128
17169	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
17170	  (const_int 0))
17171	 (ashift:VI_128
17172	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
17173	  (match_dup 2))
17174	 (ashiftrt:VI_128
17175	  (match_dup 1)
17176	  (neg:VI_128 (match_dup 2)))))]
17177  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
17178  "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17179  [(set_attr "type" "sseishft")
17180   (set_attr "prefix_data16" "0")
17181   (set_attr "prefix_extra" "2")
17182   (set_attr "mode" "TI")])
17183
;; XOP vpshl: signed-count logical shift for all VI_128 modes.
;; Same shape as xop_sha<mode>3, except that the negative-count arm is a
;; logical (lshiftrt) rather than arithmetic right shift.  The insn
;; condition rejects the case where operands 1 and 2 are both in memory.
17184(define_insn "xop_shl<mode>3"
17185  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
17186	(if_then_else:VI_128
17187	 (ge:VI_128
17188	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
17189	  (const_int 0))
17190	 (ashift:VI_128
17191	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
17192	  (match_dup 2))
17193	 (lshiftrt:VI_128
17194	  (match_dup 1)
17195	  (neg:VI_128 (match_dup 2)))))]
17196  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
17197  "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17198  [(set_attr "type" "sseishft")
17199   (set_attr "prefix_data16" "0")
17200   (set_attr "prefix_extra" "2")
17201   (set_attr "mode" "TI")])
17202
;; Shift of a byte-element vector (VI1_AVX512 modes) by a single scalar
;; SImode count, for every code in any_shift.
;;
;; XOP V16QI path: the scalar count is broadcast into a V16QI register via
;; vec_initv16qiqi and fed to xop_shl (LSHIFTRT) or xop_sha (ASHIFT and
;; ASHIFTRT).  Right shifts need a negative per-element count: a constant
;; count is negated up front, a variable count is broadcast first and the
;; whole vector negated afterwards.
;;
;; All other cases are delegated to ix86_expand_vecop_qihi.
;;
;; The elements placed in the vec_init PARALLEL must be QImode to match the
;; V16QI element mode; operand 2 is SImode, so its QImode low part is used.
17203(define_expand "<shift_insn><mode>3"
17204  [(set (match_operand:VI1_AVX512 0 "register_operand")
17205	(any_shift:VI1_AVX512
17206	  (match_operand:VI1_AVX512 1 "register_operand")
17207	  (match_operand:SI 2 "nonmemory_operand")))]
17208  "TARGET_SSE2"
17209{
17210  if (TARGET_XOP && <MODE>mode == V16QImode)
17211    {
17212      bool negate = false;
17213      rtx (*gen) (rtx, rtx, rtx);
17214      rtx tmp, par;
17215      int i;
17216
17217      if (<CODE> != ASHIFT)
17218	{
17219	  if (CONST_INT_P (operands[2]))
17220	    operands[2] = GEN_INT (-INTVAL (operands[2]));
17221	  else
17222	    negate = true;
17223	}
17224      par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
      /* Narrow the SImode count to the QImode element mode before
	 building the vector initializer.  */
      tmp = lowpart_subreg (QImode, operands[2], SImode);
17225      for (i = 0; i < 16; i++)
17226        XVECEXP (par, 0, i) = tmp;
17227
17228      tmp = gen_reg_rtx (V16QImode);
17229      emit_insn (gen_vec_initv16qiqi (tmp, par));
17230
17231      if (negate)
17232	emit_insn (gen_negv16qi2 (tmp, tmp));
17233
17234      gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
17235      emit_insn (gen (operands[0], operands[1], tmp));
17236    }
17237  else
17238    ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
17239  DONE;
17240})
17241
;; Arithmetic right shift of V2DI by a single scalar DImode count.
;; With TARGET_AVX512VL the ashiftrt template is generated as written.
;; Otherwise (XOP only) the count is broadcast into a V2DI register with
;; vec_initv2didi and passed to xop_shav2di3.  That insn shifts right only
;; for negative counts, so a constant count is negated before the broadcast
;; and a variable count is negated as a vector after it.
17242(define_expand "ashrv2di3"
17243  [(set (match_operand:V2DI 0 "register_operand")
17244	(ashiftrt:V2DI
17245	  (match_operand:V2DI 1 "register_operand")
17246	  (match_operand:DI 2 "nonmemory_operand")))]
17247  "TARGET_XOP || TARGET_AVX512VL"
17248{
17249  if (!TARGET_AVX512VL)
17250    {
17251      rtx reg = gen_reg_rtx (V2DImode);
17252      rtx par;
17253      bool negate = false;
17254      int i;
17255
17256      if (CONST_INT_P (operands[2]))
17257	operands[2] = GEN_INT (-INTVAL (operands[2]));
17258      else
17259	negate = true;
17260
17261      par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
17262      for (i = 0; i < 2; i++)
17263	XVECEXP (par, 0, i) = operands[2];
17264
17265      emit_insn (gen_vec_initv2didi (reg, par));
17266
17267      if (negate)
17268	emit_insn (gen_negv2di2 (reg, reg));
17269
17270      emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
17271      DONE;
17272    }
17273})
17274
17275;; XOP FRCZ support
;; Full-vector form: operand 1 (register or memory) is wrapped in
;; UNSPEC_FRCZ for every FMAMODE mode and output as vfrcz<ssemodesuffix>.
17276(define_insn "xop_frcz<mode>2"
17277  [(set (match_operand:FMAMODE 0 "register_operand" "=x")
17278	(unspec:FMAMODE
17279	 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
17280	 UNSPEC_FRCZ))]
17281  "TARGET_XOP"
17282  "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
17283  [(set_attr "type" "ssecvt1")
17284   (set_attr "mode" "<MODE>")])
17285
;; Expander for the vec_merge ("vm") FRCZ form on VF_128 modes.
;; Builds a vec_merge with mask (const_int 1) between the UNSPEC_FRCZ
;; result and operand 2, which the preparation statement sets to the
;; all-zero constant of the vector mode.
17286(define_expand "xop_vmfrcz<mode>2"
17287  [(set (match_operand:VF_128 0 "register_operand")
17288	(vec_merge:VF_128
17289	  (unspec:VF_128
17290	   [(match_operand:VF_128 1 "nonimmediate_operand")]
17291	   UNSPEC_FRCZ)
17292	  (match_dup 2)
17293	  (const_int 1)))]
17294  "TARGET_XOP"
17295  "operands[2] = CONST0_RTX (<MODE>mode);")
17296
;; Insn matching the RTL produced by the xop_vmfrcz<mode>2 expander:
;; vec_merge of UNSPEC_FRCZ(operand 1) with a zero vector (const0_operand)
;; under mask 1.  Output uses the scalar mnemonic suffix
;; (<ssescalarmodesuffix>); the Intel-syntax operand uses <iptr>.
17297(define_insn "*xop_vmfrcz<mode>2"
17298  [(set (match_operand:VF_128 0 "register_operand" "=x")
17299	(vec_merge:VF_128
17300	  (unspec:VF_128
17301	   [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
17302	   UNSPEC_FRCZ)
17303	  (match_operand:VF_128 2 "const0_operand")
17304	  (const_int 1)))]
17305  "TARGET_XOP"
17306  "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
17307  [(set_attr "type" "ssecvt1")
17308   (set_attr "mode" "<MODE>")])
17309
;; XOP vpcom* integer vector compare for VI_128 modes.  The comparison code
;; is operand 1, accepted by the ix86_comparison_int_operator predicate,
;; and is printed into the mnemonic through the %Y1 operand modifier
;; (implemented outside this chunk; presumably emits the condition suffix).
;; NOTE(review): this pattern uses type "sse4arg" whereas the unsigned
;; variants below use "ssecmp" -- confirm that is intentional.
17310(define_insn "xop_maskcmp<mode>3"
17311  [(set (match_operand:VI_128 0 "register_operand" "=x")
17312	(match_operator:VI_128 1 "ix86_comparison_int_operator"
17313	 [(match_operand:VI_128 2 "register_operand" "x")
17314	  (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
17315  "TARGET_XOP"
17316  "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
17317  [(set_attr "type" "sse4arg")
17318   (set_attr "prefix_data16" "0")
17319   (set_attr "prefix_rep" "0")
17320   (set_attr "prefix_extra" "2")
17321   (set_attr "length_immediate" "1")
17322   (set_attr "mode" "TI")])
17323
;; Unsigned counterpart of xop_maskcmp<mode>3: the comparison operator must
;; satisfy ix86_comparison_uns_operator and the mnemonic gains a "u" before
;; the element-size suffix (vpcom<cond>u<suffix>).
17324(define_insn "xop_maskcmp_uns<mode>3"
17325  [(set (match_operand:VI_128 0 "register_operand" "=x")
17326	(match_operator:VI_128 1 "ix86_comparison_uns_operator"
17327	 [(match_operand:VI_128 2 "register_operand" "x")
17328	  (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
17329  "TARGET_XOP"
17330  "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
17331  [(set_attr "type" "ssecmp")
17332   (set_attr "prefix_data16" "0")
17333   (set_attr "prefix_rep" "0")
17334   (set_attr "prefix_extra" "2")
17335   (set_attr "length_immediate" "1")
17336   (set_attr "mode" "TI")])
17337
17338;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
17339;; and pcomneu* not to be converted to the signed ones in case somebody needs
17340;; the exact instruction generated for the intrinsic.
;; The comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP, so this pattern is
;; distinct RTL from xop_maskcmp_uns<mode>3 while emitting the same
;; assembler template.
17341(define_insn "xop_maskcmp_uns2<mode>3"
17342  [(set (match_operand:VI_128 0 "register_operand" "=x")
17343	(unspec:VI_128
17344	 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
17345	  [(match_operand:VI_128 2 "register_operand" "x")
17346	   (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
17347	 UNSPEC_XOP_UNSIGNED_CMP))]
17348  "TARGET_XOP"
17349  "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
17350  [(set_attr "type" "ssecmp")
17351   (set_attr "prefix_data16" "0")
17352   (set_attr "prefix_extra" "2")
17353   (set_attr "length_immediate" "1")
17354   (set_attr "mode" "TI")])
17355
17356;; Pcomtrue and pcomfalse support.  These are useless instructions, but are
17357;; being added here to be complete.
;; Operand 3 is a compile-time integer selector: a nonzero value emits
;; vpcomtrue<suffix>, zero emits vpcomfalse<suffix>.  Operands 1 and 2 are
;; only carried through UNSPEC_XOP_TRUEFALSE and printed as the sources.
17358(define_insn "xop_pcom_tf<mode>3"
17359  [(set (match_operand:VI_128 0 "register_operand" "=x")
17360	(unspec:VI_128
17361	  [(match_operand:VI_128 1 "register_operand" "x")
17362	   (match_operand:VI_128 2 "nonimmediate_operand" "xm")
17363	   (match_operand:SI 3 "const_int_operand" "n")]
17364	  UNSPEC_XOP_TRUEFALSE))]
17365  "TARGET_XOP"
17366{
17367  return ((INTVAL (operands[3]) != 0)
17368	  ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17369	  : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
17370}
17371  [(set_attr "type" "ssecmp")
17372   (set_attr "prefix_data16" "0")
17373   (set_attr "prefix_extra" "2")
17374   (set_attr "length_immediate" "1")
17375   (set_attr "mode" "TI")])
17376
;; XOP vpermil2ps/vpermil2pd for VF_128_256 modes, modelled as
;; UNSPEC_VPERMIL2 over two float source vectors (operands 1, 2), an
;; integer selector vector of the matching <sseintvecmode> (operand 3) and
;; an immediate in 0..3 (operand 4).  The two alternatives allow either
;; operand 2 or operand 3 to be in memory.
17377(define_insn "xop_vpermil2<mode>3"
17378  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
17379	(unspec:VF_128_256
17380	  [(match_operand:VF_128_256 1 "register_operand" "x,x")
17381	   (match_operand:VF_128_256 2 "nonimmediate_operand" "x,m")
17382	   (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm,x")
17383	   (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
17384	  UNSPEC_VPERMIL2))]
17385  "TARGET_XOP"
17386  "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
17387  [(set_attr "type" "sse4arg")
17388   (set_attr "length_immediate" "1")
17389   (set_attr "mode" "<MODE>")])
17390
17391;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17392
;; AES encryption round, modelled as UNSPEC_AESENC on V2DI.
;; Alternative 0 (isa "noavx") ties operand 1 to the destination and emits
;; the two-operand aesenc; alternative 1 (isa "avx") emits the
;; three-operand vaesenc.
17393(define_insn "aesenc"
17394  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17395	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17396		       (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17397		      UNSPEC_AESENC))]
17398  "TARGET_AES"
17399  "@
17400   aesenc\t{%2, %0|%0, %2}
17401   vaesenc\t{%2, %1, %0|%0, %1, %2}"
17402  [(set_attr "isa" "noavx,avx")
17403   (set_attr "type" "sselog1")
17404   (set_attr "prefix_extra" "1")
17405   (set_attr "prefix" "orig,vex")
17406   (set_attr "btver2_decode" "double,double")
17407   (set_attr "mode" "TI")])
17408
;; Final AES encryption round, modelled as UNSPEC_AESENCLAST on V2DI.
;; Alternative 0 (isa "noavx") ties operand 1 to the destination and emits
;; aesenclast; alternative 1 (isa "avx") emits three-operand vaesenclast.
17409(define_insn "aesenclast"
17410  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17411	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17412		       (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17413		      UNSPEC_AESENCLAST))]
17414  "TARGET_AES"
17415  "@
17416   aesenclast\t{%2, %0|%0, %2}
17417   vaesenclast\t{%2, %1, %0|%0, %1, %2}"
17418  [(set_attr "isa" "noavx,avx")
17419   (set_attr "type" "sselog1")
17420   (set_attr "prefix_extra" "1")
17421   (set_attr "prefix" "orig,vex")
17422   (set_attr "btver2_decode" "double,double")
17423   (set_attr "mode" "TI")])
17424
;; AES decryption round, modelled as UNSPEC_AESDEC on V2DI.
;; Alternative 0 (isa "noavx") ties operand 1 to the destination and emits
;; aesdec; alternative 1 (isa "avx") emits three-operand vaesdec.
17425(define_insn "aesdec"
17426  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17427	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17428		       (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17429		      UNSPEC_AESDEC))]
17430  "TARGET_AES"
17431  "@
17432   aesdec\t{%2, %0|%0, %2}
17433   vaesdec\t{%2, %1, %0|%0, %1, %2}"
17434  [(set_attr "isa" "noavx,avx")
17435   (set_attr "type" "sselog1")
17436   (set_attr "prefix_extra" "1")
17437   (set_attr "prefix" "orig,vex")
17438   (set_attr "btver2_decode" "double,double")
17439   (set_attr "mode" "TI")])
17440
;; Final AES decryption round, modelled as UNSPEC_AESDECLAST on V2DI.
;; Alternative 0 (isa "noavx") ties operand 1 to the destination and emits
;; aesdeclast; alternative 1 (isa "avx") emits three-operand vaesdeclast.
17441(define_insn "aesdeclast"
17442  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17443	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17444		       (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17445		      UNSPEC_AESDECLAST))]
17446  "TARGET_AES"
17447  "@
17448   aesdeclast\t{%2, %0|%0, %2}
17449   vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
17450  [(set_attr "isa" "noavx,avx")
17451   (set_attr "type" "sselog1")
17452   (set_attr "prefix_extra" "1")
17453   (set_attr "prefix" "orig,vex")
17454   (set_attr "btver2_decode" "double,double")
17455   (set_attr "mode" "TI")])
17456
;; AES InvMixColumns, modelled as UNSPEC_AESIMC on V2DI with one source.
;; A single alternative serves both encodings: the template starts with %v
;; and the prefix attribute is "maybe_vex" (the %v handling lives outside
;; this chunk; presumably it emits the "v" mnemonic prefix under AVX).
17457(define_insn "aesimc"
17458  [(set (match_operand:V2DI 0 "register_operand" "=x")
17459	(unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
17460		      UNSPEC_AESIMC))]
17461  "TARGET_AES"
17462  "%vaesimc\t{%1, %0|%0, %1}"
17463  [(set_attr "type" "sselog1")
17464   (set_attr "prefix_extra" "1")
17465   (set_attr "prefix" "maybe_vex")
17466   (set_attr "mode" "TI")])
17467
;; AES key-generation assist, modelled as UNSPEC_AESKEYGENASSIST on V2DI.
;; Operand 2 is an 8-bit immediate (const_0_to_255_operand), reflected in
;; length_immediate.  One alternative covers both encodings through the %v
;; template prefix and the "maybe_vex" prefix attribute.
17468(define_insn "aeskeygenassist"
17469  [(set (match_operand:V2DI 0 "register_operand" "=x")
17470	(unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
17471		      (match_operand:SI 2 "const_0_to_255_operand" "n")]
17472		     UNSPEC_AESKEYGENASSIST))]
17473  "TARGET_AES"
17474  "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
17475  [(set_attr "type" "sselog1")
17476   (set_attr "prefix_extra" "1")
17477   (set_attr "length_immediate" "1")
17478   (set_attr "prefix" "maybe_vex")
17479   (set_attr "mode" "TI")])
17480
;; Carry-less multiply, modelled as UNSPEC_PCLMUL on V2DI with an 8-bit
;; immediate selector (operand 3).  Alternative 0 (isa "noavx") ties
;; operand 1 to the destination and emits pclmulqdq; alternative 1
;; (isa "avx") emits the three-source-operand vpclmulqdq.
17481(define_insn "pclmulqdq"
17482  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17483	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17484		      (match_operand:V2DI 2 "vector_operand" "xBm,xm")
17485		      (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
17486		     UNSPEC_PCLMUL))]
17487  "TARGET_PCLMUL"
17488  "@
17489   pclmulqdq\t{%3, %2, %0|%0, %2, %3}
17490   vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17491  [(set_attr "isa" "noavx,avx")
17492   (set_attr "type" "sselog1")
17493   (set_attr "prefix_extra" "1")
17494   (set_attr "length_immediate" "1")
17495   (set_attr "prefix" "orig,vex")
17496   (set_attr "mode" "TI")])
17497
;; Expander that builds the RTL for vzeroall.
;; operands[0] becomes a PARALLEL of nregs + 1 elements, where nregs is 16
;; for TARGET_64BIT and 8 otherwise: element 0 is the UNSPECV_VZEROALL
;; unspec_volatile, and elements 1..nregs each SET one SSE register
;; (GET_SSE_REGNO (regno), viewed in V8SImode) to the zero vector, making
;; the clobber of every register explicit in the RTL.
17498(define_expand "avx_vzeroall"
17499  [(match_par_dup 0 [(const_int 0)])]
17500  "TARGET_AVX"
17501{
17502  int nregs = TARGET_64BIT ? 16 : 8;
17503  int regno;
17504
17505  operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
17506
17507  XVECEXP (operands[0], 0, 0)
17508    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
17509			       UNSPECV_VZEROALL);
17510
17511  for (regno = 0; regno < nregs; regno++)
17512    XVECEXP (operands[0], 0, regno + 1)
17513      = gen_rtx_SET (gen_rtx_REG (V8SImode, GET_SSE_REGNO (regno)),
17514		     CONST0_RTX (V8SImode));
17515})
17516
;; Insn emitting "vzeroall".  It matches a PARALLEL whose first element is
;; the UNSPECV_VZEROALL unspec_volatile; the remaining elements are
;; validated by the vzeroall_operation predicate (defined outside this
;; chunk).
17517(define_insn "*avx_vzeroall"
17518  [(match_parallel 0 "vzeroall_operation"
17519    [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
17520  "TARGET_AVX"
17521  "vzeroall"
17522  [(set_attr "type" "sse")
17523   (set_attr "modrm" "0")
17524   (set_attr "memory" "none")
17525   (set_attr "prefix" "vex")
17526   (set_attr "btver2_decode" "vector")
17527   (set_attr "mode" "OI")])
17528
17529;; Clear the upper 128bits of AVX registers, equivalent to a NOP
17530;; if the upper 128bits are unused.
;; Modelled as a bare unspec_volatile (UNSPECV_VZEROUPPER) with no register
;; operands, so the RTL itself names no register that it changes.
17531(define_insn "avx_vzeroupper"
17532  [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
17533  "TARGET_AVX"
17534  "vzeroupper"
17535  [(set_attr "type" "sse")
17536   (set_attr "modrm" "0")
17537   (set_attr "memory" "none")
17538   (set_attr "prefix" "vex")
17539   (set_attr "btver2_decode" "vector")
17540   (set_attr "mode" "OI")])
17541
;; ISA name required by the EVEX alternatives of the vpbroadcast patterns
;; below, per vector mode: byte and word element vectors map to
;; "avx512bw", dword and qword element vectors map to "avx512f".
17542(define_mode_attr pbroadcast_evex_isa
17543  [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
17544   (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
17545   (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
17546   (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")])
17547
;; vpbroadcast{b,w,d,q}: duplicate element 0 of a 128-bit source
;; (<ssexmmmode>, register or memory) into every element of a VI-mode
;; destination.  Alternative 0 is the VEX form on "x" registers;
;; alternative 1 is the EVEX form on "v" registers, gated by the
;; per-mode <pbroadcast_evex_isa> attribute.
17548(define_insn "avx2_pbroadcast<mode>"
17549  [(set (match_operand:VI 0 "register_operand" "=x,v")
17550	(vec_duplicate:VI
17551	  (vec_select:<ssescalarmode>
17552	    (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
17553	    (parallel [(const_int 0)]))))]
17554  "TARGET_AVX2"
17555  "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
17556  [(set_attr "isa" "*,<pbroadcast_evex_isa>")
17557   (set_attr "type" "ssemov")
17558   (set_attr "prefix_extra" "1")
17559   (set_attr "prefix" "vex,evex")
17560   (set_attr "mode" "<sseinsnmode>")])
17561
;; Variant of avx2_pbroadcast<mode> whose source is a full 256-bit vector
;; (VI_256) rather than a 128-bit one; only element 0 is read.
;; Alternatives 0/2 take the source from memory, 1/3 from a register
;; printed with the %x modifier (presumably the xmm view of the register).
;; Alternatives 2 and 3 use "v" registers and need <pbroadcast_evex_isa>.
;; NOTE(review): "prefix" is "vex" for all four alternatives although
;; alternatives 2/3 are the EVEX-capable ones -- confirm this is intended.
17562(define_insn "avx2_pbroadcast<mode>_1"
17563  [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
17564	(vec_duplicate:VI_256
17565	  (vec_select:<ssescalarmode>
17566	    (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
17567	    (parallel [(const_int 0)]))))]
17568  "TARGET_AVX2"
17569  "@
17570   vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
17571   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
17572   vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
17573   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
17574  [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
17575   (set_attr "type" "ssemov")
17576   (set_attr "prefix_extra" "1")
17577   (set_attr "prefix" "vex")
17578   (set_attr "mode" "<sseinsnmode>")])
17579
;; Variable full-vector permute (vperm<suffix>) for 256/512-bit vectors
;; with 32- or 64-bit elements, modelled as UNSPEC_VPERMVAR.  Operand 1 is
;; the data (register or memory); operand 2 is the index vector in the
;; matching integer mode and must be a register.  Note the assembler
;; operand order: the data operand %1 is printed before the index %2 in
;; AT&T syntax.
17580(define_insn "<avx2_avx512>_permvar<mode><mask_name>"
17581  [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
17582	(unspec:VI48F_256_512
17583	  [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
17584	   (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17585	  UNSPEC_VPERMVAR))]
17586  "TARGET_AVX2 && <mask_mode512bit_condition>"
17587  "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17588  [(set_attr "type" "sselog")
17589   (set_attr "prefix" "<mask_prefix2>")
17590   (set_attr "mode" "<sseinsnmode>")])
17591
;; Variable permute for byte-element vectors (VI1_AVX512VL modes); same
;; UNSPEC_VPERMVAR shape and operand order as the pattern above, but
;; requires TARGET_AVX512VBMI.
17592(define_insn "<avx512>_permvar<mode><mask_name>"
17593  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17594	(unspec:VI1_AVX512VL
17595	  [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
17596	   (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17597	  UNSPEC_VPERMVAR))]
17598  "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
17599  "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17600  [(set_attr "type" "sselog")
17601   (set_attr "prefix" "<mask_prefix2>")
17602   (set_attr "mode" "<sseinsnmode>")])
17603
;; Variable permute for word-element vectors (VI2_AVX512VL modes); same
;; UNSPEC_VPERMVAR shape and operand order as the patterns above, but
;; requires TARGET_AVX512BW.
17604(define_insn "<avx512>_permvar<mode><mask_name>"
17605  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17606	(unspec:VI2_AVX512VL
17607	  [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
17608	   (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17609	  UNSPEC_VPERMVAR))]
17610  "TARGET_AVX512BW && <mask_mode512bit_condition>"
17611  "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17612  [(set_attr "type" "sselog")
17613   (set_attr "prefix" "<mask_prefix2>")
17614   (set_attr "mode" "<sseinsnmode>")])
17615
;; Immediate-controlled permute of a 4 x 64-bit vector (VI8F_256 modes).
;; The 8-bit immediate (operand 2) is split into four 2-bit element
;; selectors, lowest bits first, which are passed as separate constants
;; to avx2_perm<mode>_1 where they form the vec_select parallel.
17616(define_expand "avx2_perm<mode>"
17617  [(match_operand:VI8F_256 0 "register_operand")
17618   (match_operand:VI8F_256 1 "nonimmediate_operand")
17619   (match_operand:SI 2 "const_0_to_255_operand")]
17620  "TARGET_AVX2"
17621{
17622  int mask = INTVAL (operands[2]);
17623  emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
17624				    GEN_INT ((mask >> 0) & 3),
17625				    GEN_INT ((mask >> 2) & 3),
17626				    GEN_INT ((mask >> 4) & 3),
17627				    GEN_INT ((mask >> 6) & 3)));
17628  DONE;
17629})
17630
;; Masked form of avx2_perm<mode> (requires TARGET_AVX512VL).
;; Splits the 8-bit immediate into four 2-bit selectors exactly as the
;; unmasked expander does and forwards operand 3 (merge source, per the
;; vector_move_operand predicate) and operand 4 (mask register) to the
;; _1_mask insn generator.
17631(define_expand "avx512vl_perm<mode>_mask"
17632  [(match_operand:VI8F_256 0 "register_operand")
17633   (match_operand:VI8F_256 1 "nonimmediate_operand")
17634   (match_operand:SI 2 "const_0_to_255_operand")
17635   (match_operand:VI8F_256 3 "vector_move_operand")
17636   (match_operand:<avx512fmaskmode> 4 "register_operand")]
17637  "TARGET_AVX512VL"
17638{
17639  int mask = INTVAL (operands[2]);
17640  emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
17641						  GEN_INT ((mask >> 0) & 3),
17642						  GEN_INT ((mask >> 2) & 3),
17643						  GEN_INT ((mask >> 4) & 3),
17644						  GEN_INT ((mask >> 6) & 3),
17645						  operands[3], operands[4]));
17646  DONE;
17647})
17648
;; Insn behind avx2_perm<mode>: a vec_select of operand 1 with four
;; element indices, each in 0..3.  The output code packs the four indices
;; back into one 8-bit immediate (2 bits each, operand 2 in the low bits)
;; and overwrites operands[2] with it so the template can print it as %2.
17649(define_insn "avx2_perm<mode>_1<mask_name>"
17650  [(set (match_operand:VI8F_256 0 "register_operand" "=v")
17651	(vec_select:VI8F_256
17652	  (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
17653	  (parallel [(match_operand 2 "const_0_to_3_operand")
17654		     (match_operand 3 "const_0_to_3_operand")
17655		     (match_operand 4 "const_0_to_3_operand")
17656		     (match_operand 5 "const_0_to_3_operand")])))]
17657  "TARGET_AVX2 && <mask_mode512bit_condition>"
17658{
17659  int mask = 0;
17660  mask |= INTVAL (operands[2]) << 0;
17661  mask |= INTVAL (operands[3]) << 2;
17662  mask |= INTVAL (operands[4]) << 4;
17663  mask |= INTVAL (operands[5]) << 6;
17664  operands[2] = GEN_INT (mask);
17665  return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
17666}
17667  [(set_attr "type" "sselog")
17668   (set_attr "prefix" "<mask_prefix2>")
17669   (set_attr "mode" "<sseinsnmode>")])
17670
;; Immediate-controlled permute of an 8 x 64-bit vector (V8FI modes).
;; The 8-bit immediate yields four 2-bit selectors; the same four are
;; applied to both halves of the vector, so the upper-half indices passed
;; to avx512f_perm<mode>_1 are the lower-half ones plus 4.
17671(define_expand "avx512f_perm<mode>"
17672  [(match_operand:V8FI 0 "register_operand")
17673   (match_operand:V8FI 1 "nonimmediate_operand")
17674   (match_operand:SI 2 "const_0_to_255_operand")]
17675  "TARGET_AVX512F"
17676{
17677  int mask = INTVAL (operands[2]);
17678  emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
17679				       GEN_INT ((mask >> 0) & 3),
17680				       GEN_INT ((mask >> 2) & 3),
17681				       GEN_INT ((mask >> 4) & 3),
17682				       GEN_INT ((mask >> 6) & 3),
17683				       GEN_INT (((mask >> 0) & 3) + 4),
17684				       GEN_INT (((mask >> 2) & 3) + 4),
17685				       GEN_INT (((mask >> 4) & 3) + 4),
17686				       GEN_INT (((mask >> 6) & 3) + 4)));
17687  DONE;
17688})
17689
;; Masked form of avx512f_perm<mode>.  Computes the same eight element
;; indices (four selectors, repeated with +4 for the upper half) and
;; additionally forwards operand 3 (merge source) and operand 4 (mask
;; register) to avx512f_perm<mode>_1_mask.
17690(define_expand "avx512f_perm<mode>_mask"
17691  [(match_operand:V8FI 0 "register_operand")
17692   (match_operand:V8FI 1 "nonimmediate_operand")
17693   (match_operand:SI 2 "const_0_to_255_operand")
17694   (match_operand:V8FI 3 "vector_move_operand")
17695   (match_operand:<avx512fmaskmode> 4 "register_operand")]
17696  "TARGET_AVX512F"
17697{
17698  int mask = INTVAL (operands[2]);
17699  emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
17700					    GEN_INT ((mask >> 0) & 3),
17701					    GEN_INT ((mask >> 2) & 3),
17702					    GEN_INT ((mask >> 4) & 3),
17703					    GEN_INT ((mask >> 6) & 3),
17704					    GEN_INT (((mask >> 0) & 3) + 4),
17705					    GEN_INT (((mask >> 2) & 3) + 4),
17706					    GEN_INT (((mask >> 4) & 3) + 4),
17707					    GEN_INT (((mask >> 6) & 3) + 4),
17708					    operands[3], operands[4]));
17709  DONE;
17710})
17711
;; Insn behind avx512f_perm<mode>: vec_select of operand 1 with eight
;; indices, operands 2-5 in 0..3 and operands 6-9 in 4..7.  The insn
;; condition additionally requires each upper-half index to equal the
;; corresponding lower-half index plus 4, since a single 8-bit immediate
;; drives both halves.  The output code rebuilds that immediate from
;; operands 2-5 only and stores it in operands[2] for printing.
17712(define_insn "avx512f_perm<mode>_1<mask_name>"
17713  [(set (match_operand:V8FI 0 "register_operand" "=v")
17714	(vec_select:V8FI
17715	  (match_operand:V8FI 1 "nonimmediate_operand" "vm")
17716	  (parallel [(match_operand 2 "const_0_to_3_operand")
17717		     (match_operand 3 "const_0_to_3_operand")
17718		     (match_operand 4 "const_0_to_3_operand")
17719		     (match_operand 5 "const_0_to_3_operand")
17720		     (match_operand 6 "const_4_to_7_operand")
17721		     (match_operand 7 "const_4_to_7_operand")
17722		     (match_operand 8 "const_4_to_7_operand")
17723		     (match_operand 9 "const_4_to_7_operand")])))]
17724  "TARGET_AVX512F && <mask_mode512bit_condition>
17725   && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
17726       && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
17727       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
17728       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
17729{
17730  int mask = 0;
17731  mask |= INTVAL (operands[2]) << 0;
17732  mask |= INTVAL (operands[3]) << 2;
17733  mask |= INTVAL (operands[4]) << 4;
17734  mask |= INTVAL (operands[5]) << 6;
17735  operands[2] = GEN_INT (mask);
17736  return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
17737}
17738  [(set_attr "type" "sselog")
17739   (set_attr "prefix" "<mask_prefix2>")
17740   (set_attr "mode" "<sseinsnmode>")])
17741
;; vperm2i128 on V4DI, modelled as UNSPEC_VPERMTI over two source vectors
;; (operand 2 may be memory) and an 8-bit immediate selector (operand 3).
17742(define_insn "avx2_permv2ti"
17743  [(set (match_operand:V4DI 0 "register_operand" "=x")
17744	(unspec:V4DI
17745	  [(match_operand:V4DI 1 "register_operand" "x")
17746	   (match_operand:V4DI 2 "nonimmediate_operand" "xm")
17747	   (match_operand:SI 3 "const_0_to_255_operand" "n")]
17748	  UNSPEC_VPERMTI))]
17749  "TARGET_AVX2"
17750  "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17751  [(set_attr "type" "sselog")
17752   (set_attr "prefix" "vex")
17753   (set_attr "mode" "OI")])
17754
;; vbroadcastsd from a register: duplicate element 0 of a V2DF register
;; (register_operand only, no memory alternative) into all four elements
;; of a V4DF destination.
17755(define_insn "avx2_vec_dupv4df"
17756  [(set (match_operand:V4DF 0 "register_operand" "=v")
17757	(vec_duplicate:V4DF
17758	  (vec_select:DF
17759	    (match_operand:V2DF 1 "register_operand" "v")
17760	    (parallel [(const_int 0)]))))]
17761  "TARGET_AVX2"
17762  "vbroadcastsd\t{%1, %0|%0, %1}"
17763  [(set_attr "type" "sselog1")
17764   (set_attr "prefix" "maybe_evex")
17765   (set_attr "mode" "V4DF")])
17766
;; EVEX vpbroadcast of element 0 where the source has the same (full)
;; vector mode as the destination.  Alternative 0 takes a register source
;; and prints it with %x; alternative 1 takes a memory source.
;; NOTE(review): the memory alternative prints %x1 in AT&T syntax but
;; %<iptr>1 in Intel syntax -- presumably equivalent for a MEM; confirm.
17767(define_insn "<avx512>_vec_dup<mode>_1"
17768  [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
17769	(vec_duplicate:VI_AVX512BW
17770	  (vec_select:<ssescalarmode>
17771	    (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
17772	    (parallel [(const_int 0)]))))]
17773  "TARGET_AVX512F"
17774  "@
17775   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
17776   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
17777  [(set_attr "type" "ssemov")
17778   (set_attr "prefix" "evex")
17779   (set_attr "mode" "<sseinsnmode>")])
17780
;; EVEX broadcast of element 0 of a 128-bit source into a vector with
;; 32- or 64-bit elements (V48_AVX512VL modes), optionally masked.
;; V2DF is special-cased to vpbroadcastq (see the comment in the output
;; code); every other mode uses v<sseintprefix>broadcast<bcstscalarsuff>.
17781(define_insn "<avx512>_vec_dup<mode><mask_name>"
17782  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
17783	(vec_duplicate:V48_AVX512VL
17784	  (vec_select:<ssescalarmode>
17785	    (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17786	    (parallel [(const_int 0)]))))]
17787  "TARGET_AVX512F"
17788{
17789  /*  There is no DF broadcast (in AVX-512*) to 128b register.
17790      Mimic it with integer variant.  */
17791  if (<MODE>mode == V2DFmode)
17792    return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
17793
17794  return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}";
17795}
17796  [(set_attr "type" "ssemov")
17797   (set_attr "prefix" "evex")
17798   (set_attr "mode" "<sseinsnmode>")])
17799
;; EVEX vpbroadcast{b,w}: broadcast element 0 of a 128-bit source into a
;; byte- or word-element vector (VI12_AVX512VL modes), optionally masked.
;; Requires TARGET_AVX512BW.
17800(define_insn "<avx512>_vec_dup<mode><mask_name>"
17801  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
17802	(vec_duplicate:VI12_AVX512VL
17803	  (vec_select:<ssescalarmode>
17804	    (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17805	    (parallel [(const_int 0)]))))]
17806  "TARGET_AVX512BW"
17807  "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}"
17808  [(set_attr "type" "ssemov")
17809   (set_attr "prefix" "evex")
17810   (set_attr "mode" "<sseinsnmode>")])
17811
;; Broadcast a whole 128-bit vector (<ssexmmmode>) into a 512-bit vector
;; with 32-bit elements (V16FI modes), optionally masked.
;; Alternative 0 (register source) uses vshuf<shuffletype>32x4 with
;; immediate 0 on the %g-printed source for both inputs; alternative 1
;; (memory source) uses vbroadcast<shuffletype>32x4.
17812(define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
17813  [(set (match_operand:V16FI 0 "register_operand" "=v,v")
17814	(vec_duplicate:V16FI
17815	  (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
17816  "TARGET_AVX512F"
17817  "@
17818   vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
17819   vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17820  [(set_attr "type" "ssemov")
17821   (set_attr "prefix" "evex")
17822   (set_attr "mode" "<sseinsnmode>")])
17823
;; Broadcast a half-width vector (<ssehalfvecmode>) into a 512-bit vector
;; with 64-bit elements (V8FI modes), optionally masked.
;; Alternative 0 (register source) uses vshuf<shuffletype>64x2 with
;; immediate 0x44 on the %g-printed source for both inputs; alternative 1
;; (memory source) uses vbroadcast<shuffletype>64x4.
17824(define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
17825  [(set (match_operand:V8FI 0 "register_operand" "=v,v")
17826	(vec_duplicate:V8FI
17827	  (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
17828  "TARGET_AVX512F"
17829  "@
17830   vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
17831   vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17832  [(set_attr "type" "ssemov")
17833   (set_attr "prefix" "evex")
17834   (set_attr "mode" "<sseinsnmode>")])
17835
;; Broadcast a scalar byte/word into a VI12_AVX512VL vector, optionally
;; masked; requires TARGET_AVX512BW.  Alternative 0 takes the scalar from
;; a vector register or memory; alternative 1 takes it from a
;; general-purpose register, printed with the %k modifier.
17836(define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
17837  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
17838	(vec_duplicate:VI12_AVX512VL
17839	  (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
17840  "TARGET_AVX512BW"
17841  "@
17842   vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
17843   vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
17844  [(set_attr "type" "ssemov")
17845   (set_attr "prefix" "evex")
17846   (set_attr "mode" "<sseinsnmode>")])
17847
;; Broadcast a 32/64-bit scalar into a V48_AVX512VL vector, optionally
;; masked.  Alternative 0 takes the scalar from a vector register or
;; memory.  Alternative 1 takes it from a general-purpose register and is
;; enabled (via the "enabled" attribute) only when the scalar mode is an
;; integer mode, and for DImode only when TARGET_64BIT.
17848(define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
17849  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
17850	(vec_duplicate:V48_AVX512VL
17851	  (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
17852  "TARGET_AVX512F"
17853  "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17854  [(set_attr "type" "ssemov")
17855   (set_attr "prefix" "evex")
17856   (set_attr "mode" "<sseinsnmode>")
17857   (set (attr "enabled")
17858     (if_then_else (eq_attr "alternative" "1")
17859	(symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
17860		     && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
17861	(const_int 1)))])
17862
;; Broadcast an SF scalar into V4SF.  Three alternatives:
;;   0 (avx)   register source: vshufps with immediate 0 on itself;
;;   1 (avx)   memory source:   vbroadcastss;
;;   2 (noavx) source tied to the destination: shufps with immediate 0.
17863(define_insn "vec_dupv4sf"
17864  [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
17865	(vec_duplicate:V4SF
17866	  (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
17867  "TARGET_SSE"
17868  "@
17869   vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
17870   vbroadcastss\t{%1, %0|%0, %1}
17871   shufps\t{$0, %0, %0|%0, %0, 0}"
17872  [(set_attr "isa" "avx,avx,noavx")
17873   (set_attr "type" "sseshuf1,ssemov,sseshuf1")
17874   (set_attr "length_immediate" "1,0,1")
17875   (set_attr "prefix_extra" "0,1,*")
17876   (set_attr "prefix" "maybe_evex,maybe_evex,orig")
17877   (set_attr "mode" "V4SF")])
17878
;; Duplicate an SI scalar into all four elements of a V4SI vector.
;; Alternatives (see the "isa" attribute):
;;   0: SSE2, register source  -> pshufd with immediate 0 (%v-prefixed)
;;   1: AVX, memory source     -> vbroadcastss
;;   2: non-AVX, source tied to the destination -> shufps in place
17879(define_insn "*vec_dupv4si"
17880  [(set (match_operand:V4SI 0 "register_operand"     "=v,v,x")
17881	(vec_duplicate:V4SI
17882	  (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
17883  "TARGET_SSE"
17884  "@
17885   %vpshufd\t{$0, %1, %0|%0, %1, 0}
17886   vbroadcastss\t{%1, %0|%0, %1}
17887   shufps\t{$0, %0, %0|%0, %0, 0}"
17888  [(set_attr "isa" "sse2,avx,noavx")
17889   (set_attr "type" "sselog1,ssemov,sselog1")
17890   (set_attr "length_immediate" "1,0,1")
17891   (set_attr "prefix_extra" "0,1,*")
17892   (set_attr "prefix" "maybe_vex,maybe_evex,orig")
17893   (set_attr "mode" "TI,V4SF,V4SF")])
17894
17894
;; Duplicate a DI scalar into both elements of a V2DI vector.
;; Alternatives (see the "isa" attribute):
;;   0: SSE2 without AVX, source tied to the destination -> punpcklqdq
;;   1: AVX, register source  -> vpunpcklqdq
;;   2: SSE3, memory source   -> movddup (%v-prefixed)
;;   3: non-AVX, source tied to the destination -> movlhps
17895(define_insn "*vec_dupv2di"
17896  [(set (match_operand:V2DI 0 "register_operand"     "=x,v,v,x")
17897	(vec_duplicate:V2DI
17898	  (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,m,0")))]
17899  "TARGET_SSE"
17900  "@
17901   punpcklqdq\t%0, %0
17902   vpunpcklqdq\t{%d1, %0|%0, %d1}
17903   %vmovddup\t{%1, %0|%0, %1}
17904   movlhps\t%0, %0"
17905  [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
17906   (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
17907   (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig")
17908   (set_attr "mode" "TI,TI,DF,V4SF")])
17909
17909
;; Build a 256-bit integer vector by concatenating a half-width memory
;; operand with itself (match_dup 1).  All three alternatives take the same
;; memory source and differ only in the encoding chosen by "isa":
;; VEX vbroadcasti128, the AVX512DQ form, or the AVX512VL 32x4 form.
17910(define_insn "avx2_vbroadcasti128_<mode>"
17911  [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
17912	(vec_concat:VI_256
17913	  (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
17914	  (match_dup 1)))]
17915  "TARGET_AVX2"
17916  "@
17917   vbroadcasti128\t{%1, %0|%0, %1}
17918   vbroadcast<i128vldq>\t{%1, %0|%0, %1}
17919   vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
17920  [(set_attr "isa" "*,avx512dq,avx512vl")
17921   (set_attr "type" "ssemov")
17922   (set_attr "prefix_extra" "1")
17923   (set_attr "prefix" "vex,evex,evex")
17924   (set_attr "mode" "OI")])
17925
17925
17926;; Modes handled by AVX vec_dup patterns.
17927(define_mode_iterator AVX_VEC_DUP_MODE
17928  [V8SI V8SF V4DI V4DF])
;; Suffix appended to "vbroadcast" for each AVX_VEC_DUP_MODE mode: "ss" for
;; the 8-element modes, "sd" for the 4-element modes.
17929(define_mode_attr vecdupssescalarmodesuffix
17930  [(V8SF "ss") (V4DF "sd") (V8SI "ss") (V4DI "sd")])
17931;; Modes handled by AVX2 vec_dup patterns.
17932(define_mode_iterator AVX2_VEC_DUP_MODE
17933  [V32QI V16QI V16HI V8HI V8SI V4SI])
17934
17934
;; AVX2 broadcast of a scalar into an AVX2_VEC_DUP_MODE integer vector.
;; Alternative 0 broadcasts from memory, alternative 1 from a vector
;; register (printed with %x).  Alternative 2 takes a general register,
;; is limited to "noavx512vl" by the isa attribute, and emits "#" so it
;; has to be split into other insns.
17935(define_insn "*vec_dup<mode>"
17936  [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,Yi")
17937	(vec_duplicate:AVX2_VEC_DUP_MODE
17938	  (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
17939  "TARGET_AVX2"
17940  "@
17941   v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
17942   v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
17943   #"
17944  [(set_attr "isa" "*,*,noavx512vl")
17945   (set_attr "type" "ssemov")
17946   (set_attr "prefix_extra" "1")
17947   (set_attr "prefix" "maybe_evex")
17948   (set_attr "mode" "<sseinsnmode>")])
17949
17949
;; Broadcast a scalar into a 256-bit AVX_VEC_DUP_MODE vector.
;; Alternatives (see the "isa" attribute):
;;   0: AVX2, memory source
;;   1: no AVX2, memory source; uses vbroadcast<vecdupssescalarmodesuffix>
;;   2: AVX2, vector-register source (printed with %x)
;;   3: AVX512F, vector-register source; the destination is printed with %g
;;   4: no AVX2, vector-register source; emitted as "#" and split later
17950(define_insn "vec_dup<mode>"
17951  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
17952	(vec_duplicate:AVX_VEC_DUP_MODE
17953	  (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
17954  "TARGET_AVX"
17955  "@
17956   v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
17957   vbroadcast<vecdupssescalarmodesuffix>\t{%1, %0|%0, %1}
17958   v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
17959   v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
17960   #"
17961  [(set_attr "type" "ssemov")
17962   (set_attr "prefix_extra" "1")
17963   (set_attr "prefix" "maybe_evex")
17964   (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
17965   (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
17966
17966
;; Post-reload split of an AVX2 vec_duplicate whose scalar source ended up
;; in a general register.  Two insns are emitted: gen_vec_setv4si_0 on the
;; V4SI lowpart of the destination (with a zero vector and the SImode
;; lowpart of the source), then gen_avx2_pbroadcast<mode> from the
;; <ssexmmmode> lowpart of the destination into the full destination.
17967(define_split
17968  [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
17969	(vec_duplicate:AVX2_VEC_DUP_MODE
17970	  (match_operand:<ssescalarmode> 1 "register_operand")))]
17971  "TARGET_AVX2
17972   /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
17973      available, because then we can broadcast from GPRs directly.
17974      For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
17975      for V*SI mode it requires just -mavx512vl.  */
17976   && !(TARGET_AVX512VL
17977	&& (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
17978   && reload_completed && GENERAL_REG_P (operands[1])"
17979  [(const_int 0)]
17980{
17981  emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
17982				CONST0_RTX (V4SImode),
17983				gen_lowpart (SImode, operands[1])));
17984  emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
17985					gen_lowpart (<ssexmmmode>mode,
17986						     operands[0])));
17987  DONE;
17988})
17989
17989
;; Post-reload split for AVX without AVX2: a 256-bit vec_duplicate from a
;; register becomes a vec_duplicate into the half-width lowpart of the
;; destination (operand 2), followed by a vec_concat of that half with
;; itself.
17990(define_split
17991  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
17992	(vec_duplicate:AVX_VEC_DUP_MODE
17993	  (match_operand:<ssescalarmode> 1 "register_operand")))]
17994  "TARGET_AVX && !TARGET_AVX2 && reload_completed"
17995  [(set (match_dup 2)
17996	(vec_duplicate:<ssehalfvecmode> (match_dup 1)))
17997   (set (match_dup 0)
17998	(vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
17999  "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
18000
18000
;; Build a 256-bit vector by concatenating a half-width operand with
;; itself (match_dup 1).
;; Alternatives 0-2 are VEX encoded: broadcast from memory, vinsert when
;; the source is tied to the destination, and vperm2 for any other
;; register source.
;; Alternatives 3-4 (AVX512DQ) and 5-6 (AVX512VL) are the EVEX
;; broadcast-from-memory / vinsert-when-tied pairs.
18001(define_insn "avx_vbroadcastf128_<mode>"
18002  [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
18003	(vec_concat:V_256
18004	  (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
18005	  (match_dup 1)))]
18006  "TARGET_AVX"
18007  "@
18008   vbroadcast<i128>\t{%1, %0|%0, %1}
18009   vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
18010   vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}
18011   vbroadcast<i128vldq>\t{%1, %0|%0, %1}
18012   vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1}
18013   vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}
18014   vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}"
18015  [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl")
18016   (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1")
18017   (set_attr "prefix_extra" "1")
18018   (set_attr "length_immediate" "0,1,1,0,1,0,1")
18019   (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
18020   (set_attr "mode" "<sseinsnmode>")])
18021
18021
18022;; For broadcast[i|f]32x2.  Yes there is no v4sf version, only v4si.
18023(define_mode_iterator VI4F_BRCST32x2
18024  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
18025   V16SF (V8SF "TARGET_AVX512VL")])
18026
;; Two-element 64-bit-element vector mode associated with each listed
;; 4- or 8-element 64-bit-element vector mode.
18027(define_mode_attr 64x2mode
18028  [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
18029
;; Two-element 32-bit-element vector mode associated with each
;; VI4F_BRCST32x2 mode.
18030(define_mode_attr 32x2mode
18031  [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
18032  (V8SF "V2SF") (V4SI "V2SI")])
18033
18033
;; vbroadcast{i,f}32x2: select elements 0 and 1 of the <ssexmmmode>
;; operand 1 and duplicate that pair across the destination.
;; Requires AVX512DQ.
18034(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
18035  [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
18036	(vec_duplicate:VI4F_BRCST32x2
18037	  (vec_select:<32x2mode>
18038	    (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
18039	    (parallel [(const_int 0) (const_int 1)]))))]
18040  "TARGET_AVX512DQ"
18041  "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
18042  [(set_attr "type" "ssemov")
18043   (set_attr "prefix_extra" "1")
18044   (set_attr "prefix" "evex")
18045   (set_attr "mode" "<sseinsnmode>")])
18046
18046
;; Duplicate a <ssexmmmode> operand across a VI4F_256 vector (AVX512VL).
;; A register source uses vshuf*32x4 with immediate 0 on the source
;; printed with %t; a memory source uses vbroadcast*32x4.
18047(define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
18048  [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
18049        (vec_duplicate:VI4F_256
18050         (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
18051  "TARGET_AVX512VL"
18052  "@
18053   vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
18054   vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18055  [(set_attr "type" "ssemov")
18056   (set_attr "prefix_extra" "1")
18057   (set_attr "prefix" "evex")
18058   (set_attr "mode" "<sseinsnmode>")])
18059
18059
;; Duplicate a half-width operand across a V16FI vector (AVX512DQ).
;; A register source uses vshuf*32x4 with immediate 0x44 on the source
;; printed with %g; a memory source uses vbroadcast*32x8.
18060(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
18061  [(set (match_operand:V16FI 0 "register_operand" "=v,v")
18062       (vec_duplicate:V16FI
18063         (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
18064  "TARGET_AVX512DQ"
18065  "@
18066   vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
18067   vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18068  [(set_attr "type" "ssemov")
18069   (set_attr "prefix_extra" "1")
18070   (set_attr "prefix" "evex")
18071   (set_attr "mode" "<sseinsnmode>")])
18072
18072
18073;; For broadcast[i|f]64x2
;; The 512-bit modes are unconditional; the 256-bit modes additionally
;; require AVX512VL.
18074(define_mode_iterator VI8F_BRCST64x2
18075  [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
18076
18076
;; Duplicate a <64x2mode> operand across a VI8F_BRCST64x2 vector
;; (AVX512DQ).  A register source uses vshuf*64x2 with immediate 0 on the
;; source printed with the <concat_tg_mode> modifier; a memory source uses
;; vbroadcast*64x2.
18077(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
18078  [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
18079       (vec_duplicate:VI8F_BRCST64x2
18080         (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
18081  "TARGET_AVX512DQ"
18082  "@
18083   vshuf<shuffletype>64x2\t{$0x0, %<concat_tg_mode>1, %<concat_tg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<concat_tg_mode>1, %<concat_tg_mode>1, 0x0}
18084   vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18085  [(set_attr "type" "ssemov")
18086   (set_attr "prefix_extra" "1")
18087   (set_attr "prefix" "evex")
18088   (set_attr "mode" "<sseinsnmode>")])
18089
18089
;; vpbroadcastmb2q: zero-extend a QImode mask register (constraint "Yk")
;; to DImode and duplicate it into every element of a VI8_AVX512VL vector.
;; Requires AVX512CD.
18090(define_insn "avx512cd_maskb_vec_dup<mode>"
18091  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
18092	(vec_duplicate:VI8_AVX512VL
18093	  (zero_extend:DI
18094	    (match_operand:QI 1 "register_operand" "Yk"))))]
18095  "TARGET_AVX512CD"
18096  "vpbroadcastmb2q\t{%1, %0|%0, %1}"
18097  [(set_attr "type" "mskmov")
18098   (set_attr "prefix" "evex")
18099   (set_attr "mode" "XI")])
18100
18100
;; vpbroadcastmw2d: zero-extend an HImode mask register (constraint "Yk")
;; to SImode and duplicate it into every element of a VI4_AVX512VL vector.
;; Requires AVX512CD.
18101(define_insn "avx512cd_maskw_vec_dup<mode>"
18102  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
18103	(vec_duplicate:VI4_AVX512VL
18104	  (zero_extend:SI
18105	    (match_operand:HI 1 "register_operand" "Yk"))))]
18106  "TARGET_AVX512CD"
18107  "vpbroadcastmw2d\t{%1, %0|%0, %1}"
18108  [(set_attr "type" "mskmov")
18109   (set_attr "prefix" "evex")
18110   (set_attr "mode" "XI")])
18111
18111
18112;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
18113;; If it so happens that the input is in memory, use vbroadcast.
18114;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; Operand 3 is the first selector index of the parallel, i.e. the element
;; being broadcast.  For the memory alternatives the address is advanced by
;; elt * 4 bytes and re-typed as SFmode; for the register alternative the
;; vpermilps immediate is elt * 0x55.
18115(define_insn "*avx_vperm_broadcast_v4sf"
18116  [(set (match_operand:V4SF 0 "register_operand" "=v,v,v")
18117	(vec_select:V4SF
18118	  (match_operand:V4SF 1 "nonimmediate_operand" "m,o,v")
18119	  (match_parallel 2 "avx_vbroadcast_operand"
18120	    [(match_operand 3 "const_int_operand" "C,n,n")])))]
18121  "TARGET_AVX"
18122{
18123  int elt = INTVAL (operands[3]);
18124  switch (which_alternative)
18125    {
18126    case 0:
18127    case 1:
18128      operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
18129      return "vbroadcastss\t{%1, %0|%0, %k1}";
18130    case 2:
18131      operands[2] = GEN_INT (elt * 0x55);
18132      return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
18133    default:
18134      gcc_unreachable ();
18135    }
18136}
18137  [(set_attr "type" "ssemov,ssemov,sselog1")
18138   (set_attr "prefix_extra" "1")
18139   (set_attr "length_immediate" "0,0,1")
18140   (set_attr "prefix" "maybe_evex")
18141   (set_attr "mode" "SF,SF,V4SF")])
18142
18142
;; Broadcast expressed as a vec_select, for 256-bit float vectors.  The
;; insn always prints "#" and relies on the post-reload split below.
;;  - Memory source: operand 1 is re-addressed to the selected element and
;;    the replacement pattern (a vec_duplicate of it) is emitted.
;;  - Register source: with AVX2 and element 0, a vec_dup of the scalar
;;    lowpart is emitted.  Otherwise a vpermil replicates the element
;;    within its 128-bit lane and a second shuffle copies that lane to
;;    both halves.
;; NOTE(review): the split condition excludes V4DF when AVX2 is enabled;
;; presumably another pattern handles that case -- confirm.
18143(define_insn_and_split "*avx_vperm_broadcast_<mode>"
18144  [(set (match_operand:VF_256 0 "register_operand" "=v,v,v")
18145	(vec_select:VF_256
18146	  (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
18147	  (match_parallel 2 "avx_vbroadcast_operand"
18148	    [(match_operand 3 "const_int_operand" "C,n,n")])))]
18149  "TARGET_AVX"
18150  "#"
18151  "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
18152  [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
18153{
18154  rtx op0 = operands[0], op1 = operands[1];
18155  int elt = INTVAL (operands[3]);
18156
18157  if (REG_P (op1))
18158    {
18159      int mask;
18160
18161      if (TARGET_AVX2 && elt == 0)
18162	{
18163	  emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
18164							  op1)));
18165	  DONE;
18166	}
18167
18168      /* Shuffle element we care about into all elements of the 128-bit lane.
18169	 The other lane gets shuffled too, but we don't care.  */
18170      if (<MODE>mode == V4DFmode)
18171	mask = (elt & 1 ? 15 : 0);
18172      else
18173	mask = (elt & 3) * 0x55;
18174      emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
18175
18176      /* Shuffle the lane we care about into both lanes of the dest.  */
18177      mask = (elt / (<ssescalarnum> / 2)) * 0x11;
18178      if (EXT_REX_SSE_REG_P (op0))
18179	{
18180	  /* There is no EVEX VPERM2F128, but we can use either VBROADCASTSS
18181	     or VSHUFF128.  */
18182	  gcc_assert (<MODE>mode == V8SFmode);
18183	  if ((mask & 1) == 0)
18184	    emit_insn (gen_avx2_vec_dupv8sf (op0,
18185					     gen_lowpart (V4SFmode, op0)));
18186	  else
18187	    emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
18188						  GEN_INT (4), GEN_INT (5),
18189						  GEN_INT (6), GEN_INT (7),
18190						  GEN_INT (12), GEN_INT (13),
18191						  GEN_INT (14), GEN_INT (15)));
18192	  DONE;
18193	}
18194
18195      emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
18196      DONE;
18197    }
18198
  /* Memory source: point operand 1 at the selected scalar element and fall
     through so the vec_duplicate replacement pattern is emitted.  */
18199  operands[1] = adjust_address (op1, <ssescalarmode>mode,
18200				elt * GET_MODE_SIZE (<ssescalarmode>mode));
18201})
18202
18202
;; Expander for vpermilpd with an immediate.  The immediate (operand 2) is
;; rewritten into a PARALLEL of element indices for the vec_select.
;; Elements are handled in pairs: bit i of the mask selects within the
;; pair starting at index i for element i, and bit i+1 does the same for
;; element i+1.
18203(define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
18204  [(set (match_operand:VF2 0 "register_operand")
18205	(vec_select:VF2
18206	  (match_operand:VF2 1 "nonimmediate_operand")
18207	  (match_operand:SI 2 "const_0_to_255_operand")))]
18208  "TARGET_AVX && <mask_mode512bit_condition>"
18209{
18210  int mask = INTVAL (operands[2]);
18211  rtx perm[<ssescalarnum>];
18212
18213  int i;
18214  for (i = 0; i < <ssescalarnum>; i = i + 2)
18215    {
18216      perm[i]     = GEN_INT (((mask >> i)       & 1) + i);
18217      perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
18218    }
18219
18220  operands[2]
18221    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
18222})
18223
18223
;; Expander for vpermilps with an immediate.  The immediate (operand 2) is
;; rewritten into a PARALLEL of element indices for the vec_select.
;; The same four 2-bit fields of the mask are applied to every group of
;; four elements, offset by the group's base index i.
18224(define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
18225  [(set (match_operand:VF1 0 "register_operand")
18226	(vec_select:VF1
18227	  (match_operand:VF1 1 "nonimmediate_operand")
18228	  (match_operand:SI 2 "const_0_to_255_operand")))]
18229  "TARGET_AVX && <mask_mode512bit_condition>"
18230{
18231  int mask = INTVAL (operands[2]);
18232  rtx perm[<ssescalarnum>];
18233
18234  int i;
18235  for (i = 0; i < <ssescalarnum>; i = i + 4)
18236    {
18237      perm[i]     = GEN_INT (((mask >> 0) & 3) + i);
18238      perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
18239      perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
18240      perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
18241    }
18242
18243  operands[2]
18244    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
18245})
18246
18246
;; Matches a vec_select whose PARALLEL is accepted by
;; avx_vpermilp_parallel.  That function's return value minus one is used
;; as the vpermil immediate, so a zero return means "no match" in the insn
;; condition.
18247(define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
18248  [(set (match_operand:VF 0 "register_operand" "=v")
18249	(vec_select:VF
18250	  (match_operand:VF 1 "nonimmediate_operand" "vm")
18251	  (match_parallel 2 ""
18252	    [(match_operand 3 "const_int_operand")])))]
18253  "TARGET_AVX && <mask_mode512bit_condition>
18254   && avx_vpermilp_parallel (operands[2], <MODE>mode)"
18255{
18256  int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
18257  operands[2] = GEN_INT (mask);
18258  return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
18259}
18260  [(set_attr "type" "sselog")
18261   (set_attr "prefix_extra" "1")
18262   (set_attr "length_immediate" "1")
18263   (set_attr "prefix" "<mask_prefix>")
18264   (set_attr "mode" "<sseinsnmode>")])
18265
18265
;; Variable-control vpermil: operand 1 is the data vector and operand 2 an
;; integer vector (register or memory) holding the control.  It is
;; represented as UNSPEC_VPERMIL rather than as a vec_select.
18266(define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
18267  [(set (match_operand:VF 0 "register_operand" "=v")
18268	(unspec:VF
18269	  [(match_operand:VF 1 "register_operand" "v")
18270	   (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
18271	  UNSPEC_VPERMIL))]
18272  "TARGET_AVX && <mask_mode512bit_condition>"
18273  "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18274  [(set_attr "type" "sselog")
18275   (set_attr "prefix_extra" "1")
18276   (set_attr "btver2_decode" "vector")
18277   (set_attr "prefix" "<mask_prefix>")
18278   (set_attr "mode" "<sseinsnmode>")])
18279
18279
;; Modes used by the vpermi2/vpermt2 patterns.  512-bit 32/64-bit-element
;; modes are unconditional; narrower vectors need AVX512VL; 16-bit
;; elements need AVX512BW and 8-bit elements need AVX512VBMI (plus
;; AVX512VL below 512 bits).
18280(define_mode_iterator VPERMI2
18281  [V16SI V16SF V8DI V8DF
18282   (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
18283   (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
18284   (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
18285   (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
18286   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
18287   (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
18288   (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
18289   (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
18290
18290
;; Integer-only subset of VPERMI2, with the same per-mode conditions.
18291(define_mode_iterator VPERMI2I
18292  [V16SI V8DI
18293   (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
18294   (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
18295   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
18296   (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
18297   (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
18298   (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
18299
18299
;; Expander for merge-masked vpermi2.  The index vector (operand 2) is
;; forced into a register, and its lowpart viewed in the data mode becomes
;; operand 5, the value merged in where the mask (operand 4) is clear.
18300(define_expand "<avx512>_vpermi2var<mode>3_mask"
18301  [(set (match_operand:VPERMI2 0 "register_operand")
18302	(vec_merge:VPERMI2
18303	  (unspec:VPERMI2
18304	    [(match_operand:<sseintvecmode> 2 "register_operand")
18305	     (match_operand:VPERMI2 1 "register_operand")
18306	     (match_operand:VPERMI2 3 "nonimmediate_operand")]
18307	    UNSPEC_VPERMT2)
18308	  (match_dup 5)
18309	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
18310  "TARGET_AVX512F"
18311{
18312  operands[2] = force_reg (<sseintvecmode>mode, operands[2]);
18313  operands[5] = gen_lowpart (<MODE>mode, operands[2]);
18314})
18315
18315
;; Merge-masked vpermi2 for integer modes.  The index vector (operand 2)
;; is tied to the destination and is also the merge source (match_dup 2).
18316(define_insn "*<avx512>_vpermi2var<mode>3_mask"
18317  [(set (match_operand:VPERMI2I 0 "register_operand" "=v")
18318	(vec_merge:VPERMI2I
18319	  (unspec:VPERMI2I
18320	    [(match_operand:<sseintvecmode> 2 "register_operand" "0")
18321	     (match_operand:VPERMI2I 1 "register_operand" "v")
18322	     (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")]
18323	    UNSPEC_VPERMT2)
18324	  (match_dup 2)
18325	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18326  "TARGET_AVX512F"
18327  "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18328  [(set_attr "type" "sselog")
18329   (set_attr "prefix" "evex")
18330   (set_attr "mode" "<sseinsnmode>")])
18331
18331
;; Merge-masked vpermi2 for float modes.  As in the integer pattern, the
;; index vector (operand 2) is tied to the destination.  Because its mode
;; differs from the data mode, the merge source is written as a subreg of
;; it at offset 0.
18332(define_insn "*<avx512>_vpermi2var<mode>3_mask"
18333  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
18334	(vec_merge:VF_AVX512VL
18335	  (unspec:VF_AVX512VL
18336	    [(match_operand:<sseintvecmode> 2 "register_operand" "0")
18337	     (match_operand:VF_AVX512VL 1 "register_operand" "v")
18338	     (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")]
18339	    UNSPEC_VPERMT2)
18340	  (subreg:VF_AVX512VL (match_dup 2) 0)
18341	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18342  "TARGET_AVX512F"
18343  "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18344  [(set_attr "type" "sselog")
18345   (set_attr "prefix" "evex")
18346   (set_attr "mode" "<sseinsnmode>")])
18347
18347
;; Expander for zero-masked vpermt2.  It forwards to the generated
;; _maskz_1 pattern, passing a zero vector of the data mode ahead of the
;; mask operand.
18348(define_expand "<avx512>_vpermt2var<mode>3_maskz"
18349  [(match_operand:VPERMI2 0 "register_operand")
18350   (match_operand:<sseintvecmode> 1 "register_operand")
18351   (match_operand:VPERMI2 2 "register_operand")
18352   (match_operand:VPERMI2 3 "nonimmediate_operand")
18353   (match_operand:<avx512fmaskmode> 4 "register_operand")]
18354  "TARGET_AVX512F"
18355{
18356  emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
18357	operands[0], operands[1], operands[2], operands[3],
18358	CONST0_RTX (<MODE>mode), operands[4]));
18359  DONE;
18360})
18361
18361
;; Two-source permute, in plain and <sd_maskz_name> variants.
;; Alternative 0 ties the first data vector (operand 2) to the destination
;; and emits vpermt2.  Alternative 1 ties the index vector (operand 1) to
;; the destination and emits vpermi2.
18362(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
18363  [(set (match_operand:VPERMI2 0 "register_operand" "=v,v")
18364	(unspec:VPERMI2
18365	  [(match_operand:<sseintvecmode> 1 "register_operand" "v,0")
18366	   (match_operand:VPERMI2 2 "register_operand" "0,v")
18367	   (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")]
18368	  UNSPEC_VPERMT2))]
18369  "TARGET_AVX512F"
18370  "@
18371   vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}
18372   vpermi2<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
18373  [(set_attr "type" "sselog")
18374   (set_attr "prefix" "evex")
18375   (set_attr "mode" "<sseinsnmode>")])
18376
18376
;; Merge-masked vpermt2.  The first data vector (operand 2) is tied to the
;; destination and is also the merge source (match_dup 2).
18377(define_insn "<avx512>_vpermt2var<mode>3_mask"
18378  [(set (match_operand:VPERMI2 0 "register_operand" "=v")
18379	(vec_merge:VPERMI2
18380	  (unspec:VPERMI2
18381	    [(match_operand:<sseintvecmode> 1 "register_operand" "v")
18382	    (match_operand:VPERMI2 2 "register_operand" "0")
18383	    (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")]
18384	    UNSPEC_VPERMT2)
18385	  (match_dup 2)
18386	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18387  "TARGET_AVX512F"
18388  "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18389  [(set_attr "type" "sselog")
18390   (set_attr "prefix" "evex")
18391   (set_attr "mode" "<sseinsnmode>")])
18392
18392
;; Expander for vperm2f128 with an immediate.  When neither bit 3 nor
;; bit 7 of the immediate is set (mask & 0x88 == 0), the operation is
;; emitted as a vec_select over the vec_concat of the two inputs.  Bits
;; 0-1 pick the half-vector for the low half of the result and bits 4-5
;; pick it for the high half.  Otherwise control falls through and the
;; UNSPEC_VPERMIL2F128 form in the pattern is used.
18393(define_expand "avx_vperm2f128<mode>3"
18394  [(set (match_operand:AVX256MODE2P 0 "register_operand")
18395	(unspec:AVX256MODE2P
18396	  [(match_operand:AVX256MODE2P 1 "register_operand")
18397	   (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
18398	   (match_operand:SI 3 "const_0_to_255_operand")]
18399	  UNSPEC_VPERMIL2F128))]
18400  "TARGET_AVX"
18401{
18402  int mask = INTVAL (operands[3]);
18403  if ((mask & 0x88) == 0)
18404    {
18405      rtx perm[<ssescalarnum>], t1, t2;
18406      int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
18407
18408      base = (mask & 3) * nelt2;
18409      for (i = 0; i < nelt2; ++i)
18410	perm[i] = GEN_INT (base + i);
18411
18412      base = ((mask >> 4) & 3) * nelt2;
18413      for (i = 0; i < nelt2; ++i)
18414	perm[i + nelt2] = GEN_INT (base + i);
18415
18416      t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
18417			       operands[1], operands[2]);
18418      t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
18419      t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
18420      t2 = gen_rtx_SET (operands[0], t2);
18421      emit_insn (t2);
18422      DONE;
18423    }
18424})
18425
18425
18426;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
18427;; means that in order to represent this properly in rtl we'd have to
18428;; nest *another* vec_concat with a zero operand and do the select from
18429;; a 4x wide vector.  That doesn't seem very nice.
;; This pattern therefore keeps the operation as UNSPEC_VPERMIL2F128 and
;; passes the immediate through unchanged.
18430(define_insn "*avx_vperm2f128<mode>_full"
18431  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
18432	(unspec:AVX256MODE2P
18433	  [(match_operand:AVX256MODE2P 1 "register_operand" "x")
18434	   (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
18435	   (match_operand:SI 3 "const_0_to_255_operand" "n")]
18436	  UNSPEC_VPERMIL2F128))]
18437  "TARGET_AVX"
18438  "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18439  [(set_attr "type" "sselog")
18440   (set_attr "prefix_extra" "1")
18441   (set_attr "length_immediate" "1")
18442   (set_attr "prefix" "vex")
18443   (set_attr "mode" "<sseinsnmode>")])
18444
18444
;; vperm2f128 without lane zeroing, expressed as a vec_select over the
;; vec_concat of the two inputs.  avx_vperm2f128_parallel returns the
;; immediate plus one, so zero means "no match" in the insn condition.
;; Immediates 0x12 and 0x20 are emitted as vinsert of the low 128 bits of
;; operand 2 (%x2) into lane 0 or lane 1 of operand 1 respectively; every
;; other immediate uses vperm2.
18445(define_insn "*avx_vperm2f128<mode>_nozero"
18446  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
18447	(vec_select:AVX256MODE2P
18448	  (vec_concat:<ssedoublevecmode>
18449	    (match_operand:AVX256MODE2P 1 "register_operand" "x")
18450	    (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
18451	  (match_parallel 3 ""
18452	    [(match_operand 4 "const_int_operand")])))]
18453  "TARGET_AVX
18454   && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
18455{
18456  int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
18457  if (mask == 0x12)
18458    return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
18459  if (mask == 0x20)
18460    return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
18461  operands[3] = GEN_INT (mask);
18462  return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
18463}
18464  [(set_attr "type" "sselog")
18465   (set_attr "prefix_extra" "1")
18466   (set_attr "length_immediate" "1")
18467   (set_attr "prefix" "vex")
18468   (set_attr "mode" "<sseinsnmode>")])
18469
18469
;; A single-input vec_select accepted by palignr_operand, emitted as
;; (v)palignr with the input used for both sources.  The byte shift is the
;; first selector index (operand 3) times the element size of the vector
;; mode.
;; Alternatives: 0 = legacy SSSE3 (source tied to the destination),
;; 1 = AVX, 2 = AVX512BW.
18470(define_insn "*ssse3_palignr<mode>_perm"
18471  [(set (match_operand:V_128 0 "register_operand" "=x,x,v")
18472      (vec_select:V_128
18473	(match_operand:V_128 1 "register_operand" "0,x,v")
18474	(match_parallel 2 "palignr_operand"
18475	  [(match_operand 3 "const_int_operand" "n,n,n")])))]
18476  "TARGET_SSSE3"
18477{
18478  operands[2] = (GEN_INT (INTVAL (operands[3])
18479		 * GET_MODE_UNIT_SIZE (GET_MODE (operands[0]))));
18480
18481  switch (which_alternative)
18482    {
18483    case 0:
18484      return "palignr\t{%2, %1, %0|%0, %1, %2}";
18485    case 1:
18486    case 2:
18487      return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
18488    default:
18489      gcc_unreachable ();
18490    }
18491}
18492  [(set_attr "isa" "noavx,avx,avx512bw")
18493   (set_attr "type" "sseishft")
18494   (set_attr "atom_unit" "sishuf")
18495   (set_attr "prefix_data16" "1,*,*")
18496   (set_attr "prefix_extra" "1")
18497   (set_attr "length_immediate" "1")
18498   (set_attr "prefix" "orig,vex,evex")])
18499
18499
;; Expander for a masked 128-bit insert into a 256-bit vector.  Operand 3
;; (0 or 1) selects the masked vec_set_lo or vec_set_hi generator.
;; Operands 4 and 5 are passed through as that generator's last two
;; arguments.
18500(define_expand "avx512vl_vinsert<mode>"
18501  [(match_operand:VI48F_256 0 "register_operand")
18502   (match_operand:VI48F_256 1 "register_operand")
18503   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
18504   (match_operand:SI 3 "const_0_to_1_operand")
18505   (match_operand:VI48F_256 4 "register_operand")
18506   (match_operand:<avx512fmaskmode> 5 "register_operand")]
18507  "TARGET_AVX512VL"
18508{
18509  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18510
18511  switch (INTVAL (operands[3]))
18512    {
18513    case 0:
18514      insn = gen_vec_set_lo_<mode>_mask;
18515      break;
18516    case 1:
18517      insn = gen_vec_set_hi_<mode>_mask;
18518      break;
18519    default:
18520      gcc_unreachable ();
18521    }
18522
18523  emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
18524		   operands[5]));
18525  DONE;
18526})
18527
18527
;; Expander for an unmasked 128-bit insert into a 256-bit vector.
;; Operand 3 (0 or 1) selects the vec_set_lo or vec_set_hi generator.
18528(define_expand "avx_vinsertf128<mode>"
18529  [(match_operand:V_256 0 "register_operand")
18530   (match_operand:V_256 1 "register_operand")
18531   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
18532   (match_operand:SI 3 "const_0_to_1_operand")]
18533  "TARGET_AVX"
18534{
18535  rtx (*insn)(rtx, rtx, rtx);
18536
18537  switch (INTVAL (operands[3]))
18538    {
18539    case 0:
18540      insn = gen_vec_set_lo_<mode>;
18541      break;
18542    case 1:
18543      insn = gen_vec_set_hi_<mode>;
18544      break;
18545    default:
18546      gcc_unreachable ();
18547    }
18548
18549  emit_insn (insn (operands[0], operands[1], operands[2]));
18550  DONE;
18551})
18552
18552
;; Replace the low half of a 4-element 256-bit vector: the result is
;; operand 2 concatenated with elements 2-3 of operand 1.  The mnemonic is
;; chosen at output time: the 64x2 form with AVX512DQ, else the 32x4 form
;; with AVX512VL, else the VEX vinsert form.
;; NOTE(review): the "prefix" attribute is "vex" even when one of the
;; EVEX-only mnemonics is returned -- confirm this is intended.
18553(define_insn "vec_set_lo_<mode><mask_name>"
18554  [(set (match_operand:VI8F_256 0 "register_operand" "=v")
18555	(vec_concat:VI8F_256
18556	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
18557	  (vec_select:<ssehalfvecmode>
18558	    (match_operand:VI8F_256 1 "register_operand" "v")
18559	    (parallel [(const_int 2) (const_int 3)]))))]
18560  "TARGET_AVX && <mask_avx512dq_condition>"
18561{
18562  if (TARGET_AVX512DQ)
18563    return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
18564  else if (TARGET_AVX512VL)
18565    return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
18566  else
18567    return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
18568}
18569  [(set_attr "type" "sselog")
18570   (set_attr "prefix_extra" "1")
18571   (set_attr "length_immediate" "1")
18572   (set_attr "prefix" "vex")
18573   (set_attr "mode" "<sseinsnmode>")])
18574
18574
;; Replace the high half of a 4-element 256-bit vector: the result is
;; elements 0-1 of operand 1 concatenated with operand 2.  The mnemonic is
;; chosen at output time: the 64x2 form with AVX512DQ, else the 32x4 form
;; with AVX512VL, else the VEX vinsert form.
;; NOTE(review): the "prefix" attribute is "vex" even when one of the
;; EVEX-only mnemonics is returned -- confirm this is intended.
18575(define_insn "vec_set_hi_<mode><mask_name>"
18576  [(set (match_operand:VI8F_256 0 "register_operand" "=v")
18577	(vec_concat:VI8F_256
18578	  (vec_select:<ssehalfvecmode>
18579	    (match_operand:VI8F_256 1 "register_operand" "v")
18580	    (parallel [(const_int 0) (const_int 1)]))
18581	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
18582  "TARGET_AVX && <mask_avx512dq_condition>"
18583{
18584  if (TARGET_AVX512DQ)
18585    return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
18586  else if (TARGET_AVX512VL)
18587    return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
18588  else
18589    return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
18590}
18591  [(set_attr "type" "sselog")
18592   (set_attr "prefix_extra" "1")
18593   (set_attr "length_immediate" "1")
18594   (set_attr "prefix" "vex")
18595   (set_attr "mode" "<sseinsnmode>")])
18596
18596
;; Replace the low half of an 8-element 256-bit vector: the result is
;; operand 2 concatenated with elements 4-7 of operand 1.  The 32x4 form
;; is emitted with AVX512VL, otherwise the VEX vinsert form.
;; NOTE(review): the "prefix" attribute is "vex" even when the EVEX-only
;; 32x4 mnemonic is returned -- confirm this is intended.
18597(define_insn "vec_set_lo_<mode><mask_name>"
18598  [(set (match_operand:VI4F_256 0 "register_operand" "=v")
18599	(vec_concat:VI4F_256
18600	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
18601	  (vec_select:<ssehalfvecmode>
18602	    (match_operand:VI4F_256 1 "register_operand" "v")
18603	    (parallel [(const_int 4) (const_int 5)
18604		       (const_int 6) (const_int 7)]))))]
18605  "TARGET_AVX"
18606{
18607  if (TARGET_AVX512VL)
18608    return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
18609  else
18610    return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
18611}
18612  [(set_attr "type" "sselog")
18613   (set_attr "prefix_extra" "1")
18614   (set_attr "length_immediate" "1")
18615   (set_attr "prefix" "vex")
18616   (set_attr "mode" "<sseinsnmode>")])
18617
18617
;; Replace the high half of an 8-element 256-bit vector: the result is
;; elements 0-3 of operand 1 concatenated with operand 2.  The 32x4 form
;; is emitted with AVX512VL, otherwise the VEX vinsert form.
;; NOTE(review): the "prefix" attribute is "vex" even when the EVEX-only
;; 32x4 mnemonic is returned -- confirm this is intended.
18618(define_insn "vec_set_hi_<mode><mask_name>"
18619  [(set (match_operand:VI4F_256 0 "register_operand" "=v")
18620	(vec_concat:VI4F_256
18621	  (vec_select:<ssehalfvecmode>
18622	    (match_operand:VI4F_256 1 "register_operand" "v")
18623	    (parallel [(const_int 0) (const_int 1)
18624		       (const_int 2) (const_int 3)]))
18625	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
18626  "TARGET_AVX"
18627{
18628  if (TARGET_AVX512VL)
18629    return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
18630  else
18631    return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
18632}
18633  [(set_attr "type" "sselog")
18634   (set_attr "prefix_extra" "1")
18635   (set_attr "length_immediate" "1")
18636   (set_attr "prefix" "vex")
18637   (set_attr "mode" "<sseinsnmode>")])
18638
18638
;; Replace the low half of a V16HI vector: the result is operand 2 (V8HI)
;; concatenated with elements 8-15 of operand 1.  Alternative 0 emits the
;; VEX vinsert form; alternative 1 emits the EVEX vinserti32x4 form.
18639(define_insn "vec_set_lo_v16hi"
18640  [(set (match_operand:V16HI 0 "register_operand" "=x,v")
18641	(vec_concat:V16HI
18642	  (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")
18643	  (vec_select:V8HI
18644	    (match_operand:V16HI 1 "register_operand" "x,v")
18645	    (parallel [(const_int 8) (const_int 9)
18646		       (const_int 10) (const_int 11)
18647		       (const_int 12) (const_int 13)
18648		       (const_int 14) (const_int 15)]))))]
18649  "TARGET_AVX"
18650  "@
18651   vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
18652   vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
18653  [(set_attr "type" "sselog")
18654   (set_attr "prefix_extra" "1")
18655   (set_attr "length_immediate" "1")
18656   (set_attr "prefix" "vex,evex")
18657   (set_attr "mode" "OI")])
18658
18658
;; Replace the high half of a V16HI vector: the result is elements 0-7 of
;; operand 1 concatenated with operand 2 (V8HI).  Alternative 0 emits the
;; VEX vinsert form; alternative 1 emits the EVEX vinserti32x4 form.
18659(define_insn "vec_set_hi_v16hi"
18660  [(set (match_operand:V16HI 0 "register_operand" "=x,v")
18661	(vec_concat:V16HI
18662	  (vec_select:V8HI
18663	    (match_operand:V16HI 1 "register_operand" "x,v")
18664	    (parallel [(const_int 0) (const_int 1)
18665		       (const_int 2) (const_int 3)
18666		       (const_int 4) (const_int 5)
18667		       (const_int 6) (const_int 7)]))
18668	  (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")))]
18669  "TARGET_AVX"
18670  "@
18671   vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
18672   vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
18673  [(set_attr "type" "sselog")
18674   (set_attr "prefix_extra" "1")
18675   (set_attr "length_immediate" "1")
18676   (set_attr "prefix" "vex,evex")
18677   (set_attr "mode" "OI")])
18678
18678
;; Replace the low half of a V32QI vector: the result is operand 2 (V16QI,
;; register or memory) followed by elements 16..31 of operand 1.
;; Alternative 0 ("x" registers) emits vinsert%~128, alternative 1 ("v"
;; registers) emits vinserti32x4; both use immediate 0x0.
;; Operand 2 accepts memory in both alternatives ("xm,vm"), matching the
;; predicate (nonimmediate_operand) and the other v16hi/v32qi vec_set
;; patterns, so a memory source does not have to be loaded into a register
;; first when the second alternative is selected.
18679(define_insn "vec_set_lo_v32qi"
18680  [(set (match_operand:V32QI 0 "register_operand" "=x,v")
18681	(vec_concat:V32QI
18682	  (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")
18683	  (vec_select:V16QI
18684	    (match_operand:V32QI 1 "register_operand" "x,v")
18685	    (parallel [(const_int 16) (const_int 17)
18686		       (const_int 18) (const_int 19)
18687		       (const_int 20) (const_int 21)
18688		       (const_int 22) (const_int 23)
18689		       (const_int 24) (const_int 25)
18690		       (const_int 26) (const_int 27)
18691		       (const_int 28) (const_int 29)
18692		       (const_int 30) (const_int 31)]))))]
18693  "TARGET_AVX"
18694  "@
18695   vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
18696   vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
18697  [(set_attr "type" "sselog")
18698   (set_attr "prefix_extra" "1")
18699   (set_attr "length_immediate" "1")
18700   (set_attr "prefix" "vex,evex")
18701   (set_attr "mode" "OI")])
18702
;; Replace the high half of a V32QI vector: the result is elements 0..15 of
;; operand 1 followed by operand 2 (V16QI, register or memory).
;; Alternative 0 ("x" registers) emits vinsert%~128, alternative 1 ("v"
;; registers) emits vinserti32x4; both use immediate 0x1.
18703(define_insn "vec_set_hi_v32qi"
18704  [(set (match_operand:V32QI 0 "register_operand" "=x,v")
18705	(vec_concat:V32QI
18706	  (vec_select:V16QI
18707	    (match_operand:V32QI 1 "register_operand" "x,v")
18708	    (parallel [(const_int 0) (const_int 1)
18709		       (const_int 2) (const_int 3)
18710		       (const_int 4) (const_int 5)
18711		       (const_int 6) (const_int 7)
18712		       (const_int 8) (const_int 9)
18713		       (const_int 10) (const_int 11)
18714		       (const_int 12) (const_int 13)
18715		       (const_int 14) (const_int 15)]))
18716	  (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))]
18717  "TARGET_AVX"
18718  "@
18719   vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
18720   vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
18721  [(set_attr "type" "sselog")
18722   (set_attr "prefix_extra" "1")
18723   (set_attr "length_immediate" "1")
18724   (set_attr "prefix" "vex,evex")
18725   (set_attr "mode" "OI")])
18726
;; Masked load: register operand 0 is set to UNSPEC_MASKMOV of the integer
;; vector mask in register operand 2 and the memory operand 1.
;; Emits v<sseintprefix>maskmov<ssemodesuffix>.
18727(define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
18728  [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
18729	(unspec:V48_AVX2
18730	  [(match_operand:<sseintvecmode> 2 "register_operand" "x")
18731	   (match_operand:V48_AVX2 1 "memory_operand" "m")]
18732	  UNSPEC_MASKMOV))]
18733  "TARGET_AVX"
18734  "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
18735  [(set_attr "type" "sselog1")
18736   (set_attr "prefix_extra" "1")
18737   (set_attr "prefix" "vex")
18738   (set_attr "btver2_decode" "vector")
18739   (set_attr "mode" "<sseinsnmode>")])
18740
;; Masked store: memory operand 0 is set to UNSPEC_MASKMOV of the integer
;; vector mask (operand 1), the data register (operand 2) and the previous
;; contents of the destination itself (match_dup 0, hence the "+m"
;; read-write constraint).  Emits v<sseintprefix>maskmov<ssemodesuffix>.
18741(define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
18742  [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
18743	(unspec:V48_AVX2
18744	  [(match_operand:<sseintvecmode> 1 "register_operand" "x")
18745	   (match_operand:V48_AVX2 2 "register_operand" "x")
18746	   (match_dup 0)]
18747	  UNSPEC_MASKMOV))]
18748  "TARGET_AVX"
18749  "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18750  [(set_attr "type" "sselog1")
18751   (set_attr "prefix_extra" "1")
18752   (set_attr "prefix" "vex")
18753   (set_attr "btver2_decode" "vector")
18754   (set_attr "mode" "<sseinsnmode>")])
18755
;; Named masked-load expander for V48_AVX2 modes with an integer-vector mask
;; (operand 2).  It has no preparation code; the UNSPEC_MASKMOV template is
;; emitted as written, with operand 1 the memory source.
18756(define_expand "maskload<mode><sseintvecmodelower>"
18757  [(set (match_operand:V48_AVX2 0 "register_operand")
18758	(unspec:V48_AVX2
18759	  [(match_operand:<sseintvecmode> 2 "register_operand")
18760	   (match_operand:V48_AVX2 1 "memory_operand")]
18761	  UNSPEC_MASKMOV))]
18762  "TARGET_AVX")
18763
;; Named masked-load expander for V48_AVX512VL modes with an
;; <avx512fmaskmode> mask register (operand 2): a vec_merge of the memory
;; operand 1 with the old value of the destination (match_dup 0).
18764(define_expand "maskload<mode><avx512fmaskmodelower>"
18765  [(set (match_operand:V48_AVX512VL 0 "register_operand")
18766	(vec_merge:V48_AVX512VL
18767	  (match_operand:V48_AVX512VL 1 "memory_operand")
18768	  (match_dup 0)
18769	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18770  "TARGET_AVX512F")
18771
;; Same vec_merge masked-load expander for the VI12_AVX512VL modes; enabled
;; under TARGET_AVX512BW.
18772(define_expand "maskload<mode><avx512fmaskmodelower>"
18773  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
18774	(vec_merge:VI12_AVX512VL
18775	  (match_operand:VI12_AVX512VL 1 "memory_operand")
18776	  (match_dup 0)
18777	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18778  "TARGET_AVX512BW")
18779
;; Named masked-store expander for V48_AVX2 modes with an integer-vector
;; mask.  Note the operand numbering: operand 1 is the data register and
;; operand 2 the mask, while inside the unspec the mask comes first; the old
;; memory contents (match_dup 0) are the third unspec input.
18780(define_expand "maskstore<mode><sseintvecmodelower>"
18781  [(set (match_operand:V48_AVX2 0 "memory_operand")
18782	(unspec:V48_AVX2
18783	  [(match_operand:<sseintvecmode> 2 "register_operand")
18784	   (match_operand:V48_AVX2 1 "register_operand")
18785	   (match_dup 0)]
18786	  UNSPEC_MASKMOV))]
18787  "TARGET_AVX")
18788
;; Named masked-store expander for V48_AVX512VL modes with an
;; <avx512fmaskmode> mask register (operand 2): memory operand 0 becomes a
;; vec_merge of the data register (operand 1) and its own old contents.
18789(define_expand "maskstore<mode><avx512fmaskmodelower>"
18790  [(set (match_operand:V48_AVX512VL 0 "memory_operand")
18791	(vec_merge:V48_AVX512VL
18792	  (match_operand:V48_AVX512VL 1 "register_operand")
18793	  (match_dup 0)
18794	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18795  "TARGET_AVX512F")
18796
;; Same vec_merge masked-store expander for the VI12_AVX512VL modes; enabled
;; under TARGET_AVX512BW.
18797(define_expand "maskstore<mode><avx512fmaskmodelower>"
18798  [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
18799	(vec_merge:VI12_AVX512VL
18800	  (match_operand:VI12_AVX512VL 1 "register_operand")
18801	  (match_dup 0)
18802	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18803  "TARGET_AVX512BW")
18804
;; Conditional branch on a comparison of two VI48_AVX vectors.  Operand 0 is
;; the comparison operator (restricted by bt_comparison_operator), operands
;; 1 and 2 the vectors and operand 3 the branch target.  The preparation
;; code hands everything to ix86_expand_branch and finishes with DONE, so
;; the RTL template above only describes the operands.
18805(define_expand "cbranch<mode>4"
18806  [(set (reg:CC FLAGS_REG)
18807	(compare:CC (match_operand:VI48_AVX 1 "register_operand")
18808		    (match_operand:VI48_AVX 2 "nonimmediate_operand")))
18809   (set (pc) (if_then_else
18810	       (match_operator 0 "bt_comparison_operator"
18811		[(reg:CC FLAGS_REG) (const_int 0)])
18812	       (label_ref (match_operand 3))
18813	       (pc)))]
18814  "TARGET_SSE4_1"
18815{
18816  ix86_expand_branch (GET_CODE (operands[0]),
18817		      operands[1], operands[2], operands[3]);
18818  DONE;
18819})
18820
18821
;; Cast a half-width vector (operand 1) to the full AVX256MODE2P mode via
;; UNSPEC_CAST.  The insn itself only emits "#"; after reload it is split
;; into a plain move.  Memory-to-memory is rejected by the insn condition.
;; In the split, a register destination is narrowed to the half mode with
;; gen_lowpart; otherwise (memory destination) operand 1 is re-expressed in
;; the full mode through lowpart_subreg so both sides of the move agree.
18822(define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
18823  [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
18824	(unspec:AVX256MODE2P
18825	  [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
18826	  UNSPEC_CAST))]
18827  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
18828  "#"
18829  "&& reload_completed"
18830  [(set (match_dup 0) (match_dup 1))]
18831{
18832  if (REG_P (operands[0]))
18833    operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
18834  else
18835    operands[1] = lowpart_subreg (<MODE>mode, operands[1],
18836				  <ssehalfvecmode>mode);
18837})
18838
18839;; Modes handled by vec_init expanders.
;; 512-bit modes are conditional on TARGET_AVX512F, 256-bit modes on
;; TARGET_AVX, V2DF on TARGET_SSE2; the remaining 128-bit modes carry no
;; extra per-mode condition.
18840(define_mode_iterator VEC_INIT_MODE
18841  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
18842   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
18843   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
18844   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
18845   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
18846   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
18847   (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
18848
18849;; Likewise, but for initialization from half sized vectors.
18850;; Thus, these are all VEC_INIT_MODE modes except V2??.
;; (That is: V2DI, V2DF and V2TI are left out, since they have only two
;; elements and so no half-sized vector form.)
18851(define_mode_iterator VEC_INIT_HALF_MODE
18852  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
18853   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
18854   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
18855   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
18856   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
18857   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
18858   (V4TI "TARGET_AVX512F")])
18859
;; Vector initialisation from scalar elements for every VEC_INIT_MODE.
;; Operand 0 is the destination register and operand 1 (no mode/predicate)
;; the initialiser; all work is delegated to ix86_expand_vector_init, called
;; with `false' as its first argument.
18860(define_expand "vec_init<mode><ssescalarmodelower>"
18861  [(match_operand:VEC_INIT_MODE 0 "register_operand")
18862   (match_operand 1)]
18863  "TARGET_SSE"
18864{
18865  ix86_expand_vector_init (false, operands[0], operands[1]);
18866  DONE;
18867})
18868
;; Vector initialisation from half-sized vectors for every
;; VEC_INIT_HALF_MODE.  Same shape as the scalar-element expander: operand 0
;; is the destination, operand 1 the initialiser, and the work is delegated
;; to ix86_expand_vector_init (called with `false' as its first argument).
18869(define_expand "vec_init<mode><ssehalfvecmodelower>"
18870  [(match_operand:VEC_INIT_HALF_MODE 0 "register_operand")
18871   (match_operand 1)]
18872  "TARGET_SSE"
18873{
18874  ix86_expand_vector_init (false, operands[0], operands[1]);
18875  DONE;
18876})
18877
;; Per-element variable arithmetic right shift for VI48_AVX512F_AVX512VL
;; modes: operand 1 shifted by the counts in operand 2 (register or memory).
;; Emits vpsrav<ssemodesuffix>, optionally with the <mask_operand3> suffix.
18878(define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
18879  [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
18880	(ashiftrt:VI48_AVX512F_AVX512VL
18881	  (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
18882	  (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
18883  "TARGET_AVX2 && <mask_mode512bit_condition>"
18884  "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18885  [(set_attr "type" "sseishft")
18886   (set_attr "prefix" "maybe_evex")
18887   (set_attr "mode" "<sseinsnmode>")])
18888
;; Per-element variable arithmetic right shift for the VI2_AVX512VL (word
;; element) modes; requires TARGET_AVX512BW and always emits vpsravw.
18889(define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
18890  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18891	(ashiftrt:VI2_AVX512VL
18892	  (match_operand:VI2_AVX512VL 1 "register_operand" "v")
18893	  (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
18894  "TARGET_AVX512BW"
18895  "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18896  [(set_attr "type" "sseishft")
18897   (set_attr "prefix" "maybe_evex")
18898   (set_attr "mode" "<sseinsnmode>")])
18899
;; Per-element variable logical shifts (one pattern per code in the
;; any_lshift iterator) for VI48_AVX512F modes.  Emits
;; vp<vshift>v<ssemodesuffix>, optionally with the <mask_operand3> suffix.
18900(define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
18901  [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
18902	(any_lshift:VI48_AVX512F
18903	  (match_operand:VI48_AVX512F 1 "register_operand" "v")
18904	  (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
18905  "TARGET_AVX2 && <mask_mode512bit_condition>"
18906  "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18907  [(set_attr "type" "sseishft")
18908   (set_attr "prefix" "maybe_evex")
18909   (set_attr "mode" "<sseinsnmode>")])
18910
;; Per-element variable logical shifts (any_lshift) for the VI2_AVX512VL
;; modes; requires TARGET_AVX512BW.  Same output template as the
;; VI48_AVX512F variant.
18911(define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
18912  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18913	(any_lshift:VI2_AVX512VL
18914	  (match_operand:VI2_AVX512VL 1 "register_operand" "v")
18915	  (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
18916  "TARGET_AVX512BW"
18917  "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18918  [(set_attr "type" "sseishft")
18919   (set_attr "prefix" "maybe_evex")
18920   (set_attr "mode" "<sseinsnmode>")])
18921
;; Build a V_256_512 vector as the concatenation of two half-width operands
;; (operand 1 = low half, operand 2 = high half).
;;   Alternatives 0/1: operand 2 is a register or memory and is inserted
;;     with a vinsert* instruction using immediate 0x1.  Alternative 1 picks
;;     the mnemonic from the mode size and element size: for 64-byte modes
;;     32x8 (AVX512DQ and 4-byte elements) or 64x4; otherwise 64x2
;;     (AVX512DQ and 8-byte elements) or 32x4.
;;   Alternatives 2/3: operand 2 matches constraint "C" (presumably the
;;     all-zeros constant -- confirm in constraints.md) and only a move of
;;     operand 1 into the %x0 / %t0 view of the destination is emitted.
;;     For integer modes, alternative 2 uses plain vmovdqa while
;;     alternative 3 uses vmovdqa64/vmovdqa32 depending on element size.
18922(define_insn "avx_vec_concat<mode>"
18923  [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
18924	(vec_concat:V_256_512
18925	  (match_operand:<ssehalfvecmode> 1 "register_operand" "x,v,x,v")
18926	  (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,vm,C,C")))]
18927  "TARGET_AVX"
18928{
18929  switch (which_alternative)
18930    {
18931    case 0:
18932      return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18933    case 1:
18934      if (<MODE_SIZE> == 64)
18935	{
18936	  if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 4)
18937	    return "vinsert<shuffletype>32x8\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18938	  else
18939	    return "vinsert<shuffletype>64x4\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18940	}
18941      else
18942	{
18943	  if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 8)
18944	    return "vinsert<shuffletype>64x2\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18945	  else
18946	    return "vinsert<shuffletype>32x4\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18947	}
18948    case 2:
18949    case 3:
18950      switch (get_attr_mode (insn))
18951	{
18952	case MODE_V16SF:
18953	  return "vmovaps\t{%1, %t0|%t0, %1}";
18954	case MODE_V8DF:
18955	  return "vmovapd\t{%1, %t0|%t0, %1}";
18956	case MODE_V8SF:
18957	  return "vmovaps\t{%1, %x0|%x0, %1}";
18958	case MODE_V4DF:
18959	  return "vmovapd\t{%1, %x0|%x0, %1}";
18960	case MODE_XI:
18961	  if (which_alternative == 2)
18962	    return "vmovdqa\t{%1, %t0|%t0, %1}";
18963	  else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
18964	    return "vmovdqa64\t{%1, %t0|%t0, %1}";
18965	  else
18966	    return "vmovdqa32\t{%1, %t0|%t0, %1}";
18967	case MODE_OI:
18968	  if (which_alternative == 2)
18969	    return "vmovdqa\t{%1, %x0|%x0, %1}";
18970	  else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
18971	    return "vmovdqa64\t{%1, %x0|%x0, %1}";
18972	  else
18973	    return "vmovdqa32\t{%1, %x0|%x0, %1}";
18974	default:
18975	  gcc_unreachable ();
18976	}
18977    default:
18978      gcc_unreachable ();
18979    }
18980}
18981  [(set_attr "type" "sselog,sselog,ssemov,ssemov")
18982   (set_attr "prefix_extra" "1,1,*,*")
18983   (set_attr "length_immediate" "1,1,*,*")
18984   (set_attr "prefix" "maybe_evex")
18985   (set_attr "mode" "<sseinsnmode>")])
18986
;; Half-to-single conversion producing V4SF from a V8HI register: modelled
;; as the low four elements (vec_select 0..3) of a V8SF UNSPEC_VCVTPH2PS of
;; operand 1.  Emits vcvtph2ps, optionally with the <mask_operand2> suffix.
18987(define_insn "vcvtph2ps<mask_name>"
18988  [(set (match_operand:V4SF 0 "register_operand" "=v")
18989	(vec_select:V4SF
18990	  (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
18991		       UNSPEC_VCVTPH2PS)
18992	  (parallel [(const_int 0) (const_int 1)
18993		     (const_int 2) (const_int 3)])))]
18994  "TARGET_F16C || TARGET_AVX512VL"
18995  "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18996  [(set_attr "type" "ssecvt")
18997   (set_attr "prefix" "maybe_evex")
18998   (set_attr "mode" "V4SF")])
18999
;; Half-to-single conversion producing V4SF directly from a V4HI memory
;; operand.  Emits vcvtph2ps, optionally with the <mask_operand2> suffix.
;; NOTE(review): "prefix" is "vex" and "mode" is V8SF here, whereas the
;; register form of this conversion uses "maybe_evex"/V4SF -- confirm
;; whether that difference is intended.
19000(define_insn "*vcvtph2ps_load<mask_name>"
19001  [(set (match_operand:V4SF 0 "register_operand" "=v")
19002	(unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
19003		     UNSPEC_VCVTPH2PS))]
19004  "TARGET_F16C || TARGET_AVX512VL"
19005  "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19006  [(set_attr "type" "ssecvt")
19007   (set_attr "prefix" "vex")
19008   (set_attr "mode" "V8SF")])
19009
;; Half-to-single conversion producing V8SF from a V8HI register or memory
;; operand.  Emits vcvtph2ps, optionally with the <mask_operand2> suffix.
;; NOTE(review): "prefix" is "vex" although the pattern has a masked
;; variant and uses "v" registers -- confirm whether "maybe_evex" was
;; intended.
19010(define_insn "vcvtph2ps256<mask_name>"
19011  [(set (match_operand:V8SF 0 "register_operand" "=v")
19012	(unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
19013		     UNSPEC_VCVTPH2PS))]
19014  "TARGET_F16C || TARGET_AVX512VL"
19015  "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19016  [(set_attr "type" "ssecvt")
19017   (set_attr "prefix" "vex")
19018   (set_attr "btver2_decode" "double")
19019   (set_attr "mode" "V8SF")])
19020
;; 512-bit half-to-single conversion: V16SF from a V16HI operand whose
;; predicate and constraint come from the round_saeonly substitution.
;; Emits vcvtph2ps with the optional <round_saeonly_mask_op2> and
;; <mask_operand2> decorations.
19021(define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
19022  [(set (match_operand:V16SF 0 "register_operand" "=v")
19023	(unspec:V16SF
19024	  [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
19025	  UNSPEC_VCVTPH2PS))]
19026  "TARGET_AVX512F"
19027  "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
19028  [(set_attr "type" "ssecvt")
19029   (set_attr "prefix" "evex")
19030   (set_attr "mode" "V16SF")])
19031
;; Masked single-to-half conversion expander (TARGET_AVX512VL).  The V4HI
;; UNSPEC_VCVTPS2PH result of operand 1 (with immediate operand 2) is
;; concatenated with operand 5 to form V8HI and then vec_merged with
;; operand 3 under the QI mask in operand 4.  Operand 5 is not supplied by
;; the caller: the preparation statement sets it to the V4HI zero constant.
19032(define_expand "vcvtps2ph_mask"
19033  [(set (match_operand:V8HI 0 "register_operand")
19034	(vec_merge:V8HI
19035	  (vec_concat:V8HI
19036	    (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
19037			  (match_operand:SI 2 "const_0_to_255_operand")]
19038			  UNSPEC_VCVTPS2PH)
19039	    (match_dup 5))
19040	   (match_operand:V8HI 3 "vector_move_operand")
19041	   (match_operand:QI 4 "register_operand")))]
19042  "TARGET_AVX512VL"
19043  "operands[5] = CONST0_RTX (V4HImode);")
19044
;; Single-to-half conversion expander (TARGET_F16C).  The V4HI
;; UNSPEC_VCVTPS2PH result of operand 1 (with immediate operand 2) is
;; concatenated with operand 3, which the preparation statement sets to the
;; V4HI zero constant, to form the V8HI destination.
19045(define_expand "vcvtps2ph"
19046  [(set (match_operand:V8HI 0 "register_operand")
19047	(vec_concat:V8HI
19048	  (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
19049			(match_operand:SI 2 "const_0_to_255_operand")]
19050		       UNSPEC_VCVTPS2PH)
19051	  (match_dup 3)))]
19052  "TARGET_F16C"
19053  "operands[3] = CONST0_RTX (V4HImode);")
19054
;; Register form of the V4SF -> half conversion: the V8HI destination is the
;; V4HI conversion result concatenated with a zero V4HI (operand 3 must
;; satisfy const0_operand).  Operand 2 is the 0..255 immediate.  Emits
;; vcvtps2ph, optionally with the <mask_operand4> suffix.
19055(define_insn "*vcvtps2ph<mask_name>"
19056  [(set (match_operand:V8HI 0 "register_operand" "=v")
19057	(vec_concat:V8HI
19058	  (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
19059			(match_operand:SI 2 "const_0_to_255_operand" "N")]
19060		       UNSPEC_VCVTPS2PH)
19061	  (match_operand:V4HI 3 "const0_operand")))]
19062  "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
19063  "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
19064  [(set_attr "type" "ssecvt")
19065   (set_attr "prefix" "maybe_evex")
19066   (set_attr "mode" "V4SF")])
19067
;; Store form of the V4SF -> half conversion: the V4HI UNSPEC_VCVTPS2PH
;; result is written straight to memory operand 0.  Operand 2 is the 0..255
;; immediate.  Emits vcvtps2ph, optionally with the <mask_operand3> suffix.
19068(define_insn "*vcvtps2ph_store<mask_name>"
19069  [(set (match_operand:V4HI 0 "memory_operand" "=m")
19070	(unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
19071		      (match_operand:SI 2 "const_0_to_255_operand" "N")]
19072		     UNSPEC_VCVTPS2PH))]
19073  "TARGET_F16C || TARGET_AVX512VL"
19074  "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
19075  [(set_attr "type" "ssecvt")
19076   (set_attr "prefix" "maybe_evex")
19077   (set_attr "mode" "V4SF")])
19078
;; V8SF -> V8HI half conversion; the destination may be a register or
;; memory ("=vm").  Operand 2 is the 0..255 immediate.  Emits vcvtps2ph,
;; optionally with the <mask_operand3> suffix.
19079(define_insn "vcvtps2ph256<mask_name>"
19080  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=vm")
19081	(unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
19082		      (match_operand:SI 2 "const_0_to_255_operand" "N")]
19083		     UNSPEC_VCVTPS2PH))]
19084  "TARGET_F16C || TARGET_AVX512VL"
19085  "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
19086  [(set_attr "type" "ssecvt")
19087   (set_attr "prefix" "maybe_evex")
19088   (set_attr "btver2_decode" "vector")
19089   (set_attr "mode" "V8SF")])
19090
;; V16SF -> V16HI half conversion (TARGET_AVX512F); the destination may be
;; a register or memory ("=vm").  Operand 2 is the 0..255 immediate.  Emits
;; vcvtps2ph, optionally with the <mask_operand3> suffix.
19091(define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
19092  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
19093	(unspec:V16HI
19094	  [(match_operand:V16SF 1 "register_operand" "v")
19095	   (match_operand:SI 2 "const_0_to_255_operand" "N")]
19096	  UNSPEC_VCVTPS2PH))]
19097  "TARGET_AVX512F"
19098  "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
19099  [(set_attr "type" "ssecvt")
19100   (set_attr "prefix" "evex")
19101   (set_attr "mode" "V16SF")])
19102
19103;; For gather* insn patterns
;; VEC_GATHER_MODE lists the 128- and 256-bit data modes with 4- or 8-byte
;; elements that the avx2_gather* patterns iterate over.
19104(define_mode_iterator VEC_GATHER_MODE
19105		      [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Mode of the index vector when the indices are 32-bit (SI) elements, keyed
;; by data mode.  Two-element data modes still map to V4SI.
19106(define_mode_attr VEC_GATHER_IDXSI
19107		      [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
19108		       (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
19109		       (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
19110		       (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
19111
;; Mode of the index vector when the indices are 64-bit (DI) elements, keyed
;; by data mode.  For 4-byte-element data modes the index vector has half as
;; many elements as the data mode (e.g. V8SI -> V4DI).
19112(define_mode_attr VEC_GATHER_IDXDI
19113		      [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
19114		       (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
19115		       (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
19116		       (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
19117
;; Mode of the source/mask data operands for gathers with 64-bit indices,
;; keyed by data mode.  It equals the data mode except for V8SI/V8SF and
;; V16SI/V16SF, which map to the half-width V4SI/V4SF and V8SI/V8SF.
19118(define_mode_attr VEC_GATHER_SRCDI
19119		      [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
19120		       (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
19121		       (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
19122		       (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
19123
;; AVX2 gather with 32-bit indices.  Operands: 0 destination, 1 source
;; register, 2 base address, 3 index vector (<VEC_GATHER_IDXSI>), 4 a
;; VEC_GATHER_MODE register (presumably the element mask), 5 the scale
;; (1, 2, 4 or 8).  The preparation code builds operand 6, the
;; UNSPEC_VSIBADDR address formed from operands 2, 3 and 5; operand 7 is a
;; clobbered scratch register.
;; The scale predicate is spelled "const1248_operand" with no trailing
;; blank, so it names the same predicate as the matching gather insns.
19124(define_expand "avx2_gathersi<mode>"
19125  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
19126		   (unspec:VEC_GATHER_MODE
19127		     [(match_operand:VEC_GATHER_MODE 1 "register_operand")
19128		      (mem:<ssescalarmode>
19129			(match_par_dup 6
19130			  [(match_operand 2 "vsib_address_operand")
19131			   (match_operand:<VEC_GATHER_IDXSI>
19132			      3 "register_operand")
19133			   (match_operand:SI 5 "const1248_operand")]))
19134		      (mem:BLK (scratch))
19135		      (match_operand:VEC_GATHER_MODE 4 "register_operand")]
19136		     UNSPEC_GATHER))
19137	      (clobber (match_scratch:VEC_GATHER_MODE 7))])]
19138  "TARGET_AVX2"
19139{
19140  operands[6]
19141    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
19142					operands[5]), UNSPEC_VSIBADDR);
19143})
19144
;; AVX2 gather insn with 32-bit indices.  Operand 2 (source) is tied to the
;; destination ("0") and operand 5 is tied to the clobbered scratch operand
;; 1 ("1"); the destination and scratch are both early-clobber.  Operand 7
;; is the VSIB memory reference built from base 3, index 4 and scale 6.
;; The template prints the scratch (%1), the memory (%7) and the
;; destination (%0); %M3 is applied to the base address operand.
19145(define_insn "*avx2_gathersi<mode>"
19146  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
19147	(unspec:VEC_GATHER_MODE
19148	  [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
19149	   (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
19150	     [(unspec:P
19151		[(match_operand:P 3 "vsib_address_operand" "Tv")
19152		 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
19153		 (match_operand:SI 6 "const1248_operand" "n")]
19154		UNSPEC_VSIBADDR)])
19155	   (mem:BLK (scratch))
19156	   (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
19157	  UNSPEC_GATHER))
19158   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
19159  "TARGET_AVX2"
19160  "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
19161  [(set_attr "type" "ssemov")
19162   (set_attr "prefix" "vex")
19163   (set_attr "mode" "<sseinsnmode>")])
19164
;; Variant of the 32-bit-index AVX2 gather in which the first unspec element
;; is (pc) rather than a source register, so no input is tied to the
;; destination.  Operand 4 is tied to the clobbered scratch operand 1; the
;; VSIB memory is operand 6 (base 2, index 3, scale 5).
19165(define_insn "*avx2_gathersi<mode>_2"
19166  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
19167	(unspec:VEC_GATHER_MODE
19168	  [(pc)
19169	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19170	     [(unspec:P
19171		[(match_operand:P 2 "vsib_address_operand" "Tv")
19172		 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
19173		 (match_operand:SI 5 "const1248_operand" "n")]
19174		UNSPEC_VSIBADDR)])
19175	   (mem:BLK (scratch))
19176	   (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
19177	  UNSPEC_GATHER))
19178   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
19179  "TARGET_AVX2"
19180  "%M2v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
19181  [(set_attr "type" "ssemov")
19182   (set_attr "prefix" "vex")
19183   (set_attr "mode" "<sseinsnmode>")])
19184
;; AVX2 gather with 64-bit indices.  Operands: 0 destination, 1 source
;; register (<VEC_GATHER_SRCDI>), 2 base address, 3 index vector
;; (<VEC_GATHER_IDXDI>), 4 a <VEC_GATHER_SRCDI> register (presumably the
;; element mask), 5 the scale (1, 2, 4 or 8).  The preparation code builds
;; operand 6, the UNSPEC_VSIBADDR address formed from operands 2, 3 and 5;
;; operand 7 is a clobbered scratch register.
;; The scale predicate is spelled "const1248_operand" with no trailing
;; blank, so it names the same predicate as the matching gather insns.
19185(define_expand "avx2_gatherdi<mode>"
19186  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
19187		   (unspec:VEC_GATHER_MODE
19188		     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
19189		      (mem:<ssescalarmode>
19190			(match_par_dup 6
19191			  [(match_operand 2 "vsib_address_operand")
19192			   (match_operand:<VEC_GATHER_IDXDI>
19193			      3 "register_operand")
19194			   (match_operand:SI 5 "const1248_operand")]))
19195		      (mem:BLK (scratch))
19196		      (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
19197		     UNSPEC_GATHER))
19198	      (clobber (match_scratch:VEC_GATHER_MODE 7))])]
19199  "TARGET_AVX2"
19200{
19201  operands[6]
19202    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
19203					operands[5]), UNSPEC_VSIBADDR);
19204})
19205
;; AVX2 gather insn with 64-bit indices.  Operand 2 (source, in
;; <VEC_GATHER_SRCDI> mode) is tied to the destination and operand 5 is tied
;; to the clobbered scratch operand 1.  The template prints operands 5 and 2
;; rather than 1 and 0; because of the matching constraints they name the
;; same hard registers, but in the <VEC_GATHER_SRCDI> mode.
19206(define_insn "*avx2_gatherdi<mode>"
19207  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
19208	(unspec:VEC_GATHER_MODE
19209	  [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
19210	   (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
19211	     [(unspec:P
19212		[(match_operand:P 3 "vsib_address_operand" "Tv")
19213		 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
19214		 (match_operand:SI 6 "const1248_operand" "n")]
19215		UNSPEC_VSIBADDR)])
19216	   (mem:BLK (scratch))
19217	   (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
19218	  UNSPEC_GATHER))
19219   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
19220  "TARGET_AVX2"
19221  "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
19222  [(set_attr "type" "ssemov")
19223   (set_attr "prefix" "vex")
19224   (set_attr "mode" "<sseinsnmode>")])
19225
;; Variant of the 64-bit-index AVX2 gather in which the first unspec element
;; is (pc) rather than a source register.  Operand 4 is tied to the
;; clobbered scratch operand 1.  When the data mode differs from
;; <VEC_GATHER_SRCDI> (the 8 x 4-byte-element modes), the destination is
;; printed with the %x modifier; otherwise it is printed unmodified.
19226(define_insn "*avx2_gatherdi<mode>_2"
19227  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
19228	(unspec:VEC_GATHER_MODE
19229	  [(pc)
19230	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19231	     [(unspec:P
19232		[(match_operand:P 2 "vsib_address_operand" "Tv")
19233		 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
19234		 (match_operand:SI 5 "const1248_operand" "n")]
19235		UNSPEC_VSIBADDR)])
19236	   (mem:BLK (scratch))
19237	   (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
19238	  UNSPEC_GATHER))
19239   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
19240  "TARGET_AVX2"
19241{
19242  if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
19243    return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
19244  return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
19245}
19246  [(set_attr "type" "ssemov")
19247   (set_attr "prefix" "vex")
19248   (set_attr "mode" "<sseinsnmode>")])
19249
;; 64-bit-index AVX2 gather for VI4F_256 data modes where only the low four
;; elements are kept: the destination has <VEC_GATHER_SRCDI> mode and is the
;; vec_select of elements 0..3 of the VI4F_256 gather unspec.  Operand 2
;; (source) is tied to the destination and operand 5 to the clobbered
;; scratch operand 1.
19250(define_insn "*avx2_gatherdi<mode>_3"
19251  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
19252	(vec_select:<VEC_GATHER_SRCDI>
19253	  (unspec:VI4F_256
19254	    [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
19255	     (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
19256	       [(unspec:P
19257		  [(match_operand:P 3 "vsib_address_operand" "Tv")
19258		   (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
19259		   (match_operand:SI 6 "const1248_operand" "n")]
19260		  UNSPEC_VSIBADDR)])
19261	     (mem:BLK (scratch))
19262	     (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
19263	     UNSPEC_GATHER)
19264	  (parallel [(const_int 0) (const_int 1)
19265		     (const_int 2) (const_int 3)])))
19266   (clobber (match_scratch:VI4F_256 1 "=&x"))]
19267  "TARGET_AVX2"
19268  "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
19269  [(set_attr "type" "ssemov")
19270   (set_attr "prefix" "vex")
19271   (set_attr "mode" "<sseinsnmode>")])
19272
;; Like the low-half (vec_select 0..3) 64-bit-index gather for VI4F_256
;; modes, but with (pc) as the first unspec element instead of a source
;; register.  Operand 4 is tied to the clobbered scratch operand 1; the
;; VSIB memory is operand 6 (base 2, index 3, scale 5).
19273(define_insn "*avx2_gatherdi<mode>_4"
19274  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
19275	(vec_select:<VEC_GATHER_SRCDI>
19276	  (unspec:VI4F_256
19277	    [(pc)
19278	     (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19279	       [(unspec:P
19280		  [(match_operand:P 2 "vsib_address_operand" "Tv")
19281		   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
19282		   (match_operand:SI 5 "const1248_operand" "n")]
19283		  UNSPEC_VSIBADDR)])
19284	     (mem:BLK (scratch))
19285	     (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
19286	    UNSPEC_GATHER)
19287	  (parallel [(const_int 0) (const_int 1)
19288		     (const_int 2) (const_int 3)])))
19289   (clobber (match_scratch:VI4F_256 1 "=&x"))]
19290  "TARGET_AVX2"
19291  "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
19292  [(set_attr "type" "ssemov")
19293   (set_attr "prefix" "vex")
19294   (set_attr "mode" "<sseinsnmode>")])
19295
;; AVX-512 gather with 32-bit indices.  Operands: 0 destination, 1 source
;; register, 2 base address, 3 index vector (<VEC_GATHER_IDXSI>), 4 mask
;; register (<avx512fmaskmode>), 5 scale (1, 2, 4 or 8).  The preparation
;; code builds operand 6, the UNSPEC_VSIBADDR address formed from operands
;; 2, 3 and 5; operand 7 is a clobbered mask-mode scratch.
19296(define_expand "<avx512>_gathersi<mode>"
19297  [(parallel [(set (match_operand:VI48F 0 "register_operand")
19298		   (unspec:VI48F
19299		     [(match_operand:VI48F 1 "register_operand")
19300		      (match_operand:<avx512fmaskmode> 4 "register_operand")
19301		      (mem:<ssescalarmode>
19302			(match_par_dup 6
19303			  [(match_operand 2 "vsib_address_operand")
19304			   (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
19305			   (match_operand:SI 5 "const1248_operand")]))]
19306		     UNSPEC_GATHER))
19307	      (clobber (match_scratch:<avx512fmaskmode> 7))])]
19308  "TARGET_AVX512F"
19309{
19310  operands[6]
19311    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
19312					operands[5]), UNSPEC_VSIBADDR);
19313})
19314
;; AVX-512 gather insn with 32-bit indices.  Operand 1 (source) is tied to
;; the destination and the mask input (operand 7) is tied to the clobbered
;; "Yk" scratch operand 2, which is what the template prints inside the
;; {%2} mask decoration.  Operand 6 is the VSIB memory (base 4, index 3,
;; scale 5).
19315(define_insn "*avx512f_gathersi<mode>"
19316  [(set (match_operand:VI48F 0 "register_operand" "=&v")
19317	(unspec:VI48F
19318	  [(match_operand:VI48F 1 "register_operand" "0")
19319	   (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
19320	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19321	     [(unspec:P
19322		[(match_operand:P 4 "vsib_address_operand" "Tv")
19323		 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
19324		 (match_operand:SI 5 "const1248_operand" "n")]
19325		UNSPEC_VSIBADDR)])]
19326	  UNSPEC_GATHER))
19327   (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
19328  "TARGET_AVX512F"
19329;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
19330;; gas changed what it requires incompatibly.
19331  "%M4v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
19332  [(set_attr "type" "ssemov")
19333   (set_attr "prefix" "evex")
19334   (set_attr "mode" "<sseinsnmode>")])
19335
;; Variant of the 32-bit-index AVX-512 gather in which the first unspec
;; element is (pc) rather than a source register.  The mask input (operand
;; 6) is tied to the clobbered "Yk" scratch operand 1; operand 5 is the
;; VSIB memory (base 3, index 2, scale 4).
19336(define_insn "*avx512f_gathersi<mode>_2"
19337  [(set (match_operand:VI48F 0 "register_operand" "=&v")
19338	(unspec:VI48F
19339	  [(pc)
19340	   (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
19341	   (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
19342	     [(unspec:P
19343		[(match_operand:P 3 "vsib_address_operand" "Tv")
19344		 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
19345		 (match_operand:SI 4 "const1248_operand" "n")]
19346		UNSPEC_VSIBADDR)])]
19347	  UNSPEC_GATHER))
19348   (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
19349  "TARGET_AVX512F"
19350;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
19351;; gas changed what it requires incompatibly.
19352  "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
19353  [(set_attr "type" "ssemov")
19354   (set_attr "prefix" "evex")
19355   (set_attr "mode" "<sseinsnmode>")])
19356
19357
;; AVX-512 gather with 64-bit indices.  Operands: 0 destination, 1 source
;; register (<VEC_GATHER_SRCDI>), 2 base address, 3 index vector
;; (<VEC_GATHER_IDXDI>), 4 QImode mask register, 5 scale (1, 2, 4 or 8).
;; The preparation code builds operand 6, the UNSPEC_VSIBADDR address formed
;; from operands 2, 3 and 5; operand 7 is a clobbered QImode scratch.
19358(define_expand "<avx512>_gatherdi<mode>"
19359  [(parallel [(set (match_operand:VI48F 0 "register_operand")
19360		   (unspec:VI48F
19361		     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
19362		      (match_operand:QI 4 "register_operand")
19363		      (mem:<ssescalarmode>
19364			(match_par_dup 6
19365			  [(match_operand 2 "vsib_address_operand")
19366			   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
19367			   (match_operand:SI 5 "const1248_operand")]))]
19368		     UNSPEC_GATHER))
19369	      (clobber (match_scratch:QI 7))])]
19370  "TARGET_AVX512F"
19371{
19372  operands[6]
19373    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
19374					operands[5]), UNSPEC_VSIBADDR);
19375})
19376
;; AVX-512 gather insn with 64-bit indices.  Operand 1 (source, in
;; <VEC_GATHER_SRCDI> mode) is tied to the destination and the QImode mask
;; input (operand 7) is tied to the clobbered "Yk" scratch operand 2.  The
;; template prints operand 1 instead of operand 0; the matching constraint
;; makes them the same hard register, printed in <VEC_GATHER_SRCDI> mode.
19377(define_insn "*avx512f_gatherdi<mode>"
19378  [(set (match_operand:VI48F 0 "register_operand" "=&v")
19379	(unspec:VI48F
19380	  [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
19381	   (match_operand:QI 7 "register_operand" "2")
19382	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19383	     [(unspec:P
19384		[(match_operand:P 4 "vsib_address_operand" "Tv")
19385		 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
19386		 (match_operand:SI 5 "const1248_operand" "n")]
19387		UNSPEC_VSIBADDR)])]
19388	  UNSPEC_GATHER))
19389   (clobber (match_scratch:QI 2 "=&Yk"))]
19390  "TARGET_AVX512F"
19391;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
19392;; gas changed what it requires incompatibly.
19393  "%M4v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
19394  [(set_attr "type" "ssemov")
19395   (set_attr "prefix" "evex")
19396   (set_attr "mode" "<sseinsnmode>")])
19397
;; Variant of the 64-bit-index AVX-512 gather in which the first unspec
;; element is (pc) rather than a source register.  The QImode mask input
;; (operand 6) is tied to the clobbered "Yk" scratch operand 1.
;; When the data mode differs from <VEC_GATHER_SRCDI>, the destination is
;; printed with %x0 (mode size other than 64 bytes) or %t0 (64-byte modes);
;; otherwise it is printed unmodified.
19398(define_insn "*avx512f_gatherdi<mode>_2"
19399  [(set (match_operand:VI48F 0 "register_operand" "=&v")
19400	(unspec:VI48F
19401	  [(pc)
19402	   (match_operand:QI 6 "register_operand" "1")
19403	   (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
19404	     [(unspec:P
19405		[(match_operand:P 3 "vsib_address_operand" "Tv")
19406		 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
19407		 (match_operand:SI 4 "const1248_operand" "n")]
19408		UNSPEC_VSIBADDR)])]
19409	  UNSPEC_GATHER))
19410   (clobber (match_scratch:QI 1 "=&Yk"))]
19411  "TARGET_AVX512F"
19412{
19413  /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
19414     gas changed what it requires incompatibly.  */
19415  if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
19416    {
19417      if (<MODE_SIZE> != 64)
19418	return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
19419      else
19420	return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
19421    }
19422  return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
19423}
19424  [(set_attr "type" "ssemov")
19425   (set_attr "prefix" "evex")
19426   (set_attr "mode" "<sseinsnmode>")])
19427
;; Expander for scatter with SImode-sized indices.
;;   operand 0: base address      operand 1: mask
;;   operand 2: index vector      operand 3: vector to store
;;   operand 4: scale (const1248_operand)
;; Operand 5 is synthesized below; operand 6 is a mask-mode scratch that
;; the pattern clobbers.
(define_expand "<avx512>_scattersi<mode>"
  [(parallel
     [(set (mem:VI48F
             (match_par_dup 5
               [(match_operand 0 "vsib_address_operand")
                (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
                (match_operand:SI 4 "const1248_operand")]))
           (unspec:VI48F
             [(match_operand:<avx512fmaskmode> 1 "register_operand")
              (match_operand:VI48F 3 "register_operand")]
             UNSPEC_SCATTER))
      (clobber (match_scratch:<avx512fmaskmode> 6))])]
  "TARGET_AVX512F"
{
  /* Wrap base, index and scale into the UNSPEC_VSIBADDR address that the
     match_par_dup above refers to.  */
  rtvec vsib_parts = gen_rtvec (3, operands[0], operands[2], operands[4]);
  operands[5] = gen_rtx_UNSPEC (Pmode, vsib_parts, UNSPEC_VSIBADDR);
})
19445
;; Scatter with SImode-sized indices.  The incoming mask (operand 6) is
;; tied by its "1" constraint to the early-clobbered mask scratch
;; (operand 1), which is the register named in the output template.
(define_insn "*avx512f_scattersi<mode>"
  [(set (match_operator:VI48F 5 "vsib_mem_operator"
          [(unspec:P
             [(match_operand:P 0 "vsib_address_operand" "Tv")
              (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
              (match_operand:SI 4 "const1248_operand" "n")]
             UNSPEC_VSIBADDR)])
        (unspec:VI48F
          [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
           (match_operand:VI48F 3 "register_operand" "v")]
          UNSPEC_SCATTER))
   (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
  "TARGET_AVX512F"
  ;; The Intel alternative prints the memory operand as %X5 so that no
  ;; *WORD PTR size prefix is emitted for -masm=intel; gas changed what it
  ;; requires there incompatibly.
  "%M0v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
19465
;; Expander for scatter with DImode-sized indices.
;;   operand 0: base address      operand 1: QImode mask
;;   operand 2: index vector      operand 3: vector to store
;;   operand 4: scale (const1248_operand)
;; Operand 5 is synthesized below; operand 6 is a QImode scratch that the
;; pattern clobbers.
(define_expand "<avx512>_scatterdi<mode>"
  [(parallel
     [(set (mem:VI48F
             (match_par_dup 5
               [(match_operand 0 "vsib_address_operand")
                (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
                (match_operand:SI 4 "const1248_operand")]))
           (unspec:VI48F
             [(match_operand:QI 1 "register_operand")
              (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
             UNSPEC_SCATTER))
      (clobber (match_scratch:QI 6))])]
  "TARGET_AVX512F"
{
  /* Wrap base, index and scale into the UNSPEC_VSIBADDR address that the
     match_par_dup above refers to.  */
  rtvec vsib_parts = gen_rtvec (3, operands[0], operands[2], operands[4]);
  operands[5] = gen_rtx_UNSPEC (Pmode, vsib_parts, UNSPEC_VSIBADDR);
})
19483
;; Scatter with DImode-sized indices.  The incoming QImode mask (operand 6)
;; is tied by its "1" constraint to the early-clobbered mask scratch
;; (operand 1), which is the register named in the output template.
(define_insn "*avx512f_scatterdi<mode>"
  [(set (match_operator:VI48F 5 "vsib_mem_operator"
          [(unspec:P
             [(match_operand:P 0 "vsib_address_operand" "Tv")
              (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
              (match_operand:SI 4 "const1248_operand" "n")]
             UNSPEC_VSIBADDR)])
        (unspec:VI48F
          [(match_operand:QI 6 "register_operand" "1")
           (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
          UNSPEC_SCATTER))
   (clobber (match_scratch:QI 1 "=&Yk"))]
  "TARGET_AVX512F"
  ;; The Intel alternative prints the memory operand as %X5 so that no
  ;; *WORD PTR size prefix is emitted for -masm=intel; gas changed what it
  ;; requires there incompatibly.
  "%M0v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
19503
;; Masked compress, register destination, 32/64-bit element modes.
;;   operand 1: source vector
;;   operand 2: merge source -- the destination itself ("0") or the zero
;;              vector ("C"); %N2 in the template prints the zero-masking
;;              marker in the latter case
;;   operand 3: mask register
(define_insn "<avx512>_compress<mode>_mask"
  [(set (match_operand:VI48F 0 "register_operand" "=v")
        (unspec:VI48F
          [(match_operand:VI48F 1 "register_operand" "v")
           (match_operand:VI48F 2 "vector_move_operand" "0C")
           (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
          UNSPEC_COMPRESS))]
  "TARGET_AVX512F"
  "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
19516
;; Masked compress, register destination, for the VI12_AVX512VLBW modes
;; (gated on AVX512VBMI2).  Operand roles mirror the VI48F pattern:
;; operand 1 source, operand 2 merge source ("0" or zero "C"), operand 3
;; mask register.
(define_insn "compress<mode>_mask"
  [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v")
        (unspec:VI12_AVX512VLBW
          [(match_operand:VI12_AVX512VLBW 1 "register_operand" "v")
           (match_operand:VI12_AVX512VLBW 2 "vector_move_operand" "0C")
           (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
          UNSPEC_COMPRESS))]
  "TARGET_AVX512VBMI2"
  "vpcompress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
19529
;; Masked compress with a memory destination, 32/64-bit element modes.
;; The (match_dup 0) inside the unspec makes the previous memory contents
;; an input of the operation.
;; NOTE(review): operand 1 uses the "x" constraint while the register form
;; of compress uses "v" -- confirm whether the narrower class is intended.
(define_insn "<avx512>_compressstore<mode>_mask"
  [(set (match_operand:VI48F 0 "memory_operand" "=m")
        (unspec:VI48F
          [(match_operand:VI48F 1 "register_operand" "x")
           (match_dup 0)
           (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
          UNSPEC_COMPRESS_STORE))]
  "TARGET_AVX512F"
  "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "mode" "<sseinsnmode>")])
19543
;; Masked compress with a memory destination for the VI12_AVX512VLBW modes
;; (gated on AVX512VBMI2).  The (match_dup 0) inside the unspec makes the
;; previous memory contents an input of the operation.
;; NOTE(review): operand 1 uses the "x" constraint while the register form
;; of compress uses "v" -- confirm whether the narrower class is intended.
(define_insn "compressstore<mode>_mask"
  [(set (match_operand:VI12_AVX512VLBW 0 "memory_operand" "=m")
        (unspec:VI12_AVX512VLBW
          [(match_operand:VI12_AVX512VLBW 1 "register_operand" "x")
           (match_dup 0)
           (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
          UNSPEC_COMPRESS_STORE))]
  "TARGET_AVX512VBMI2"
  "vpcompress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "mode" "<sseinsnmode>")])
19557
;; Zero-masking expand for 32/64-bit element modes.  Whatever the caller
;; passed as operand 2 is overwritten with the zero vector before the
;; UNSPEC_EXPAND pattern is emitted.
(define_expand "<avx512>_expand<mode>_maskz"
  [(set (match_operand:VI48F 0 "register_operand")
        (unspec:VI48F
          [(match_operand:VI48F 1 "nonimmediate_operand")
           (match_operand:VI48F 2 "vector_move_operand")
           (match_operand:<avx512fmaskmode> 3 "register_operand")]
          UNSPEC_EXPAND))]
  "TARGET_AVX512F"
{
  operands[2] = CONST0_RTX (<MODE>mode);
})
19567
;; Masked expand, 32/64-bit element modes.  Two alternatives: source in a
;; register ("v") or in memory ("m"); the "memory" attribute follows them.
;; Operand 2 is the merge source (destination "0" or zero "C"; %N2 prints
;; the zero-masking marker for the latter), operand 3 the mask register.
(define_insn "<avx512>_expand<mode>_mask"
  [(set (match_operand:VI48F 0 "register_operand" "=v,v")
        (unspec:VI48F
          [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
           (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
           (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
          UNSPEC_EXPAND))]
  "TARGET_AVX512F"
  "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "<sseinsnmode>")])
19581
;; Masked expand for the VI12_AVX512VLBW modes (gated on AVX512VBMI2).
;; Same operand layout and alternatives as the VI48F pattern.
;; NOTE(review): the mnemonic is built from <sseintprefix>, whereas the
;; matching compress pattern spells "vpcompress" literally -- presumably
;; <sseintprefix> yields "p" for these modes; its definition is elsewhere.
(define_insn "expand<mode>_mask"
  [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v,v")
        (unspec:VI12_AVX512VLBW
          [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand" "v,m")
           (match_operand:VI12_AVX512VLBW 2 "vector_move_operand" "0C,0C")
           (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
          UNSPEC_EXPAND))]
  "TARGET_AVX512VBMI2"
  "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "<sseinsnmode>")])
19595
;; Zero-masking expand for the VI12_AVX512VLBW modes.  Operand 2 is
;; overwritten with the zero vector before the pattern is emitted.
(define_expand "expand<mode>_maskz"
  [(set (match_operand:VI12_AVX512VLBW 0 "register_operand")
        (unspec:VI12_AVX512VLBW
          [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand")
           (match_operand:VI12_AVX512VLBW 2 "vector_move_operand")
           (match_operand:<avx512fmaskmode> 3 "register_operand")]
          UNSPEC_EXPAND))]
  "TARGET_AVX512VBMI2"
{
  operands[2] = CONST0_RTX (<MODE>mode);
})
19605
;; Packed vrange.  Operand 3 is a 0..15 immediate; the predicate and
;; constraint of operand 2 and the extra condition come from the
;; round_saeonly substitution attributes.
(define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (unspec:VF_AVX512VL
          [(match_operand:VF_AVX512VL 1 "register_operand" "v")
           (match_operand:VF_AVX512VL 2
             "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
           (match_operand:SI 3 "const_0_to_15_operand")]
          UNSPEC_RANGE))]
  "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
  "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
19618
;; Scalar vrange: the vec_merge with (const_int 1) takes element 0 from the
;; UNSPEC_RANGE result and the remaining elements from operand 1.
(define_insn "avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "register_operand" "v")
             (match_operand:VF_128 2
               "<round_saeonly_scalar_nimm_predicate>"
               "<round_saeonly_scalar_constraint>")
             (match_operand:SI 3 "const_0_to_15_operand")]
            UNSPEC_RANGE)
          (match_dup 1)
          (const_int 1)))]
  "TARGET_AVX512DQ"
  "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
19634
;; Packed vfpclass: writes a mask register from vector operand 1 and the
;; 8-bit immediate in operand 2.
(define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (unspec:<avx512fmaskmode>
          [(match_operand:VF_AVX512VL 1 "register_operand" "v")
           (match_operand:QI 2 "const_0_to_255_operand" "n")]
          UNSPEC_FPCLASS))]
  "TARGET_AVX512DQ"
  "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "type" "sse")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
19647
;; Scalar vfpclass: the UNSPEC_FPCLASS result is ANDed with (const_int 1),
;; so only bit 0 of the mask result is kept in the RTL description.
(define_insn "avx512dq_vmfpclass<mode>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (and:<avx512fmaskmode>
          (unspec:<avx512fmaskmode>
            [(match_operand:VF_128 1 "register_operand" "v")
             (match_operand:QI 2 "const_0_to_255_operand" "n")]
            UNSPEC_FPCLASS)
          (const_int 1)))]
  "TARGET_AVX512DQ"
  "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sse")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
19662
;; Packed vgetmant.  Operand 1 is the source (its constraint comes from the
;; round_saeonly substitution); operand 2 is a 0..15 immediate.
;; NOTE(review): unlike the neighbouring patterns no "type" attribute is
;; set here -- confirm that is intentional.
(define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (unspec:VF_AVX512VL
          [(match_operand:VF_AVX512VL 1
             "nonimmediate_operand" "<round_saeonly_constraint>")
           (match_operand:SI 2 "const_0_to_15_operand")]
          UNSPEC_GETMANT))]
  "TARGET_AVX512F"
  "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
19673
;; Scalar vgetmant: the vec_merge with (const_int 1) takes element 0 from
;; the UNSPEC_GETMANT result and the remaining elements from operand 1.
;; Operand 3 is a 0..15 immediate.
(define_insn "avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "register_operand" "v")
             (match_operand:VF_128 2
               "<round_saeonly_scalar_nimm_predicate>"
               "<round_saeonly_scalar_constraint>")
             (match_operand:SI 3 "const_0_to_15_operand")]
            UNSPEC_GETMANT)
          (match_dup 1)
          (const_int 1)))]
  "TARGET_AVX512F"
  "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<ssescalarmode>")])
19688
;; vdbpsadbw is modelled as an opaque unspec: an exact RTL representation
;; would be absolutely enormous and surely not generally useful.
;; Operands 1 and 2 are in <dbpsadbwmode>; operand 3 is an 8-bit immediate.
(define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
        (unspec:VI2_AVX512VL
          [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
           (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
           (match_operand:SI 3 "const_0_to_255_operand")]
          UNSPEC_DBPSADBW))]
  "TARGET_AVX512BW"
  "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
19704
;; Per-element count-leading-zeros (vplzcnt) on 32/64-bit integer vectors;
;; the source may be a register or memory.
(define_insn "clz<mode>2<mask_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (clz:VI48_AVX512VL
          (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512CD"
  "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
19714
;; vpconflict on 32/64-bit integer vectors, modelled as UNSPEC_CONFLICT of
;; a single register-or-memory source.
(define_insn "<mask_codefor>conflict<mode><mask_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (unspec:VI48_AVX512VL
          [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
          UNSPEC_CONFLICT))]
  "TARGET_AVX512CD"
  "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
19725
;; sha1msg1: two-operand form; operand 1 is tied to the destination ("0"),
;; operand 2 is a register or memory vector operand.
(define_insn "sha1msg1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI
          [(match_operand:V4SI 1 "register_operand" "0")
           (match_operand:V4SI 2 "vector_operand" "xBm")]
          UNSPEC_SHA1MSG1))]
  "TARGET_SHA"
  "sha1msg1\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
19736
;; sha1msg2: two-operand form; operand 1 is tied to the destination ("0"),
;; operand 2 is a register or memory vector operand.
(define_insn "sha1msg2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI
          [(match_operand:V4SI 1 "register_operand" "0")
           (match_operand:V4SI 2 "vector_operand" "xBm")]
          UNSPEC_SHA1MSG2))]
  "TARGET_SHA"
  "sha1msg2\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
19747
;; sha1nexte: two-operand form; operand 1 is tied to the destination ("0"),
;; operand 2 is a register or memory vector operand.
(define_insn "sha1nexte"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI
          [(match_operand:V4SI 1 "register_operand" "0")
           (match_operand:V4SI 2 "vector_operand" "xBm")]
          UNSPEC_SHA1NEXTE))]
  "TARGET_SHA"
  "sha1nexte\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
19758
;; sha1rnds4: operand 1 is tied to the destination ("0"), operand 2 is a
;; register or memory vector operand, operand 3 is a 0..3 immediate (hence
;; the one-byte length_immediate).
(define_insn "sha1rnds4"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI
          [(match_operand:V4SI 1 "register_operand" "0")
           (match_operand:V4SI 2 "vector_operand" "xBm")
           (match_operand:SI 3 "const_0_to_3_operand" "n")]
          UNSPEC_SHA1RNDS4))]
  "TARGET_SHA"
  "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
19771
;; sha256msg1: two-operand form; operand 1 is tied to the destination
;; ("0"), operand 2 is a register or memory vector operand.
(define_insn "sha256msg1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI
          [(match_operand:V4SI 1 "register_operand" "0")
           (match_operand:V4SI 2 "vector_operand" "xBm")]
          UNSPEC_SHA256MSG1))]
  "TARGET_SHA"
  "sha256msg1\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
19782
;; sha256msg2: two-operand form; operand 1 is tied to the destination
;; ("0"), operand 2 is a register or memory vector operand.
(define_insn "sha256msg2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI
          [(match_operand:V4SI 1 "register_operand" "0")
           (match_operand:V4SI 2 "vector_operand" "xBm")]
          UNSPEC_SHA256MSG2))]
  "TARGET_SHA"
  "sha256msg2\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
19793
;; sha256rnds2: operand 1 is tied to the destination ("0"), operand 2 is a
;; register or memory vector operand, and operand 3 is a *register* pinned
;; by the "Yz" constraint (the instruction's implicit third operand).
;;
;; There is no immediate byte in this instruction, so it must not carry
;; length_immediate (that attribute belongs to sha1rnds4, which does take
;; an imm8).  The opcode does use the three-byte 0F 38 escape, which is
;; what prefix_extra describes; the overall length estimate is unchanged.
(define_insn "sha256rnds2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI
          [(match_operand:V4SI 1 "register_operand" "0")
           (match_operand:V4SI 2 "vector_operand" "xBm")
           (match_operand:V4SI 3 "register_operand" "Yz")]
          UNSPEC_SHA256RNDS2))]
  "TARGET_SHA"
  "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
19806
;; Cast from the quarter-width vector mode to the full AVX512MODE2P mode.
;; Emitted as "#" and split after reload into a plain move: a register
;; destination is narrowed to the quarter-width mode, otherwise the source
;; is re-expressed as a lowpart subreg in the full mode.
(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
  [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
        (unspec:AVX512MODE2P
          [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
          UNSPEC_CAST))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  if (!REG_P (operands[0]))
    /* Destination is not a register: widen the view of the source.  */
    operands[1] = lowpart_subreg (<MODE>mode, operands[1],
				  <ssequartermode>mode);
  else
    /* Register destination: narrow it to the source's mode.  */
    operands[0] = gen_lowpart (<ssequartermode>mode, operands[0]);
})
19823
;; Cast from the half-width vector mode to the full AVX512MODE2P mode.
;; Emitted as "#" and split after reload into a plain move: a register
;; destination is narrowed to the half-width mode, otherwise the source is
;; re-expressed as a lowpart subreg in the full mode.
(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
  [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
        (unspec:AVX512MODE2P
          [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
          UNSPEC_CAST))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  if (!REG_P (operands[0]))
    /* Destination is not a register: widen the view of the source.  */
    operands[1] = lowpart_subreg (<MODE>mode, operands[1],
				  <ssehalfvecmode>mode);
  else
    /* Register destination: narrow it to the source's mode.  */
    operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
})
19840
;; Iterates over the two 52-bit multiply-add unspecs (low and high half).
(define_int_iterator VPMADD52
  [UNSPEC_VPMADD52LUQ UNSPEC_VPMADD52HUQ])
19844
;; Mnemonic / pattern-name suffix for each VPMADD52 unspec.
(define_int_attr vpmadd52type
  [(UNSPEC_VPMADD52LUQ "luq")
   (UNSPEC_VPMADD52HUQ "huq")])
19847
;; Zero-masking wrapper for the "huq" form: forwards operands 0-3 to the
;; *_maskz_1 pattern, inserting a zero vector as the merge operand ahead of
;; the mask (operand 4).
;; The "vpamdd52" spelling is part of the pattern name and therefore of the
;; generated gen_* function names; it has to stay as is.
(define_expand "vpamdd52huq<mode>_maskz"
  [(match_operand:VI8_AVX512VL 0 "register_operand")
   (match_operand:VI8_AVX512VL 1 "register_operand")
   (match_operand:VI8_AVX512VL 2 "register_operand")
   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512IFMA"
{
  rtx zero_merge = CONST0_RTX (<MODE>mode);
  emit_insn (gen_vpamdd52huq<mode>_maskz_1 (operands[0], operands[1],
					     operands[2], operands[3],
					     zero_merge, operands[4]));
  DONE;
})
19861
;; Zero-masking wrapper for the "luq" form: forwards operands 0-3 to the
;; *_maskz_1 pattern, inserting a zero vector as the merge operand ahead of
;; the mask (operand 4).
;; The "vpamdd52" spelling is part of the pattern name and therefore of the
;; generated gen_* function names; it has to stay as is.
(define_expand "vpamdd52luq<mode>_maskz"
  [(match_operand:VI8_AVX512VL 0 "register_operand")
   (match_operand:VI8_AVX512VL 1 "register_operand")
   (match_operand:VI8_AVX512VL 2 "register_operand")
   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512IFMA"
{
  rtx zero_merge = CONST0_RTX (<MODE>mode);
  emit_insn (gen_vpamdd52luq<mode>_maskz_1 (operands[0], operands[1],
					     operands[2], operands[3],
					     zero_merge, operands[4]));
  DONE;
})
19875
;; vpmadd52{luq,huq}: operand 1 is tied to the destination ("0");
;; operand 2 is a register and operand 3 a register or memory.
(define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
        (unspec:VI8_AVX512VL
          [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
           (match_operand:VI8_AVX512VL 2 "register_operand" "v")
           (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
          VPMADD52))]
  "TARGET_AVX512IFMA"
  "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
19888
;; Merge-masking vpmadd52{luq,huq}: elements not selected by the mask
;; (operand 4) come from operand 1, which is also tied to the destination.
(define_insn "vpamdd52<vpmadd52type><mode>_mask"
  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI8_AVX512VL
          (unspec:VI8_AVX512VL
            [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
             (match_operand:VI8_AVX512VL 2 "register_operand" "v")
             (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
            VPMADD52)
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512IFMA"
  "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
19904
;; vpmultishiftqb on byte vectors, modelled as UNSPEC_VPMULTISHIFT of a
;; register operand and a register-or-memory operand.
(define_insn "vpmultishiftqb<mode><mask_name>"
  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
        (unspec:VI1_AVX512VL
          [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
           (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
          UNSPEC_VPMULTISHIFT))]
  "TARGET_AVX512VBMI"
  "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
19916
;; The two 64-element modes used by the 4-iteration instructions, each
;; enabled by its own ISA flag.
(define_mode_iterator IMOD4
  [(V64SF "TARGET_AVX5124FMAPS")
   (V64SI "TARGET_AVX5124VNNIW")])
19919
;; 16-element mode that makes up one quarter of each IMOD4 mode.
(define_mode_attr imod4_narrow
  [(V64SF "V16SF")
   (V64SI "V16SI")])
19922
;; Move expander for the IMOD4 modes; all work is delegated to
;; ix86_expand_vector_move.
(define_expand "mov<mode>"
  [(set (match_operand:IMOD4 0 "nonimmediate_operand")
        (match_operand:IMOD4 1 "vector_move_operand"))]
  "TARGET_AVX512F"
{
  ix86_expand_vector_move (<MODE>mode, operands);
  DONE;
})
19931
;; Move of an IMOD4 value.  At least one side must be a register.  The insn
;; is emitted as "#" and split after reload into four moves in the narrow
;; mode, one per 64-byte quarter.
(define_insn_and_split "*mov<mode>_internal"
  [(set (match_operand:IMOD4 0 "nonimmediate_operand" "=v,v ,m")
        (match_operand:IMOD4 1 "vector_move_operand"  " C,vm,v"))]
  "TARGET_AVX512F
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  for (int part = 0; part < 4; part++)
    {
      /* Byte offset of this quarter within the wide operand.  */
      const int byte_off = part * 64;
      rtx dst = simplify_subreg (<imod4_narrow>mode, operands[0],
				 <MODE>mode, byte_off);
      rtx src = simplify_subreg (<imod4_narrow>mode, operands[1],
				 <MODE>mode, byte_off);
      emit_move_insn (dst, src);
    }
  DONE;
})
19955
;; v4fmaddps, unmasked.  Operand 1 (accumulator) is tied to the
;; destination; operand 2 is a V64SF register block ("Yh", printed with
;; %g); operand 3 is a V4SF memory operand.
(define_insn "avx5124fmaddps_4fmaddps"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
        (unspec:V16SF
          [(match_operand:V16SF 1 "register_operand" "0")
           (match_operand:V64SF 2 "register_operand" "Yh")
           (match_operand:V4SF 3 "memory_operand" "m")]
          UNSPEC_VP4FMADD))]
  "TARGET_AVX5124FMAPS"
  "v4fmaddps\t{%3, %g2, %0|%0, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])
19967
;; v4fmaddps, merge-masking.  Operand 3 is tied to the destination and
;; serves as the vec_merge fallback; operand 4 is the HImode mask.
(define_insn "avx5124fmaddps_4fmaddps_mask"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
        (vec_merge:V16SF
          (unspec:V16SF
            [(match_operand:V64SF 1 "register_operand" "Yh")
             (match_operand:V4SF 2 "memory_operand" "m")]
            UNSPEC_VP4FMADD)
          (match_operand:V16SF 3 "register_operand" "0")
          (match_operand:HI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])
19981
;; v4fmaddps, zero-masking.  Operand 1 (accumulator) is tied to the
;; destination; operand 4 must be the zero vector; operand 5 is the HImode
;; mask.
(define_insn "avx5124fmaddps_4fmaddps_maskz"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
        (vec_merge:V16SF
          (unspec:V16SF
            [(match_operand:V16SF 1 "register_operand" "0")
             (match_operand:V64SF 2 "register_operand" "Yh")
             (match_operand:V4SF 3 "memory_operand" "m")]
            UNSPEC_VP4FMADD)
          (match_operand:V16SF 4 "const0_operand" "C")
          (match_operand:HI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])
19996
;; v4fmaddss, unmasked.  Operand 1 (accumulator) is tied to the
;; destination; operand 2 is a V64SF register block ("Yh", printed with
;; %x); operand 3 is a V4SF memory operand.
(define_insn "avx5124fmaddps_4fmaddss"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (unspec:V4SF
          [(match_operand:V4SF 1 "register_operand" "0")
           (match_operand:V64SF 2 "register_operand" "Yh")
           (match_operand:V4SF 3 "memory_operand" "m")]
          UNSPEC_VP4FMADD))]
  "TARGET_AVX5124FMAPS"
  "v4fmaddss\t{%3, %x2, %0|%0, %x2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SF")])
20008
;; v4fmaddss, merge-masking.  Operand 3 is tied to the destination and
;; serves as the vec_merge fallback; operand 4 is the QImode mask.
(define_insn "avx5124fmaddps_4fmaddss_mask"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (vec_merge:V4SF
          (unspec:V4SF
            [(match_operand:V64SF 1 "register_operand" "Yh")
             (match_operand:V4SF 2 "memory_operand" "m")]
            UNSPEC_VP4FMADD)
          (match_operand:V4SF 3 "register_operand" "0")
          (match_operand:QI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SF")])
20022
;; v4fmaddss, zero-masking.  Operand 1 (accumulator) is tied to the
;; destination; operand 4 must be the zero vector; operand 5 is the QImode
;; mask.
(define_insn "avx5124fmaddps_4fmaddss_maskz"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (vec_merge:V4SF
          (unspec:V4SF
            [(match_operand:V4SF 1 "register_operand" "0")
             (match_operand:V64SF 2 "register_operand" "Yh")
             (match_operand:V4SF 3 "memory_operand" "m")]
            UNSPEC_VP4FMADD)
          (match_operand:V4SF 4 "const0_operand" "C")
          (match_operand:QI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SF")])
20037
;; v4fnmaddps, unmasked.  Operand 1 (accumulator) is tied to the
;; destination; operand 2 is a V64SF register block ("Yh", printed with
;; %g); operand 3 is a V4SF memory operand.
(define_insn "avx5124fmaddps_4fnmaddps"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
        (unspec:V16SF
          [(match_operand:V16SF 1 "register_operand" "0")
           (match_operand:V64SF 2 "register_operand" "Yh")
           (match_operand:V4SF 3 "memory_operand" "m")]
          UNSPEC_VP4FNMADD))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddps\t{%3, %g2, %0|%0, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])
20049
;; v4fnmaddps, merge-masking.  Operand 3 is tied to the destination and
;; serves as the vec_merge fallback; operand 4 is the HImode mask.
(define_insn "avx5124fmaddps_4fnmaddps_mask"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
        (vec_merge:V16SF
          (unspec:V16SF
            [(match_operand:V64SF 1 "register_operand" "Yh")
             (match_operand:V4SF 2 "memory_operand" "m")]
            UNSPEC_VP4FNMADD)
          (match_operand:V16SF 3 "register_operand" "0")
          (match_operand:HI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])
20063
;; v4fnmaddps, zero-masking.  Operand 1 (accumulator) is tied to the
;; destination; operand 4 must be the zero vector; operand 5 is the HImode
;; mask.
(define_insn "avx5124fmaddps_4fnmaddps_maskz"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
        (vec_merge:V16SF
          (unspec:V16SF
            [(match_operand:V16SF 1 "register_operand" "0")
             (match_operand:V64SF 2 "register_operand" "Yh")
             (match_operand:V4SF 3 "memory_operand" "m")]
            UNSPEC_VP4FNMADD)
          (match_operand:V16SF 4 "const0_operand" "C")
          (match_operand:HI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])
20078
;; v4fnmaddss, unmasked.  Operand 1 (accumulator) is tied to the
;; destination; operand 2 is a V64SF register block ("Yh", printed with
;; %x); operand 3 is a V4SF memory operand.
(define_insn "avx5124fmaddps_4fnmaddss"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (unspec:V4SF
          [(match_operand:V4SF 1 "register_operand" "0")
           (match_operand:V64SF 2 "register_operand" "Yh")
           (match_operand:V4SF 3 "memory_operand" "m")]
          UNSPEC_VP4FNMADD))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddss\t{%3, %x2, %0|%0, %x2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SF")])
20090
;; v4fnmaddss, merge-masking.  Operand 3 is tied to the destination and
;; serves as the vec_merge fallback; operand 4 is the QImode mask.
(define_insn "avx5124fmaddps_4fnmaddss_mask"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (vec_merge:V4SF
          (unspec:V4SF
            [(match_operand:V64SF 1 "register_operand" "Yh")
             (match_operand:V4SF 2 "memory_operand" "m")]
            UNSPEC_VP4FNMADD)
          (match_operand:V4SF 3 "register_operand" "0")
          (match_operand:QI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SF")])
20104
;; v4fnmaddss, zero-masking.  Operand 1 (accumulator) is tied to the
;; destination; operand 4 must be the zero vector; operand 5 is the QImode
;; mask.
(define_insn "avx5124fmaddps_4fnmaddss_maskz"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (vec_merge:V4SF
          (unspec:V4SF
            [(match_operand:V4SF 1 "register_operand" "0")
             (match_operand:V64SF 2 "register_operand" "Yh")
             (match_operand:V4SF 3 "memory_operand" "m")]
            UNSPEC_VP4FNMADD)
          (match_operand:V4SF 4 "const0_operand" "C")
          (match_operand:QI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SF")])
20119
;; vp4dpwssd, unmasked.  Operand 1 (accumulator) is tied to the
;; destination; operand 2 is a V64SI register block ("Yh", printed with
;; %g); operand 3 is a V4SI memory operand.
(define_insn "avx5124vnniw_vp4dpwssd"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (unspec:V16SI
          [(match_operand:V16SI 1 "register_operand" "0")
           (match_operand:V64SI 2 "register_operand" "Yh")
           (match_operand:V4SI 3 "memory_operand" "m")]
          UNSPEC_VP4DPWSSD))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssd\t{%3, %g2, %0|%0, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
20131
;; vp4dpwssd, merge-masking.  Operand 3 is tied to the destination and
;; serves as the vec_merge fallback; operand 4 is the HImode mask.
(define_insn "avx5124vnniw_vp4dpwssd_mask"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (vec_merge:V16SI
          (unspec:V16SI
            [(match_operand:V64SI 1 "register_operand" "Yh")
             (match_operand:V4SI 2 "memory_operand" "m")]
            UNSPEC_VP4DPWSSD)
          (match_operand:V16SI 3 "register_operand" "0")
          (match_operand:HI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssd\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
20145
;; vp4dpwssd, zero-masking.  Operand 1 (accumulator) is tied to the
;; destination; operand 4 must be the zero vector; operand 5 is the HImode
;; mask.
(define_insn "avx5124vnniw_vp4dpwssd_maskz"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (vec_merge:V16SI
          (unspec:V16SI
            [(match_operand:V16SI 1 "register_operand" "0")
             (match_operand:V64SI 2 "register_operand" "Yh")
             (match_operand:V4SI 3 "memory_operand" "m")]
            UNSPEC_VP4DPWSSD)
          (match_operand:V16SI 4 "const0_operand" "C")
          (match_operand:HI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssd\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
20160
;; vp4dpwssds, unmasked.  Operand 1 (accumulator) is tied to the
;; destination; operand 2 is a V64SI register block ("Yh", printed with
;; %g); operand 3 is a V4SI memory operand.
(define_insn "avx5124vnniw_vp4dpwssds"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (unspec:V16SI
          [(match_operand:V16SI 1 "register_operand" "0")
           (match_operand:V64SI 2 "register_operand" "Yh")
           (match_operand:V4SI 3 "memory_operand" "m")]
          UNSPEC_VP4DPWSSDS))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssds\t{%3, %g2, %0|%0, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
20172
;; vp4dpwssds, merge-masking.  Operand 3 is tied to the destination and
;; serves as the vec_merge fallback; operand 4 is the HImode mask.
(define_insn "avx5124vnniw_vp4dpwssds_mask"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (vec_merge:V16SI
          (unspec:V16SI
            [(match_operand:V64SI 1 "register_operand" "Yh")
             (match_operand:V4SI 2 "memory_operand" "m")]
            UNSPEC_VP4DPWSSDS)
          (match_operand:V16SI 3 "register_operand" "0")
          (match_operand:HI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssds\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
20186
;; VP4DPWSSDS, zero-masking form.  The unspec result is merged with the
;; zero vector (operand 4) under the HImode mask in operand 5.
;; Operand 1 is tied to the destination; operand 2 is a V64SI register
;; (constraint "Yh"); operand 3 is a V4SI memory operand.
(define_insn "avx5124vnniw_vp4dpwssds_maskz"
  [(set
     (match_operand:V16SI 0 "register_operand" "=v")
     (vec_merge:V16SI
       (unspec:V16SI
         [(match_operand:V16SI 1 "register_operand" "0")
          (match_operand:V64SI 2 "register_operand" "Yh")
          (match_operand:V4SI 3 "memory_operand" "m")]
         UNSPEC_VP4DPWSSDS)
       (match_operand:V16SI 4 "const0_operand" "C")
       (match_operand:HI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssds\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
20201
;; Per-element population count for the VI48_AVX512VL modes, available
;; under TARGET_AVX512VPOPCNTDQ.  The <mask_name> / <mask_operand2>
;; substitutions are defined outside this part of the file.
(define_insn "vpopcount<mode><mask_name>"
  [(set
     (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
     (popcount:VI48_AVX512VL
       (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512VPOPCNTDQ"
  "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
20208
;; Out-of-line save of multiple registers.  The parallel is validated by
;; the "save_multiple" predicate and the insn is emitted as a call to the
;; symbol in operand 1.
(define_insn "save_multiple<mode>"
  [(match_parallel 0 "save_multiple"
     [(use (match_operand:P 1 "symbol_operand"))])]
  "TARGET_SSE && TARGET_64BIT"
  "call\t%P1")
20215
;; Out-of-line restore of multiple registers.  The parallel is validated
;; by the "restore_multiple" predicate and the insn is emitted as a call
;; to the symbol in operand 1.
(define_insn "restore_multiple<mode>"
  [(match_parallel 0 "restore_multiple"
     [(use (match_operand:P 1 "symbol_operand"))])]
  "TARGET_SSE && TARGET_64BIT"
  "call\t%P1")
20222
;; Out-of-line restore of multiple registers followed by a return.
;; The pattern sets SP from R10 and is emitted as a jump (not a call) to
;; the symbol in operand 1.
(define_insn "restore_multiple_and_return<mode>"
  [(match_parallel 0 "restore_multiple"
     [(return)
      (use (match_operand:P 1 "symbol_operand"))
      (set (reg:DI SP_REG) (reg:DI R10_REG))])]
  "TARGET_SSE && TARGET_64BIT"
  "jmp\t%P1")
20232
;; Out-of-line restore of multiple registers when the hard frame pointer
;; is in use: the leave operation is performed before returning from the
;; function.  The pattern sets SP to BP + 8, reloads BP from the memory it
;; points to, and clobbers (mem:BLK (scratch)).  It is emitted as a jump
;; to the symbol in operand 1.
(define_insn "restore_multiple_leave_return<mode>"
  [(match_parallel 0 "restore_multiple"
     [(return)
      (use (match_operand:P 1 "symbol_operand"))
      (set (reg:DI SP_REG)
           (plus:DI (reg:DI BP_REG) (const_int 8)))
      (set (reg:DI BP_REG)
           (mem:DI (reg:DI BP_REG)))
      (clobber (mem:BLK (scratch)))])]
  "TARGET_SSE && TARGET_64BIT"
  "jmp\t%P1")
20245
;; Per-element population count for the VI12_AVX512VL modes, available
;; under TARGET_AVX512BITALG.  The <mask_name> / <mask_operand2>
;; substitutions are defined outside this part of the file.
(define_insn "vpopcount<mode><mask_name>"
  [(set
     (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
     (popcount:VI12_AVX512VL
       (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512BITALG"
  "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
20252
;; GF2P8AFFINEINVQB / VGF2P8AFFINEINVQB.
;; Alternative 0 is the legacy SSE encoding (destination tied to
;; operand 1); alternatives 1 and 2 are the AVX and AVX512F encodings.
;; Operand 3 is an 8-bit immediate.
;;
;; The two vector sources play different roles in the instruction (one
;; supplies the data bytes, the other the bit matrix), so the operation
;; is NOT commutative.  Operand 1 must therefore not carry the '%'
;; modifier: with it, register allocation could legally swap operands 1
;; and 2 and silently compute a different result.
(define_insn "vgf2p8affineinvqb_<mode><mask_name>"
  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
	(unspec:VI1_AVX512F
	  [(match_operand:VI1_AVX512F 1 "register_operand" "0,x,v")
	   (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")
	   (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]
	  UNSPEC_GF2P8AFFINEINV))]
  "TARGET_GFNI"
  "@
   gf2p8affineinvqb\t{%3, %2, %0| %0, %2, %3}
   vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}
   vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx,avx512f")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex,evex")
   (set_attr "mode" "<sseinsnmode>")])
20270
;; GF2P8AFFINEQB / VGF2P8AFFINEQB.
;; Alternative 0 is the legacy SSE encoding (destination tied to
;; operand 1); alternatives 1 and 2 are the AVX and AVX512F encodings.
;; Operand 3 is an 8-bit immediate.
;;
;; The two vector sources play different roles in the instruction (one
;; supplies the data bytes, the other the bit matrix), so the operation
;; is NOT commutative.  Operand 1 must therefore not carry the '%'
;; modifier: with it, register allocation could legally swap operands 1
;; and 2 and silently compute a different result.
(define_insn "vgf2p8affineqb_<mode><mask_name>"
  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
	(unspec:VI1_AVX512F
	  [(match_operand:VI1_AVX512F 1 "register_operand" "0,x,v")
	   (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")
	   (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]
	  UNSPEC_GF2P8AFFINE))]
  "TARGET_GFNI"
  "@
   gf2p8affineqb\t{%3, %2, %0| %0, %2, %3}
   vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}
   vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx,avx512f")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex,evex")
   (set_attr "mode" "<sseinsnmode>")])
20288
;; GF2P8MULB / VGF2P8MULB.
;; Alternative 0 is the legacy SSE encoding (destination tied to
;; operand 1); alternatives 1 and 2 are the AVX and AVX512F encodings.
;; Operand 1 is marked '%', i.e. commutative with operand 2.
(define_insn "vgf2p8mulb_<mode><mask_name>"
  [(set
     (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
     (unspec:VI1_AVX512F
       [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
        (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")]
       UNSPEC_GF2P8MUL))]
  "TARGET_GFNI"
  "@
   gf2p8mulb\t{%2, %0| %0, %2}
   vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}
   vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx,avx512f")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex,evex")
   (set_attr "mode" "<sseinsnmode>")])
20305
;; VPSHRD with an immediate count (operand 3, 0..255) for the
;; VI248_AVX512VL modes.  Operand 1 is a register; operand 2 may be a
;; register or memory.
(define_insn "vpshrd_<mode><mask_name>"
  [(set
     (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
     (unspec:VI248_AVX512VL
       [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
        (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
        (match_operand:SI 3 "const_0_to_255_operand" "n")]
       UNSPEC_VPSHRD))]
  "TARGET_AVX512VBMI2"
  "vpshrd<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
  [(set_attr "prefix" "evex")])
20316
;; VPSHLD with an immediate count (operand 3, 0..255) for the
;; VI248_AVX512VL modes.  Operand 1 is a register; operand 2 may be a
;; register or memory.
(define_insn "vpshld_<mode><mask_name>"
  [(set
     (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
     (unspec:VI248_AVX512VL
       [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
        (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
        (match_operand:SI 3 "const_0_to_255_operand" "n")]
       UNSPEC_VPSHLD))]
  "TARGET_AVX512VBMI2"
  "vpshld<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
  [(set_attr "prefix" "evex")])
20327
;; VPSHRDV, unmasked form.  Operand 1 is tied to the destination;
;; operand 2 is a register; operand 3 may be a register or memory.
(define_insn "vpshrdv_<mode>"
  [(set
     (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
     (unspec:VI248_AVX512VL
       [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
        (match_operand:VI248_AVX512VL 2 "register_operand" "v")
        (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
       UNSPEC_VPSHRDV))]
  "TARGET_AVX512VBMI2"
  "vpshrdv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
20339
;; VPSHRDV, merge-masking form.  The unspec result is merged with
;; operand 1 (match_dup 1, tied to the destination) under the mask in
;; operand 4.
(define_insn "vpshrdv_<mode>_mask"
  [(set
     (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI248_AVX512VL
       (unspec:VI248_AVX512VL
         [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
          (match_operand:VI248_AVX512VL 2 "register_operand" "v")
          (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPSHRDV)
       (match_dup 1)
       (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
20354
;; Expander for the zero-masking form of VPSHRDV.  It forwards its five
;; operands to vpshrdv_<mode>_maskz_1, inserting the zero vector of the
;; current mode as the merge source ahead of the mask.
(define_expand "vpshrdv_<mode>_maskz"
  [(match_operand:VI248_AVX512VL 0 "register_operand")
   (match_operand:VI248_AVX512VL 1 "register_operand")
   (match_operand:VI248_AVX512VL 2 "register_operand")
   (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VBMI2"
{
  rtx zero_vec = CONST0_RTX (<MODE>mode);
  rtx mask = operands[4];

  emit_insn (gen_vpshrdv_<mode>_maskz_1 (operands[0], operands[1],
					 operands[2], operands[3],
					 zero_vec, mask));
  DONE;
})
20369
;; VPSHRDV, zero-masking form.  The unspec result is merged with the
;; zero vector (operand 4) under the mask in operand 5.  Operand 1 is
;; tied to the destination.
(define_insn "vpshrdv_<mode>_maskz_1"
  [(set
     (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI248_AVX512VL
       (unspec:VI248_AVX512VL
         [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
          (match_operand:VI248_AVX512VL 2 "register_operand" "v")
          (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPSHRDV)
       (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
       (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
20384
;; VPSHLDV, unmasked form.  Operand 1 is tied to the destination;
;; operand 2 is a register; operand 3 may be a register or memory.
(define_insn "vpshldv_<mode>"
  [(set
     (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
     (unspec:VI248_AVX512VL
       [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
        (match_operand:VI248_AVX512VL 2 "register_operand" "v")
        (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
       UNSPEC_VPSHLDV))]
  "TARGET_AVX512VBMI2"
  "vpshldv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
20396
;; VPSHLDV, merge-masking form.  The unspec result is merged with
;; operand 1 (match_dup 1, tied to the destination) under the mask in
;; operand 4.
(define_insn "vpshldv_<mode>_mask"
  [(set
     (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI248_AVX512VL
       (unspec:VI248_AVX512VL
         [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
          (match_operand:VI248_AVX512VL 2 "register_operand" "v")
          (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPSHLDV)
       (match_dup 1)
       (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
20411
;; Expander for the zero-masking form of VPSHLDV.  It forwards its five
;; operands to vpshldv_<mode>_maskz_1, inserting the zero vector of the
;; current mode as the merge source ahead of the mask.
(define_expand "vpshldv_<mode>_maskz"
  [(match_operand:VI248_AVX512VL 0 "register_operand")
   (match_operand:VI248_AVX512VL 1 "register_operand")
   (match_operand:VI248_AVX512VL 2 "register_operand")
   (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VBMI2"
{
  rtx zero_vec = CONST0_RTX (<MODE>mode);
  rtx mask = operands[4];

  emit_insn (gen_vpshldv_<mode>_maskz_1 (operands[0], operands[1],
					 operands[2], operands[3],
					 zero_vec, mask));
  DONE;
})
20426
;; VPSHLDV, zero-masking form.  The unspec result is merged with the
;; zero vector (operand 4) under the mask in operand 5.  Operand 1 is
;; tied to the destination.
(define_insn "vpshldv_<mode>_maskz_1"
  [(set
     (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI248_AVX512VL
       (unspec:VI248_AVX512VL
         [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
          (match_operand:VI248_AVX512VL 2 "register_operand" "v")
          (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPSHLDV)
       (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
       (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
20441
;; VPDPBUSD, unmasked form, for the VI4_AVX512VL modes.  Operand 1 is
;; tied to the destination; operand 2 is a register; operand 3 may be a
;; register or memory.
(define_insn "vpdpbusd_<mode>"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (unspec:VI4_AVX512VL
       [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
        (match_operand:VI4_AVX512VL 2 "register_operand" "v")
        (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
       UNSPEC_VPMADDUBSWACCD))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr "prefix" "evex")])
20452
;; VPDPBUSD, merge-masking form.  The unspec result is merged with
;; operand 1 (match_dup 1, tied to the destination) under the mask in
;; operand 4.
(define_insn "vpdpbusd_<mode>_mask"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI4_AVX512VL
       (unspec:VI4_AVX512VL
         [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
          (match_operand:VI4_AVX512VL 2 "register_operand" "v")
          (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPMADDUBSWACCD)
       (match_dup 1)
       (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
20466
;; Expander for the zero-masking form of VPDPBUSD.  It forwards its five
;; operands to vpdpbusd_<mode>_maskz_1, inserting the zero vector of the
;; current mode as the merge source ahead of the mask.
(define_expand "vpdpbusd_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  rtx zero_vec = CONST0_RTX (<MODE>mode);
  rtx mask = operands[4];

  emit_insn (gen_vpdpbusd_<mode>_maskz_1 (operands[0], operands[1],
					  operands[2], operands[3],
					  zero_vec, mask));
  DONE;
})
20481
;; VPDPBUSD, zero-masking form.  The unspec result is merged with the
;; zero vector (operand 4) under the mask in operand 5.  Operand 1 is
;; tied to the destination.
(define_insn "vpdpbusd_<mode>_maskz_1"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI4_AVX512VL
       (unspec:VI4_AVX512VL
         [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
          (match_operand:VI4_AVX512VL 2 "register_operand" "v")
          (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPMADDUBSWACCD)
       (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
       (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
20495
20496
;; VPDPBUSDS, unmasked form, for the VI4_AVX512VL modes.  Operand 1 is
;; tied to the destination; operand 2 is a register; operand 3 may be a
;; register or memory.
(define_insn "vpdpbusds_<mode>"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (unspec:VI4_AVX512VL
       [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
        (match_operand:VI4_AVX512VL 2 "register_operand" "v")
        (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
       UNSPEC_VPMADDUBSWACCSSD))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr "prefix" "evex")])
20507
;; VPDPBUSDS, merge-masking form.  The unspec result is merged with
;; operand 1 (match_dup 1, tied to the destination) under the mask in
;; operand 4.
(define_insn "vpdpbusds_<mode>_mask"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI4_AVX512VL
       (unspec:VI4_AVX512VL
         [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
          (match_operand:VI4_AVX512VL 2 "register_operand" "v")
          (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPMADDUBSWACCSSD)
       (match_dup 1)
       (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
20521
;; Expander for the zero-masking form of VPDPBUSDS.  It forwards its
;; five operands to vpdpbusds_<mode>_maskz_1, inserting the zero vector
;; of the current mode as the merge source ahead of the mask.
(define_expand "vpdpbusds_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  rtx zero_vec = CONST0_RTX (<MODE>mode);
  rtx mask = operands[4];

  emit_insn (gen_vpdpbusds_<mode>_maskz_1 (operands[0], operands[1],
					   operands[2], operands[3],
					   zero_vec, mask));
  DONE;
})
20536
;; VPDPBUSDS, zero-masking form.  The unspec result is merged with the
;; zero vector (operand 4) under the mask in operand 5.  Operand 1 is
;; tied to the destination.
(define_insn "vpdpbusds_<mode>_maskz_1"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI4_AVX512VL
       (unspec:VI4_AVX512VL
         [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
          (match_operand:VI4_AVX512VL 2 "register_operand" "v")
          (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPMADDUBSWACCSSD)
       (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
       (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
20550
20551
;; VPDPWSSD, unmasked form, for the VI4_AVX512VL modes.  Operand 1 is
;; tied to the destination; operand 2 is a register; operand 3 may be a
;; register or memory.
(define_insn "vpdpwssd_<mode>"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (unspec:VI4_AVX512VL
       [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
        (match_operand:VI4_AVX512VL 2 "register_operand" "v")
        (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
       UNSPEC_VPMADDWDACCD))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr "prefix" "evex")])
20562
;; VPDPWSSD, merge-masking form.  The unspec result is merged with
;; operand 1 (match_dup 1, tied to the destination) under the mask in
;; operand 4.
(define_insn "vpdpwssd_<mode>_mask"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI4_AVX512VL
       (unspec:VI4_AVX512VL
         [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
          (match_operand:VI4_AVX512VL 2 "register_operand" "v")
          (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPMADDWDACCD)
       (match_dup 1)
       (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
20576
;; Expander for the zero-masking form of VPDPWSSD.  It forwards its five
;; operands to vpdpwssd_<mode>_maskz_1, inserting the zero vector of the
;; current mode as the merge source ahead of the mask.
(define_expand "vpdpwssd_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  rtx zero_vec = CONST0_RTX (<MODE>mode);
  rtx mask = operands[4];

  emit_insn (gen_vpdpwssd_<mode>_maskz_1 (operands[0], operands[1],
					  operands[2], operands[3],
					  zero_vec, mask));
  DONE;
})
20591
;; VPDPWSSD, zero-masking form.  The unspec result is merged with the
;; zero vector (operand 4) under the mask in operand 5.  Operand 1 is
;; tied to the destination.
(define_insn "vpdpwssd_<mode>_maskz_1"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI4_AVX512VL
       (unspec:VI4_AVX512VL
         [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
          (match_operand:VI4_AVX512VL 2 "register_operand" "v")
          (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPMADDWDACCD)
       (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
       (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
20605
20606
;; VPDPWSSDS, unmasked form, for the VI4_AVX512VL modes.  Operand 1 is
;; tied to the destination; operand 2 is a register; operand 3 may be a
;; register or memory.
(define_insn "vpdpwssds_<mode>"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (unspec:VI4_AVX512VL
       [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
        (match_operand:VI4_AVX512VL 2 "register_operand" "v")
        (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
       UNSPEC_VPMADDWDACCSSD))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr "prefix" "evex")])
20617
;; VPDPWSSDS, merge-masking form.  The unspec result is merged with
;; operand 1 (match_dup 1, tied to the destination) under the mask in
;; operand 4.
(define_insn "vpdpwssds_<mode>_mask"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI4_AVX512VL
       (unspec:VI4_AVX512VL
         [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
          (match_operand:VI4_AVX512VL 2 "register_operand" "v")
          (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPMADDWDACCSSD)
       (match_dup 1)
       (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
20631
;; Expander for the zero-masking form of VPDPWSSDS.  It forwards its
;; five operands to vpdpwssds_<mode>_maskz_1, inserting the zero vector
;; of the current mode as the merge source ahead of the mask.
(define_expand "vpdpwssds_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  rtx zero_vec = CONST0_RTX (<MODE>mode);
  rtx mask = operands[4];

  emit_insn (gen_vpdpwssds_<mode>_maskz_1 (operands[0], operands[1],
					   operands[2], operands[3],
					   zero_vec, mask));
  DONE;
})
20646
;; VPDPWSSDS, zero-masking form.  The unspec result is merged with the
;; zero vector (operand 4) under the mask in operand 5.  Operand 1 is
;; tied to the destination.
(define_insn "vpdpwssds_<mode>_maskz_1"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI4_AVX512VL
       (unspec:VI4_AVX512VL
         [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
          (match_operand:VI4_AVX512VL 2 "register_operand" "v")
          (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPMADDWDACCSSD)
       (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
       (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
20660
;; VAESDEC for the VI1_AVX512VL_F modes, available under TARGET_VAES.
;; Operand 1 is a register.  Operand 2 uses the vector_operand predicate
;; and is given the "vm" constraint, matching vaesenc_<mode> and
;; vaesenclast_<mode>: the instruction accepts a memory second source,
;; so a register-only constraint would force an unnecessary reload of
;; any memory operand the predicate lets through.
(define_insn "vaesdec_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL_F
	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
	  UNSPEC_VAESDEC))]
  "TARGET_VAES"
  "vaesdec\t{%2, %1, %0|%0, %1, %2}"
)
20670
;; VAESDECLAST for the VI1_AVX512VL_F modes, available under TARGET_VAES.
;; Operand 1 is a register.  Operand 2 uses the vector_operand predicate
;; and is given the "vm" constraint, matching vaesenc_<mode> and
;; vaesenclast_<mode>: the instruction accepts a memory second source,
;; so a register-only constraint would force an unnecessary reload of
;; any memory operand the predicate lets through.
(define_insn "vaesdeclast_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL_F
	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
	  UNSPEC_VAESDECLAST))]
  "TARGET_VAES"
  "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
)
20680
;; VAESENC for the VI1_AVX512VL_F modes, available under TARGET_VAES.
;; Operand 1 is a register; operand 2 may be a register or memory.
(define_insn "vaesenc_<mode>"
  [(set
     (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
     (unspec:VI1_AVX512VL_F
       [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
        (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
       UNSPEC_VAESENC))]
  "TARGET_VAES"
  "vaesenc\t{%2, %1, %0|%0, %1, %2}")
20690
;; VAESENCLAST for the VI1_AVX512VL_F modes, available under TARGET_VAES.
;; Operand 1 is a register; operand 2 may be a register or memory.
(define_insn "vaesenclast_<mode>"
  [(set
     (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
     (unspec:VI1_AVX512VL_F
       [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
        (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
       UNSPEC_VAESENCLAST))]
  "TARGET_VAES"
  "vaesenclast\t{%2, %1, %0|%0, %1, %2}")
20700
;; VPCLMULQDQ for the VI8_FVL modes, available under TARGET_VPCLMULQDQ.
;; Operand 1 is a register, operand 2 may be a register or memory, and
;; operand 3 is an immediate in the range 0..255.
(define_insn "vpclmulqdq_<mode>"
  [(set
     (match_operand:VI8_FVL 0 "register_operand" "=v")
     (unspec:VI8_FVL
       [(match_operand:VI8_FVL 1 "register_operand" "v")
        (match_operand:VI8_FVL 2 "vector_operand" "vm")
        (match_operand:SI 3 "const_0_to_255_operand" "n")]
       UNSPEC_VPCLMULQDQ))]
  "TARGET_VPCLMULQDQ"
  "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "DI")])
20710
;; VPSHUFBITQMB for the VI1_AVX512VLBW modes, available under
;; TARGET_AVX512BITALG.  The destination is a mask register (constraint
;; "Yk") in the mask mode corresponding to the vector mode.  Operand 1
;; is a vector register; operand 2 may be a register or memory.
(define_insn "avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>"
  [(set
     (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
     (unspec:<avx512fmaskmode>
       [(match_operand:VI1_AVX512VLBW 1 "register_operand" "v")
        (match_operand:VI1_AVX512VLBW 2 "nonimmediate_operand" "vm")]
       UNSPEC_VPSHUFBIT))]
  "TARGET_AVX512BITALG"
  "vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
20721