xref: /dragonfly/contrib/gcc-8.0/gcc/config/i386/sse.md (revision c87dd536)
1;; GCC machine description for SSE instructions
2;; Copyright (C) 2005-2018 Free Software Foundation, Inc.
3;;
4;; This file is part of GCC.
5;;
6;; GCC is free software; you can redistribute it and/or modify
7;; it under the terms of the GNU General Public License as published by
8;; the Free Software Foundation; either version 3, or (at your option)
9;; any later version.
10;;
11;; GCC is distributed in the hope that it will be useful,
12;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14;; GNU General Public License for more details.
15;;
16;; You should have received a copy of the GNU General Public License
17;; along with GCC; see the file COPYING3.  If not see
18;; <http://www.gnu.org/licenses/>.
19
;; Enumerators added to the "unspec" C enum.  Entries are grouped by the
;; ISA extension named in the comment that heads each group.
20(define_c_enum "unspec" [
21  ;; SSE
22  UNSPEC_MOVNT
23
24  ;; SSE3
25  UNSPEC_LDDQU
26
27  ;; SSSE3
28  UNSPEC_PSHUFB
29  UNSPEC_PSIGN
30  UNSPEC_PALIGNR
31
32  ;; For SSE4A support
33  UNSPEC_EXTRQI
34  UNSPEC_EXTRQ
35  UNSPEC_INSERTQI
36  UNSPEC_INSERTQ
37
38  ;; For SSE4.1 support
39  UNSPEC_BLENDV
40  UNSPEC_INSERTPS
41  UNSPEC_DP
42  UNSPEC_MOVNTDQA
43  UNSPEC_MPSADBW
44  UNSPEC_PHMINPOSUW
45  UNSPEC_PTEST
46
47  ;; For SSE4.2 support
48  UNSPEC_PCMPESTR
49  UNSPEC_PCMPISTR
50
51  ;; For FMA4 and XOP support
52  UNSPEC_FMADDSUB
53  UNSPEC_XOP_UNSIGNED_CMP
54  UNSPEC_XOP_TRUEFALSE
55  UNSPEC_XOP_PERMUTE
56  UNSPEC_FRCZ
57
58  ;; For AES support
59  UNSPEC_AESENC
60  UNSPEC_AESENCLAST
61  UNSPEC_AESDEC
62  UNSPEC_AESDECLAST
63  UNSPEC_AESIMC
64  UNSPEC_AESKEYGENASSIST
65
66  ;; For PCLMUL support
67  UNSPEC_PCLMUL
68
69  ;; For AVX support
70  UNSPEC_PCMP
71  UNSPEC_VPERMIL
72  UNSPEC_VPERMIL2
73  UNSPEC_VPERMIL2F128
74  UNSPEC_CAST
75  UNSPEC_VTESTP
76  UNSPEC_VCVTPH2PS
77  UNSPEC_VCVTPS2PH
78
79  ;; For AVX2 support
80  UNSPEC_VPERMVAR
81  UNSPEC_VPERMTI
82  UNSPEC_GATHER
83  UNSPEC_VSIBADDR
84
85  ;; For AVX512F support
86  UNSPEC_VPERMT2
87  UNSPEC_UNSIGNED_FIX_NOTRUNC
88  UNSPEC_UNSIGNED_PCMP
89  UNSPEC_TESTM
90  UNSPEC_TESTNM
91  UNSPEC_SCATTER
92  UNSPEC_RCP14
93  UNSPEC_RSQRT14
94  UNSPEC_FIXUPIMM
95  UNSPEC_SCALEF
96  UNSPEC_VTERNLOG
97  UNSPEC_GETEXP
98  UNSPEC_GETMANT
99  UNSPEC_ALIGN
100  UNSPEC_CONFLICT
101  UNSPEC_COMPRESS
102  UNSPEC_COMPRESS_STORE
103  UNSPEC_EXPAND
104  UNSPEC_MASKED_EQ
105  UNSPEC_MASKED_GT
106
107  ;; Mask operations
108  UNSPEC_MASKOP
109  UNSPEC_KORTEST
110  UNSPEC_KTEST
111
112  ;; For embedded rounding feature
113  UNSPEC_EMBEDDED_ROUNDING
114
115  ;; For AVX512PF support
116  UNSPEC_GATHER_PREFETCH
117  UNSPEC_SCATTER_PREFETCH
118
119  ;; For AVX512ER support
120  UNSPEC_EXP2
121  UNSPEC_RCP28
122  UNSPEC_RSQRT28
123
124  ;; For SHA support
125  UNSPEC_SHA1MSG1
126  UNSPEC_SHA1MSG2
127  UNSPEC_SHA1NEXTE
128  UNSPEC_SHA1RNDS4
129  UNSPEC_SHA256MSG1
130  UNSPEC_SHA256MSG2
131  UNSPEC_SHA256RNDS2
132
133  ;; For AVX512BW support
134  UNSPEC_DBPSADBW
135  UNSPEC_PMADDUBSW512
136  UNSPEC_PMADDWD512
137  UNSPEC_PSHUFHW
138  UNSPEC_PSHUFLW
139  UNSPEC_CVTINT2MASK
140
141  ;; For AVX512DQ support
142  UNSPEC_REDUCE
143  UNSPEC_FPCLASS
144  UNSPEC_RANGE
145
146  ;; For AVX512IFMA support
147  UNSPEC_VPMADD52LUQ
148  UNSPEC_VPMADD52HUQ
149
150  ;; For AVX512VBMI support
151  UNSPEC_VPMULTISHIFT
152
153  ;; For AVX5124FMAPS/AVX5124VNNIW support
154  UNSPEC_VP4FMADD
155  UNSPEC_VP4FNMADD
156  UNSPEC_VP4DPWSSD
157  UNSPEC_VP4DPWSSDS
158
159  ;; For GFNI support
160  UNSPEC_GF2P8AFFINEINV
161  UNSPEC_GF2P8AFFINE
162  UNSPEC_GF2P8MUL
163
164  ;; For AVX512VBMI2 support
165  UNSPEC_VPSHLD
166  UNSPEC_VPSHRD
167  UNSPEC_VPSHRDV
168  UNSPEC_VPSHLDV
169
170  ;; For AVX512VNNI support
171  UNSPEC_VPMADDUBSWACCD
172  UNSPEC_VPMADDUBSWACCSSD
173  UNSPEC_VPMADDWDACCD
174  UNSPEC_VPMADDWDACCSSD
175
176  ;; For VAES support
177  UNSPEC_VAESDEC
178  UNSPEC_VAESDECLAST
179  UNSPEC_VAESENC
180  UNSPEC_VAESENCLAST
181
182  ;; For VPCLMULQDQ support
183  UNSPEC_VPCLMULQDQ
184
185  ;; For AVX512BITALG support
186  UNSPEC_VPSHUFBIT
187])
188
;; Enumerators added to the "unspecv" C enum (the UNSPECV_* codes defined
;; by this file).
189(define_c_enum "unspecv" [
190  UNSPECV_LDMXCSR
191  UNSPECV_STMXCSR
192  UNSPECV_CLFLUSH
193  UNSPECV_MONITOR
194  UNSPECV_MWAIT
195  UNSPECV_VZEROALL
196  UNSPECV_VZEROUPPER
197])
198
199;; All vector modes including V?TImode, used in move patterns.
;; 512bit modes require TARGET_AVX512F and 256bit modes require TARGET_AVX;
;; the 128bit modes carry no condition of their own.
200(define_mode_iterator VMOVE
201  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
202   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
203   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
204   (V8DI "TARGET_AVX512F")  (V4DI "TARGET_AVX") V2DI
205   (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
206   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
207   (V8DF "TARGET_AVX512F")  (V4DF "TARGET_AVX") V2DF])
208
209;; All AVX-512{F,VL} vector modes. Supposed TARGET_AVX512F baseline.
;; Covers the SI, DI, SF and DF element types.  The 512bit modes carry no
;; condition (TARGET_AVX512F is taken as given); the 256bit and 128bit modes
;; additionally require TARGET_AVX512VL.
210(define_mode_iterator V48_AVX512VL
211  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
212   V8DI  (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
213   V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
214   V8DF  (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
215
216;; 1,2 byte AVX-512{BW,VL} vector modes. Supposed TARGET_AVX512BW baseline.
;; The 512bit modes carry no condition; the 256bit and 128bit modes require
;; TARGET_AVX512VL.
217(define_mode_iterator VI12_AVX512VL
218  [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
219   V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
220
221;; Same iterator, but without supposed TARGET_AVX512BW
;; Here the 512bit modes test TARGET_AVX512BW explicitly.
;; NOTE(review): of the narrower modes only V32QI tests both AVX512VL and
;; AVX512BW; V16QI, V16HI and V8HI test AVX512VL alone -- confirm intended.
222(define_mode_iterator VI12_AVX512VLBW
223  [(V64QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512VL")
224   (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")
225   (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
226
;; QImode subset of VI12_AVX512VL: V64QI is unconditional, the narrower
;; modes require TARGET_AVX512VL.
227(define_mode_iterator VI1_AVX512VL
228  [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
229
230;; All vector modes
;; Same integer and float modes and conditions as VMOVE, except that the
;; V?TI modes are not listed and V2DF is gated on TARGET_SSE2.
231(define_mode_iterator V
232  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
233   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
234   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
235   (V8DI "TARGET_AVX512F")  (V4DI "TARGET_AVX") V2DI
236   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
237   (V8DF "TARGET_AVX512F")  (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
238
239;; All 128bit vector modes
;; Only V2DF carries a condition (TARGET_SSE2).
240(define_mode_iterator V_128
241  [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
242
243;; All 256bit vector modes
;; No per-mode conditions.
244(define_mode_iterator V_256
245  [V32QI V16HI V8SI V4DI V8SF V4DF])
246
247;; All 128bit and 256bit vector modes
;; No per-mode conditions (V2DF is not gated on TARGET_SSE2 here).
248(define_mode_iterator V_128_256
249  [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
250
251;; All 512bit vector modes
;; No per-mode conditions.
252(define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
253
254;; All 256bit and 512bit vector modes
;; The 256bit modes are unconditional; every 512bit mode requires
;; TARGET_AVX512F.
255(define_mode_iterator V_256_512
256  [V32QI V16HI V8SI V4DI V8SF V4DF
257   (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
258   (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
259
260;; All vector float modes
;; 512bit modes need TARGET_AVX512F, 256bit modes need TARGET_AVX, V2DF needs
;; TARGET_SSE2; V4SF is unconditional.
261(define_mode_iterator VF
262  [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
263   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
264
265;; 128- and 256-bit float vector modes
;; Same conditions as VF, without the 512bit modes.
266(define_mode_iterator VF_128_256
267  [(V8SF "TARGET_AVX") V4SF
268   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
269
270;; All SFmode vector float modes
271(define_mode_iterator VF1
272  [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
273
274;; 128- and 256-bit SF vector modes
275(define_mode_iterator VF1_128_256
276  [(V8SF "TARGET_AVX") V4SF])
277
;; 128- and 256-bit SF vector modes with V8SF unconditional and V4SF
;; requiring TARGET_AVX512VL.
278(define_mode_iterator VF1_128_256VL
279  [V8SF (V4SF "TARGET_AVX512VL")])
280
281;; All DFmode vector float modes
;; Unlike in VF, V2DF carries no TARGET_SSE2 condition here.
282(define_mode_iterator VF2
283  [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
284
285;; 128- and 256-bit DF vector modes
286(define_mode_iterator VF2_128_256
287  [(V4DF "TARGET_AVX") V2DF])
288
;; 256- and 512-bit DF vector modes: V4DF unconditional, V8DF requires
;; TARGET_AVX512F.
289(define_mode_iterator VF2_512_256
290  [(V8DF "TARGET_AVX512F") V4DF])
291
;; 256- and 512-bit DF vector modes: V8DF unconditional, V4DF requires
;; TARGET_AVX512VL.
292(define_mode_iterator VF2_512_256VL
293  [V8DF (V4DF "TARGET_AVX512VL")])
294
295;; All 128bit vector float modes
296(define_mode_iterator VF_128
297  [V4SF (V2DF "TARGET_SSE2")])
298
299;; All 256bit vector float modes
300(define_mode_iterator VF_256
301  [V8SF V4DF])
302
303;; All 512bit vector float modes
304(define_mode_iterator VF_512
305  [V16SF V8DF])
306
;; The four iterators below share one shape: the 512bit mode(s) are
;; unconditional and the 256bit/128bit modes require TARGET_AVX512VL.

;; SI and DI element integer vector modes.
307(define_mode_iterator VI48_AVX512VL
308  [V16SI (V8SI  "TARGET_AVX512VL") (V4SI  "TARGET_AVX512VL")
309   V8DI  (V4DI  "TARGET_AVX512VL") (V2DI  "TARGET_AVX512VL")])
310
;; SF and DF element float vector modes.
311(define_mode_iterator VF_AVX512VL
312  [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
313   V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
314
;; DF element float vector modes only.
315(define_mode_iterator VF2_AVX512VL
316  [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
317
;; SF element float vector modes only.
318(define_mode_iterator VF1_AVX512VL
319  [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
320
321;; All vector integer modes
;; 512bit SI/DI modes need TARGET_AVX512F, 512bit QI/HI modes need
;; TARGET_AVX512BW, 256bit modes need TARGET_AVX; 128bit modes are
;; unconditional.
322(define_mode_iterator VI
323  [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
324   (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
325   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
326   (V8SI "TARGET_AVX") V4SI
327   (V4DI "TARGET_AVX") V2DI])
328
;; Same set of modes as VI, but the 256bit modes require TARGET_AVX2
;; instead of TARGET_AVX.
329(define_mode_iterator VI_AVX2
330  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
331   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
332   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
333   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
334
335;; 128bit and 256bit QImode vector integer modes (V64QI is not included).
;; V32QI requires TARGET_AVX.
336(define_mode_iterator VI1
337  [(V32QI "TARGET_AVX") V16QI])
338
339;; All 128bit vector modes (integer and float), plus the 256bit vector
;; modes when TARGET_AVX is enabled.  None of the 128bit modes carries a
;; condition of its own.
340(define_mode_iterator V_AVX
341  [V16QI V8HI V4SI V2DI V4SF V2DF
342   (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
343   (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
344   (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")])
345
;; 128bit and 256bit SI/DI element integer vector modes; the 256bit modes
;; require TARGET_AVX.
346(define_mode_iterator VI48_AVX
347 [V4SI V2DI
348  (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
349
;; All DImode vector integer modes: V8DI needs TARGET_AVX512F, V4DI needs
;; TARGET_AVX, V2DI is unconditional.
350(define_mode_iterator VI8
351  [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
352
;; DImode vector integer modes with V4DI unconditional; V8DI needs
;; TARGET_AVX512F and V2DI needs TARGET_AVX512VL.
353(define_mode_iterator VI8_FVL
354  [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")])
355
;; DImode vector integer modes with V8DI unconditional; the narrower modes
;; need TARGET_AVX512VL.
356(define_mode_iterator VI8_AVX512VL
357  [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
358
;; 256bit and 512bit DImode vector integer modes; V4DI needs
;; TARGET_AVX512VL.
359(define_mode_iterator VI8_256_512
360  [V8DI (V4DI "TARGET_AVX512VL")])
361
;; Single element-size integer iterators.  The digit after "VI" matches the
;; element size in bytes of the listed modes (1 = QI, 2 = HI, 4 = SI, 8 = DI).

;; QI: V32QI needs TARGET_AVX2, V16QI is unconditional.
362(define_mode_iterator VI1_AVX2
363  [(V32QI "TARGET_AVX2") V16QI])
364
;; QI: V64QI needs TARGET_AVX512BW, V32QI needs TARGET_AVX2.
365(define_mode_iterator VI1_AVX512
366  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
367
;; QI: V64QI needs TARGET_AVX512F, V32QI needs TARGET_AVX.
368(define_mode_iterator VI1_AVX512F
369  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI])
370
;; HI: V32HI needs TARGET_AVX512BW, V16HI needs TARGET_AVX2.
371(define_mode_iterator VI2_AVX2
372  [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
373
;; HI: V32HI needs TARGET_AVX512F, V16HI needs TARGET_AVX2.
374(define_mode_iterator VI2_AVX512F
375  [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
376
;; SI: V8SI needs TARGET_AVX.
377(define_mode_iterator VI4_AVX
378  [(V8SI "TARGET_AVX") V4SI])
379
;; SI: V8SI needs TARGET_AVX2.
380(define_mode_iterator VI4_AVX2
381  [(V8SI "TARGET_AVX2") V4SI])
382
;; SI: V16SI needs TARGET_AVX512F, V8SI needs TARGET_AVX2.
383(define_mode_iterator VI4_AVX512F
384  [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
385
;; SI: V16SI unconditional, narrower modes need TARGET_AVX512VL.
386(define_mode_iterator VI4_AVX512VL
387  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
388
;; SI and DI: V4SI and V8SI are unconditional, V2DI and V4DI need
;; TARGET_AVX512VL, and both 512bit modes need TARGET_AVX512F.
389(define_mode_iterator VI48_AVX512F_AVX512VL
390  [V4SI V8SI (V16SI "TARGET_AVX512F")
391   (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
392
;; HI: V32HI unconditional, narrower modes need TARGET_AVX512VL.
393(define_mode_iterator VI2_AVX512VL
394  [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
395
;; QI: V32QI unconditional, V16QI needs TARGET_AVX512VL, V64QI needs
;; TARGET_AVX512F.
396(define_mode_iterator VI1_AVX512VL_F
397  [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")])
398
;; DImode vector integer modes: V8DI needs TARGET_AVX512BW, V4DI needs
;; TARGET_AVX2, V2DI is unconditional.
399(define_mode_iterator VI8_AVX2_AVX512BW
400  [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
401
;; 128bit and 256bit DImode vector integer modes; V4DI needs TARGET_AVX2.
402(define_mode_iterator VI8_AVX2
403  [(V4DI "TARGET_AVX2") V2DI])
404
;; DImode vector integer modes: V8DI needs TARGET_AVX512F, V4DI needs
;; TARGET_AVX2, V2DI is unconditional.
405(define_mode_iterator VI8_AVX2_AVX512F
406  [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
407
;; 256bit and 512bit DImode vector integer modes only (no V2DI): V8DI needs
;; TARGET_AVX512F, V4DI needs TARGET_AVX.
408(define_mode_iterator VI8_AVX_AVX512F
409  [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")])
410
;; The 128bit SI vector mode together with the 256bit DI vector mode; both
;; have four elements.
411(define_mode_iterator VI4_128_8_256
412  [V4SI V4DI])
413
414;; All V8D* modes
;; i.e. the 512bit float and integer modes with eight 64bit elements.
415(define_mode_iterator V8FI
416  [V8DF V8DI])
417
418;; All V16S* modes
;; i.e. the 512bit float and integer modes with sixteen 32bit elements.
419(define_mode_iterator V16FI
420  [V16SF V16SI])
421
422;; ??? We should probably use TImode instead.
;; V?TI modes: V4TI needs TARGET_AVX512BW, V2TI needs TARGET_AVX2, V1TI is
;; unconditional.
423(define_mode_iterator VIMAX_AVX2_AVX512BW
424  [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
425
426;; Suppose TARGET_AVX512BW as baseline
;; V4TI is unconditional; V2TI and V1TI need TARGET_AVX512VL.
427(define_mode_iterator VIMAX_AVX512VL
428  [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
429
;; V2TI (needs TARGET_AVX2) and V1TI only.
430(define_mode_iterator VIMAX_AVX2
431  [(V2TI "TARGET_AVX2") V1TI])
432
433;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW.
;; Same conditions as VIMAX_AVX2_AVX512BW, but the 128bit entry is the
;; scalar TI mode rather than V1TI.
434(define_mode_iterator SSESCALARMODE
435  [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
436
;; QI and HI element integer vector modes: 512bit modes need
;; TARGET_AVX512BW, 256bit modes need TARGET_AVX2.
437(define_mode_iterator VI12_AVX2
438  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
439   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
440
;; 128bit and 256bit HI and SI element integer vector modes; 256bit modes
;; need TARGET_AVX2.
441(define_mode_iterator VI24_AVX2
442  [(V16HI "TARGET_AVX2") V8HI
443   (V8SI "TARGET_AVX2") V4SI])
444
;; QI, HI and SI element integer vector modes: 256bit modes need
;; TARGET_AVX2, V32HI and V16SI need TARGET_AVX512F, V64QI needs
;; TARGET_AVX512BW.
445(define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
446  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
447   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
448   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
449
;; 128bit and 256bit QI, HI and SI element integer vector modes; 256bit
;; modes need TARGET_AVX2.
450(define_mode_iterator VI124_AVX2
451  [(V32QI "TARGET_AVX2") V16QI
452   (V16HI "TARGET_AVX2") V8HI
453   (V8SI "TARGET_AVX2") V4SI])
454
;; HI element integer vector modes: V32HI needs TARGET_AVX512BW, V16HI
;; needs TARGET_AVX2.  The list is identical to that of VI2_AVX2.
455(define_mode_iterator VI2_AVX2_AVX512BW
456  [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
457
;; HI, SI and DI element integer vector modes: 512bit modes are
;; unconditional, 256bit and 128bit modes need TARGET_AVX512VL.
458(define_mode_iterator VI248_AVX512VL
459  [V32HI V16SI V8DI
460   (V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
461   (V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
462   (V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
463
;; 128bit and 256bit SI and DI element integer vector modes; 256bit modes
;; need TARGET_AVX2.
464(define_mode_iterator VI48_AVX2
465  [(V8SI "TARGET_AVX2") V4SI
466   (V4DI "TARGET_AVX2") V2DI])
467
;; 128bit and 256bit HI, SI and DI element integer vector modes; 256bit
;; modes need TARGET_AVX2.
468(define_mode_iterator VI248_AVX2
469  [(V16HI "TARGET_AVX2") V8HI
470   (V8SI "TARGET_AVX2") V4SI
471   (V4DI "TARGET_AVX2") V2DI])
472
;; HI, SI and DI element integer vector modes: 256bit modes need
;; TARGET_AVX2, V32HI and V16SI need TARGET_AVX512BW, V8DI needs
;; TARGET_AVX512F.
473(define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
474  [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
475   (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
476   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
477
;; 512bit HI, SI and DI element integer vector modes; only V32HI carries a
;; condition (TARGET_AVX512BW).
478(define_mode_iterator VI248_AVX512BW
479  [(V32HI "TARGET_AVX512BW") V16SI V8DI])
480
;; As VI248_AVX512BW, plus V4DI under TARGET_AVX512VL.
481(define_mode_iterator VI248_AVX512BW_AVX512VL
482  [(V32HI "TARGET_AVX512BW")
483   (V4DI "TARGET_AVX512VL") V16SI V8DI])
484
485;; Suppose TARGET_AVX512VL as baseline
;; 128bit and 256bit HI and SI modes plus V2DI (V4DI is not listed); the HI
;; modes need TARGET_AVX512BW.
486(define_mode_iterator VI248_AVX512BW_1
487 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
488  V8SI V4SI
489  V2DI])
490
;; Same as VI248_AVX512BW_1 with V4DI added.
491(define_mode_iterator VI248_AVX512BW_2
492 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
493  V8SI V4SI
494  V4DI V2DI])
495
;; SI and DI element integer vector modes: only the 512bit modes carry a
;; condition (TARGET_AVX512F).
496(define_mode_iterator VI48_AVX512F
497  [(V16SI "TARGET_AVX512F") V8SI V4SI
498   (V8DI "TARGET_AVX512F") V4DI V2DI])
499
;; SI and DI element integer vector modes: 512bit modes need
;; TARGET_AVX512F, 256bit modes need TARGET_AVX.
500(define_mode_iterator VI48_AVX_AVX512F
501  [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
502   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
503
;; QI and HI element integer vector modes: 512bit modes need
;; TARGET_AVX512F, 256bit modes need TARGET_AVX.
504(define_mode_iterator VI12_AVX_AVX512F
505  [ (V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
506    (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
507
;; 128bit and 256bit modes with 4- and 8-byte elements: the float modes are
;; unconditional, the integer modes need TARGET_AVX2.
508(define_mode_iterator V48_AVX2
509  [V4SF V2DF
510   V8SF V4DF
511   (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
512   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
513
;; QI element integer vector modes: V64QI needs TARGET_AVX512BW, the
;; narrower modes need TARGET_AVX512VL.
514(define_mode_iterator VI1_AVX512VLBW
515  [(V64QI "TARGET_AVX512BW") (V32QI  "TARGET_AVX512VL")
516	(V16QI  "TARGET_AVX512VL")])
517
;; The attributes below map a vector mode to the lower-case name of an ISA
;; extension.  NOTE(review): their uses lie outside this part of the file;
;; presumably the strings are substituted into pattern names -- confirm.

;; 128bit and 256bit modes give "avx512vl"; 512bit QI/HI modes give
;; "avx512bw"; the remaining 512bit modes give "avx512f".
518(define_mode_attr avx512
519  [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
520   (V8HI  "avx512vl") (V16HI  "avx512vl") (V32HI "avx512bw")
521   (V4SI  "avx512vl") (V8SI  "avx512vl") (V16SI "avx512f")
522   (V2DI  "avx512vl") (V4DI  "avx512vl") (V8DI "avx512f")
523   (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
524   (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
525
;; NOTE(review): this table is not uniform by vector size -- the HI and DI
;; 128bit/256bit entries give "avx512vl", V32HI gives "avx512bw", and the
;; 128bit float entries give "avx" rather than an SSE name.  Confirm against
;; the patterns that use it before changing any entry.
526(define_mode_attr sse2_avx_avx512f
527  [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
528   (V8HI  "avx512vl") (V16HI  "avx512vl") (V32HI "avx512bw")
529   (V4SI  "sse2") (V8SI  "avx") (V16SI "avx512f")
530   (V2DI  "avx512vl") (V4DI  "avx512vl") (V8DI "avx512f")
531   (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
532   (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
533
;; Integer modes only: 128bit -> "sse2", 256bit -> "avx2", 512bit ->
;; "avx512bw" for QI/HI/TI elements and "avx512f" for SI/DI elements.
534(define_mode_attr sse2_avx2
535  [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
536   (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
537   (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
538   (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
539   (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
540
;; Integer modes only: 64bit and 128bit -> "ssse3", 256bit -> "avx2",
;; 512bit -> "avx512bw".  The 128bit TI entry is keyed on the scalar TI mode,
;; and there are no V16SI or V8DI entries.
541(define_mode_attr ssse3_avx2
542   [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
543    (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
544    (V4SI "ssse3") (V8SI "avx2")
545    (V2DI "ssse3") (V4DI "avx2")
546    (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
547
;; Integer modes only: 128bit -> "sse4_1", 256bit -> "avx2"; 512bit entries
;; give "avx512bw" (QI/HI), "avx512f" (SI) and "avx512dq" (DI).
548(define_mode_attr sse4_1_avx2
549   [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
550    (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
551    (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
552    (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
553
;; 128bit and 256bit modes with 4- and 8-byte elements: float modes give
;; "avx", integer modes give "avx2".
554(define_mode_attr avx_avx2
555  [(V4SF "avx") (V2DF "avx")
556   (V8SF "avx") (V4DF "avx")
557   (V4SI "avx2") (V2DI "avx2")
558   (V8SI "avx2") (V4DI "avx2")])
559
;; Integer modes: 128bit -> "vec", 256bit -> "avx2".
560(define_mode_attr vec_avx2
561  [(V16QI "vec") (V32QI "avx2")
562   (V8HI "vec") (V16HI "avx2")
563   (V4SI "vec") (V8SI "avx2")
564   (V2DI "vec") (V4DI "avx2")])
565
;; SI/DI/SF/DF element modes: 128bit and 256bit -> "avx2", 512bit ->
;; "avx512f".  HI element modes: 128bit and 256bit -> "avx512vl", 512bit ->
;; "avx512bw".
566(define_mode_attr avx2_avx512
567  [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
568   (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
569   (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
570   (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
571   (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
572
;; "f" for float vector modes, "i" for integer vector modes (including the
;; V?TI modes).  Spliced into instruction mnemonics, e.g.
;; "vextract<shuffletype>64x4" in mov<mode>_internal.
573(define_mode_attr shuffletype
574  [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
575  (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
576  (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
577  (V32HI "i") (V16HI "i") (V8HI "i")
578  (V64QI "i") (V32QI "i") (V16QI "i")
579  (V4TI "i") (V2TI "i") (V1TI "i")])
580
;; Mapping of 512bit modes with 4- or 8-byte elements to the 128bit mode
;; with the same element type (a quarter of the size).
581(define_mode_attr ssequartermode
582  [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
583
;; Lower-case name of the mode with the same number of elements and
;; elements of twice the width (QI -> HI, HI -> SI, SI -> DI).
584(define_mode_attr ssedoublemodelower
585  [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
586   (V8HI "v8si")   (V16HI "v16si") (V32HI "v32si")
587   (V4SI "v4di")   (V8SI "v8di")   (V16SI "v16di")])
588
;; Mapping of vector modes to a mode of twice the total size.
;; NOTE(review): the entries are not uniform.  Float and DI modes double the
;; element count (V4SF -> V8SF, V4DI -> V8DI); QI, HI and V4SI keep the
;; element count and double the element width (V16QI -> V16HI,
;; V4SI -> V4DI); V8SI and V16SI double the element count (V8SI -> V16SI),
;; which differs from ssedoublemodelower (V8SI -> v8di).  Confirm against
;; the users before touching any entry.
589(define_mode_attr ssedoublemode
590  [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
591   (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
592   (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
593   (V4HI "V4SI") (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
594   (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
595   (V4DI "V8DI") (V8DI "V16DI")])
596
;; Mapping of DI element vector modes to the QI element vector mode of the
;; same total size.
597(define_mode_attr ssebytemode
598  [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
599
600;; All 128bit vector integer modes
601(define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
602
603;; All 256bit vector integer modes
604(define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
605
606;; Various 128bit vector integer mode combinations
;; The digits in each name give the element sizes in bytes of the modes
;; listed (1 = QI, 2 = HI, 4 = SI, 8 = DI).  No per-mode conditions.
607(define_mode_iterator VI12_128 [V16QI V8HI])
608(define_mode_iterator VI14_128 [V16QI V4SI])
609(define_mode_iterator VI124_128 [V16QI V8HI V4SI])
610(define_mode_iterator VI24_128 [V8HI V4SI])
611(define_mode_iterator VI248_128 [V8HI V4SI V2DI])
612(define_mode_iterator VI48_128 [V4SI V2DI])
613
614;; Various 256bit and 512bit vector integer mode combinations
615(define_mode_iterator VI124_256 [V32QI V16HI V8SI])
;; As VI124_256, plus the 512bit modes: V64QI and V32HI under
;; TARGET_AVX512BW, V16SI under TARGET_AVX512F.
616(define_mode_iterator VI124_256_AVX512F_AVX512BW
617  [V32QI V16HI V8SI
618   (V64QI "TARGET_AVX512BW")
619   (V32HI "TARGET_AVX512BW")
620   (V16SI "TARGET_AVX512F")])
621(define_mode_iterator VI48_256 [V8SI V4DI])
622(define_mode_iterator VI48_512 [V16SI V8DI])
;; The 256bit SI mode together with the 512bit DI mode; both have eight
;; elements.
623(define_mode_iterator VI4_256_8_512 [V8SI V8DI])
;; 512bit integer modes: SI/DI unconditional, HI/QI under TARGET_AVX512BW.
624(define_mode_iterator VI_AVX512BW
625  [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
626
627;; Int-float size matches
;; Each iterator pairs integer and float vector modes that have the same
;; element size and the same total size.
628(define_mode_iterator VI4F_128 [V4SI V4SF])
629(define_mode_iterator VI8F_128 [V2DI V2DF])
630(define_mode_iterator VI4F_256 [V8SI V8SF])
631(define_mode_iterator VI8F_256 [V4DI V4DF])
;; 4-byte element modes: 256bit unconditional, 512bit under TARGET_AVX512F.
632(define_mode_iterator VI4F_256_512
633  [V8SI V8SF
634   (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
;; 4- and 8-byte element modes: V8SI/V8SF unconditional, 512bit modes under
;; TARGET_AVX512F, V4DI/V4DF under TARGET_AVX512VL.
635(define_mode_iterator VI48F_256_512
636  [V8SI V8SF
637  (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
638  (V8DI  "TARGET_AVX512F") (V8DF  "TARGET_AVX512F")
639  (V4DI  "TARGET_AVX512VL") (V4DF  "TARGET_AVX512VL")])
;; All six 512bit modes: SF/DF floats and QI/HI/SI/DI integers.
640(define_mode_iterator VF48_I1248
641  [V16SI V16SF V8DI V8DF V32HI V64QI])
;; 4- and 8-byte element modes: 512bit unconditional, 256bit and 128bit
;; under TARGET_AVX512VL.
642(define_mode_iterator VI48F
643  [V16SI V16SF V8DI V8DF
644   (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
645   (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
646   (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
647   (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
648(define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
649
650;; Mapping from float mode to required SSE level
;; Covers scalar SF/DF as well as the float vector modes.
651(define_mode_attr sse
652  [(SF "sse") (DF "sse2")
653   (V4SF "sse") (V2DF "sse2")
654   (V16SF "avx512f") (V8SF "avx")
655   (V8DF "avx512f") (V4DF "avx")])
656
;; QI and DI element vector modes: 128bit -> "sse2", 256bit -> "avx",
;; 512bit -> "avx512f".
657(define_mode_attr sse2
658  [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
659   (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
660
;; V16QI -> "sse3", V32QI -> "avx".
661(define_mode_attr sse3
662  [(V16QI "sse3") (V32QI "avx")])
663
;; 128bit modes -> "sse4_1", 256bit modes -> "avx"; V8DF (the only 512bit
;; entry) -> "avx512f".
664(define_mode_attr sse4_1
665  [(V4SF "sse4_1") (V2DF "sse4_1")
666   (V8SF "avx") (V4DF "avx")
667   (V8DF "avx512f")
668   (V4DI "avx") (V2DI "sse4_1")
669   (V8SI "avx") (V4SI "sse4_1")
670   (V16QI "sse4_1") (V32QI "avx")
671   (V8HI "sse4_1") (V16HI "avx")])
672
;; Vector size suffix: "512" for 512bit modes, "256" for 256bit modes and
;; the empty string for 128bit modes.
673(define_mode_attr avxsizesuffix
674  [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
675   (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
676   (V16QI "") (V8HI "") (V4SI "") (V2DI "")
677   (V16SF "512") (V8DF "512")
678   (V8SF "256") (V4DF "256")
679   (V4SF "") (V2DF "")])
680
681;; SSE instruction mode
;; Integer vector modes (and scalar TI) map to the integer mode of their
;; total size: XI for 512bit, OI for 256bit, TI for 128bit.  Float vector
;; modes map to themselves.
682(define_mode_attr sseinsnmode
683  [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
684   (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
685   (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
686   (V16SF "V16SF") (V8DF "V8DF")
687   (V8SF "V8SF") (V4DF "V4DF")
688   (V4SF "V4SF") (V2DF "V2DF")
689   (TI "TI")])
690
691;; Mapping of vector modes to corresponding mask size
;; The mask mode follows the element count: 64 elements -> DI, 32 -> SI,
;; 16 -> HI, and 8 or fewer -> QI.
692(define_mode_attr avx512fmaskmode
693  [(V64QI "DI") (V32QI "SI") (V16QI "HI")
694   (V32HI "SI") (V16HI "HI") (V8HI  "QI") (V4HI "QI")
695   (V16SI "HI") (V8SI  "QI") (V4SI  "QI")
696   (V8DI  "QI") (V4DI  "QI") (V2DI  "QI")
697   (V16SF "HI") (V8SF  "QI") (V4SF  "QI")
698   (V8DF  "QI") (V4DF  "QI") (V2DF  "QI")])
699
700;; Same mapping as avx512fmaskmode, with the mask mode name in lower case
701(define_mode_attr avx512fmaskmodelower
702  [(V64QI "di") (V32QI "si") (V16QI "hi")
703   (V32HI "si") (V16HI "hi") (V8HI  "qi") (V4HI "qi")
704   (V16SI "hi") (V8SI  "qi") (V4SI  "qi")
705   (V8DI  "qi") (V4DI  "qi") (V2DI  "qi")
706   (V16SF "hi") (V8SF  "qi") (V4SF  "qi")
707   (V8DF  "qi") (V4DF  "qi") (V2DF  "qi")])
708
709;; Mapping of vector float modes to an integer mode of the same size
;; (same element count and element width).  Integer vector modes map to
;; themselves.
710(define_mode_attr sseintvecmode
711  [(V16SF "V16SI") (V8DF  "V8DI")
712   (V8SF  "V8SI")  (V4DF  "V4DI")
713   (V4SF  "V4SI")  (V2DF  "V2DI")
714   (V16SI "V16SI") (V8DI  "V8DI")
715   (V8SI  "V8SI")  (V4DI  "V4DI")
716   (V4SI  "V4SI")  (V2DI  "V2DI")
717   (V16HI "V16HI") (V8HI  "V8HI")
718   (V32HI "V32HI") (V64QI "V64QI")
719   (V32QI "V32QI") (V16QI "V16QI")])
720
;; Mapping of vector float modes to the single integer mode of the same
;; total size (XI, OI or TI).  There is no V16SF entry.
721(define_mode_attr sseintvecmode2
722  [(V8DF "XI") (V4DF "OI") (V2DF "TI")
723   (V8SF "OI") (V4SF "TI")])
724
;; Lower-case counterpart of sseintvecmode.  It has no entries for the
;; 512bit integer modes (V16SI, V8DI, V32HI, V64QI) that sseintvecmode
;; lists.
725(define_mode_attr sseintvecmodelower
726  [(V16SF "v16si") (V8DF "v8di")
727   (V8SF "v8si") (V4DF "v4di")
728   (V4SF "v4si") (V2DF "v2di")
729   (V8SI "v8si") (V4DI "v4di")
730   (V4SI "v4si") (V2DI "v2di")
731   (V16HI "v16hi") (V8HI "v8hi")
732   (V32QI "v32qi") (V16QI "v16qi")])
733
734;; Mapping of vector modes to a vector mode of double size
;; (same element type, twice the element count).  Only 128bit and 256bit
;; source modes are listed.
735(define_mode_attr ssedoublevecmode
736  [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
737   (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
738   (V8SF "V16SF") (V4DF "V8DF")
739   (V4SF "V8SF") (V2DF "V4DF")])
740
741;; Mapping of vector modes to a vector mode of half size
;; (same element type, half the element count).  The two-element modes
;; V2DI and V2DF, and V2TI, have no entry.
742(define_mode_attr ssehalfvecmode
743  [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
744   (V32QI "V16QI") (V16HI  "V8HI") (V8SI  "V4SI") (V4DI "V2DI")
745   (V16QI  "V8QI") (V8HI   "V4HI") (V4SI  "V2SI")
746   (V16SF "V8SF") (V8DF "V4DF")
747   (V8SF  "V4SF") (V4DF "V2DF")
748   (V4SF  "V2SF")])
749
;; Same mapping as ssehalfvecmode, with the result in lower case.
750(define_mode_attr ssehalfvecmodelower
751  [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
752   (V32QI "v16qi") (V16HI  "v8hi") (V8SI  "v4si") (V4DI "v2di")
753   (V16QI  "v8qi") (V8HI   "v4hi") (V4SI  "v2si")
754   (V16SF "v8sf") (V8DF "v4df")
755   (V8SF  "v4sf") (V4DF "v2df")
756   (V4SF  "v2sf")])
757
758;; Mapping of vector modes to packed single mode of the same size
;; 512bit modes map to V16SF, 256bit modes to V8SF, 128bit modes to V4SF.
759(define_mode_attr ssePSmode
760  [(V16SI "V16SF") (V8DF "V16SF")
761   (V16SF "V16SF") (V8DI "V16SF")
762   (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
763   (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
764   (V8SI "V8SF") (V4SI "V4SF")
765   (V4DI "V8SF") (V2DI "V4SF")
766   (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
767   (V8SF "V8SF") (V4SF "V4SF")
768   (V4DF "V8SF") (V2DF "V4SF")])
769
;; Mapping of DI element vector modes to the packed single mode with the
;; same number of elements (half the total size).
770(define_mode_attr ssePSmode2
771  [(V8DI "V8SF") (V4DI "V4SF")])
772
773;; Mapping of vector modes back to the scalar modes
;; i.e. to the mode of one element.  V4TI and V2TI are listed; V1TI is not.
774(define_mode_attr ssescalarmode
775  [(V64QI "QI") (V32QI "QI") (V16QI "QI")
776   (V32HI "HI") (V16HI "HI") (V8HI "HI")
777   (V16SI "SI") (V8SI "SI")  (V4SI "SI")
778   (V8DI "DI")  (V4DI "DI")  (V2DI "DI")
779   (V16SF "SF") (V8SF "SF")  (V4SF "SF")
780   (V8DF "DF")  (V4DF "DF")  (V2DF "DF")
781   (V4TI "TI")  (V2TI "TI")])
782
783;; Same mapping as ssescalarmode, with the scalar mode name in lower case
784(define_mode_attr ssescalarmodelower
785  [(V64QI "qi") (V32QI "qi") (V16QI "qi")
786   (V32HI "hi") (V16HI "hi") (V8HI "hi")
787   (V16SI "si") (V8SI "si")  (V4SI "si")
788   (V8DI "di")  (V4DI "di")  (V2DI "di")
789   (V16SF "sf") (V8SF "sf")  (V4SF "sf")
790   (V8DF "df")  (V4DF "df")  (V2DF "df")
791   (V4TI "ti")  (V2TI "ti")])
792
793;; Mapping of vector modes to the 128bit modes
;; i.e. to the 128bit vector mode with the same element type; 128bit modes
;; map to themselves.
794(define_mode_attr ssexmmmode
795  [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
796   (V32HI "V8HI")  (V16HI "V8HI") (V8HI "V8HI")
797   (V16SI "V4SI")  (V8SI "V4SI")  (V4SI "V4SI")
798   (V8DI "V2DI")   (V4DI "V2DI")  (V2DI "V2DI")
799   (V16SF "V4SF")  (V8SF "V4SF")  (V4SF "V4SF")
800   (V8DF "V2DF")   (V4DF "V2DF")  (V2DF "V2DF")])
801
802;; Pointer size override for scalar modes (Intel asm dialect)
;; The letter is chosen by element size: "b" for QI, "w" for HI, "k" for
;; SI/SF and "q" for DI/DF elements.  Scalar SF and DF are listed as well.
803(define_mode_attr iptr
804  [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
805   (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
806   (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
807   (V16SF "k") (V8DF "q")
808   (V8SF "k") (V4DF "q")
809   (V4SF "k") (V2DF "q")
810   (SF "k") (DF "q")])
811
812;; Number of scalar elements in each vector type
;; NOTE(review): V32HI is the only 512bit mode without an entry here --
;; confirm that no user of this attribute iterates over V32HI.
813(define_mode_attr ssescalarnum
814  [(V64QI "64") (V16SI "16") (V8DI "8")
815   (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
816   (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
817   (V16SF "16") (V8DF "8")
818   (V8SF "8") (V4DF "4")
819   (V4SF "4") (V2DF "2")])
820
821;; Mask of scalar elements in each vector type
;; The value is the element count minus one.  Only 128bit and 256bit modes
;; are listed.
822(define_mode_attr ssescalarnummask
823  [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
824   (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
825   (V8SF "7") (V4DF "3")
826   (V4SF "3") (V2DF "1")])
827
;; Size in bits of one scalar element of each vector type.  The V?TI modes
;; are given "64", the same value as the DI element modes.
828(define_mode_attr ssescalarsize
829  [(V4TI  "64") (V2TI  "64") (V1TI  "64")
830   (V8DI  "64") (V4DI  "64") (V2DI  "64")
831   (V64QI "8") (V32QI "8") (V16QI "8")
832   (V32HI "16") (V16HI "16") (V8HI "16")
833   (V16SI "32") (V8SI "32") (V4SI "32")
834   (V16SF "32") (V8SF "32") (V4SF "32")
835   (V8DF "64") (V4DF "64") (V2DF "64")])
836
837;; SSE prefix for integer vector modes
;; "p" for every integer vector mode, the empty string for float vector
;; modes.
838(define_mode_attr sseintprefix
839  [(V2DI  "p") (V2DF  "")
840   (V4DI  "p") (V4DF  "")
841   (V8DI  "p") (V8DF  "")
842   (V4SI  "p") (V4SF  "")
843   (V8SI  "p") (V8SF  "")
844   (V16SI "p") (V16SF "")
845   (V16QI "p") (V8HI "p")
846   (V32QI "p") (V16HI "p")
847   (V64QI "p") (V32HI "p")])
848
849;; SSE scalar suffix for vector modes
;; "ss"/"sd" for SF/DF elements (scalar SF and DF included), "d"/"q" for
;; SI/DI elements.  QI and HI element modes have no entry.
850(define_mode_attr ssescalarmodesuffix
851  [(SF "ss") (DF "sd")
852   (V16SF "ss") (V8DF "sd")
853   (V8SF "ss") (V4DF "sd")
854   (V4SF "ss") (V2DF "sd")
855   (V16SI "d") (V8DI "q")
856   (V8SI "d") (V4DI "q")
857   (V4SI "d") (V2DI "q")])
858
859;; Pack/unpack vector modes
;; sseunpackmode maps an integer vector mode to the mode of the same total
;; size with half as many elements of twice the width.
860(define_mode_attr sseunpackmode
861  [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
862   (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
863   (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
864
;; ssepackmode is the inverse of sseunpackmode: same total size, twice as
;; many elements of half the width.
865(define_mode_attr ssepackmode
866  [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
867   (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
868   (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
869
870;; Mapping of the max integer size for xop rotate immediate constraint
;; The value is the element width in bits minus one; 128bit integer modes
;; only.
871(define_mode_attr sserotatemax
872  [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
873
874;; Mapping of mode to cast intrinsic name
;; "si" for SI integer vectors, "ps" for SF vectors, "pd" for DF vectors;
;; 256bit and 512bit modes only.
875(define_mode_attr castmode
876 [(V8SI "si") (V8SF "ps") (V4DF "pd")
877  (V16SI "si") (V16SF "ps") (V8DF "pd")])
878
879;; Instruction suffix for sign and zero extensions.
;; This is a code attribute: it is keyed on the RTL code, not on a mode.
880(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
881
882;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
883;; i64x4 or f64x4 for 512bit modes.
;; The 256bit integer entries are written "%~128".  NOTE(review): "%~" is
;; presumably resolved to "i" or "f" when the instruction is output, as the
;; first line above describes -- confirm in the operand-printing code.
884(define_mode_attr i128
885  [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
886   (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
887   (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
888
889;; For 256-bit modes for TARGET_AVX512VL && TARGET_AVX512DQ
890;; i32x4, f32x4, i64x2 or f64x2 suffixes.
;; Only the DI/DF element modes get the 64x2 forms; QI, HI and SI integer
;; modes all map to "i32x4".
891(define_mode_attr i128vldq
892  [(V8SF "f32x4") (V4DF "f64x2")
893   (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")])
894
895;; Mix-n-match
;; The SI, SF and DF element modes of 256bit and of 512bit size
;; respectively; the DI element modes are not included.
896(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
897(define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
898
899;; Mapping for dbpsadbw modes
;; Maps an HI element vector mode to the QI element vector mode of the same
;; total size.
900(define_mode_attr dbpsadbwmode
901  [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
902
903;; Mapping suffixes for broadcast
;; The suffix is chosen by element type: "b", "w", "d", "q" for QI, HI, SI,
;; DI and "ss", "sd" for SF, DF.
904(define_mode_attr bcstscalarsuff
905  [(V64QI "b")  (V32QI "b") (V16QI "b")
906   (V32HI "w")  (V16HI "w") (V8HI "w")
907   (V16SI "d")  (V8SI "d")  (V4SI "d")
908   (V8DI "q")   (V4DI "q")  (V2DI "q")
909   (V16SF "ss") (V8SF "ss") (V4SF "ss")
910   (V8DF "sd")  (V4DF "sd") (V2DF "sd")])
911
912;; Tie mode of assembler operand to mode iterator
;; "t" for 256bit modes, "g" for 512bit modes.  NOTE(review): the letters
;; look like operand modifiers (mov<mode>_internal uses "%g1"); confirm
;; against the uses of this attribute.
913(define_mode_attr concat_tg_mode
914  [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
915   (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
916
917;; Tie mode of assembler operand to mode iterator
;; Same as concat_tg_mode, with "x" added for the 128bit modes.
918(define_mode_attr xtg_mode
919  [(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x") (V4SF "x") (V2DF "x")
920   (V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
921   (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
922
923;; Half mask mode for unpacks
;; Maps an integer mode to the integer mode of half its width.  Only DI and
;; SI have entries.
924(define_mode_attr HALFMASKMODE
925  [(DI "SI") (SI "HI")])
926
927;; Double mask mode for packs
;; Maps an integer mode to the integer mode of twice its width.  Only HI and
;; SI have entries.
928(define_mode_attr DOUBLEMASKMODE
929  [(HI "SI") (SI "DI")])
930
931
932;; Include define_subst patterns for instructions with mask
933(include "subst.md")
934
935;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
936
937;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
938;;
939;; Move patterns
940;;
941;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
942
943;; All of these patterns are enabled for SSE1 as well as SSE2.
944;; This is essential for maintaining stable calling conventions.
945
;; Move expander, instantiated once for every mode in VMOVE and enabled
;; under TARGET_SSE.  Both operands use the nonimmediate_operand predicate.
;; The preparation code hands the whole job to ix86_expand_vector_move and
;; then executes DONE, so the RTL template serves to declare the operands
;; rather than to be emitted.
946(define_expand "mov<mode>"
947  [(set (match_operand:VMOVE 0 "nonimmediate_operand")
948	(match_operand:VMOVE 1 "nonimmediate_operand"))]
949  "TARGET_SSE"
950{
951  ix86_expand_vector_move (<MODE>mode, operands);
952  DONE;
953})
954
;; Vector move insn for every VMOVE mode.  The four alternatives are:
;;   0: register <- constant accepted by constraint "C"   (type sselog1)
;;   1: register <- constant accepted by constraint "BC"  (type sselog1)
;;   2: register <- register or memory                    (type ssemov)
;;   3: memory   <- register                              (type ssemov)
;; The insn condition requires at least one of the two operands to be a
;; register, so memory-to-memory moves never match.
955(define_insn "mov<mode>_internal"
956  [(set (match_operand:VMOVE 0 "nonimmediate_operand"
957	 "=v,v ,v ,m")
958	(match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
959	 " C,BC,vm,v"))]
960  "TARGET_SSE
961   && (register_operand (operands[0], <MODE>mode)
962       || register_operand (operands[1], <MODE>mode))"
963{
964  switch (get_attr_type (insn))
965    {
966    case TYPE_SSELOG1:
      /* Constant alternatives (0 and 1): the helper picks the opcode.  */
967      return standard_sse_constant_opcode (insn, operands);
968
969    case TYPE_SSEMOV:
970      /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
971	 in avx512f, so we need to use workarounds, to access sse registers
972	 16-31, which are evex-only. In avx512vl we don't need workarounds.  */
973      if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
974	  && (EXT_REX_SSE_REG_P (operands[0])
975	      || EXT_REX_SSE_REG_P (operands[1])))
976	{
977	  if (memory_operand (operands[0], <MODE>mode))
978	    {
	      /* Store: vextract lane 0 from the %g form of the source.  */
979	      if (<MODE_SIZE> == 32)
980		return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
981	      else if (<MODE_SIZE> == 16)
982		return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
983	      else
984		gcc_unreachable ();
985	    }
986	  else if (memory_operand (operands[1], <MODE>mode))
987	    {
	      /* Load: vbroadcast into the %g form of the destination.  */
988	      if (<MODE_SIZE> == 32)
989		return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
990	      else if (<MODE_SIZE> == 16)
991		return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
992	      else
993		gcc_unreachable ();
994	    }
995	  else
996	    /* Reg -> reg move is always aligned.  Just use wider move.  */
997	    switch (get_attr_mode (insn))
998	      {
999	      case MODE_V8SF:
1000	      case MODE_V4SF:
1001		return "vmovaps\t{%g1, %g0|%g0, %g1}";
1002	      case MODE_V4DF:
1003	      case MODE_V2DF:
1004		return "vmovapd\t{%g1, %g0|%g0, %g1}";
1005	      case MODE_OI:
1006	      case MODE_TI:
1007		return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
1008	      default:
1009		gcc_unreachable ();
1010	      }
1011	}
1012
      /* Ordinary case: the insn's "mode" attribute selects the mnemonic
	 family, and the unaligned form is used whenever either operand is
	 a misaligned operand.  */
1013      switch (get_attr_mode (insn))
1014	{
1015	case MODE_V16SF:
1016	case MODE_V8SF:
1017	case MODE_V4SF:
1018	  if (misaligned_operand (operands[0], <MODE>mode)
1019	      || misaligned_operand (operands[1], <MODE>mode))
1020	    return "%vmovups\t{%1, %0|%0, %1}";
1021	  else
1022	    return "%vmovaps\t{%1, %0|%0, %1}";
1023
1024	case MODE_V8DF:
1025	case MODE_V4DF:
1026	case MODE_V2DF:
1027	  if (misaligned_operand (operands[0], <MODE>mode)
1028	      || misaligned_operand (operands[1], <MODE>mode))
1029	    return "%vmovupd\t{%1, %0|%0, %1}";
1030	  else
1031	    return "%vmovapd\t{%1, %0|%0, %1}";
1032
1033	case MODE_OI:
1034	case MODE_TI:
	  /* With AVX512VL the element-size-suffixed vmovdqu form is used
	     for the SI/DI element modes, or for any mode when AVX512BW is
	     also available; otherwise plain (v)movdqu is emitted.  */
1035	  if (misaligned_operand (operands[0], <MODE>mode)
1036	      || misaligned_operand (operands[1], <MODE>mode))
1037	    return TARGET_AVX512VL
1038		   && (<MODE>mode == V4SImode
1039		       || <MODE>mode == V2DImode
1040		       || <MODE>mode == V8SImode
1041		       || <MODE>mode == V4DImode
1042		       || TARGET_AVX512BW)
1043		   ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1044		   : "%vmovdqu\t{%1, %0|%0, %1}";
1045	  else
1046	    return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
1047				   : "%vmovdqa\t{%1, %0|%0, %1}";
1048	case MODE_XI:
	  /* 64-byte integer moves: the element-size suffix is used for the
	     SI/DI element modes or with AVX512BW, vmovdqu64 otherwise.  */
1049	  if (misaligned_operand (operands[0], <MODE>mode)
1050	      || misaligned_operand (operands[1], <MODE>mode))
1051	    return (<MODE>mode == V16SImode
1052		    || <MODE>mode == V8DImode
1053		    || TARGET_AVX512BW)
1054		   ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1055		   : "vmovdqu64\t{%1, %0|%0, %1}";
1056	  else
1057	    return "vmovdqa64\t{%1, %0|%0, %1}";
1058
1059	default:
1060	  gcc_unreachable ();
1061	}
1062
1063    default:
1064      gcc_unreachable ();
1065    }
1066}
1067  [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
1068   (set_attr "prefix" "maybe_vex")
   ;; The "mode" attribute drives the mnemonic selection in the output code
   ;; above; the first condition that holds determines the value.
1069   (set (attr "mode")
1070	(cond [(and (eq_attr "alternative" "1")
1071		    (match_test "TARGET_AVX512VL"))
1072		 (const_string "<sseinsnmode>")
1073	       (and (match_test "<MODE_SIZE> == 16")
1074		    (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1075			 (and (eq_attr "alternative" "3")
1076			      (match_test "TARGET_SSE_TYPELESS_STORES"))))
1077		 (const_string "<ssePSmode>")
1078	       (match_test "TARGET_AVX")
1079		 (const_string "<sseinsnmode>")
1080	       (ior (not (match_test "TARGET_SSE2"))
1081		    (match_test "optimize_function_for_size_p (cfun)"))
1082		 (const_string "V4SF")
1083	       (and (eq_attr "alternative" "0")
1084		    (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
1085		 (const_string "TI")
1086	      ]
1087	      (const_string "<sseinsnmode>")))
   ;; Alternative 1 is only enabled with SSE2 for 16-byte modes and with
   ;; AVX2 for 32-byte modes; everything else is always enabled.
1088   (set (attr "enabled")
1089        (cond [(and (match_test "<MODE_SIZE> == 16")
1090		    (eq_attr "alternative" "1"))
1091		 (symbol_ref "TARGET_SSE2")
1092	       (and (match_test "<MODE_SIZE> == 32")
1093		    (eq_attr "alternative" "1"))
1094		 (symbol_ref "TARGET_AVX2")
1095	      ]
1096	      (symbol_ref "true")))])
1097
;; Masked move/load for the V48_AVX512VL modes: a vec_merge of operand 1
;; (register in alternative 0, memory in alternative 1) with operand 2 under
;; mask register operand 3.  Operand 2 is either tied to the destination
;; ("0") or a constant accepted by "C".
;; NOTE(review): %N2 presumably prints the zero-masking marker when operand 2
;; is the constant -- confirm against the operand printer.
1098(define_insn "<avx512>_load<mode>_mask"
1099  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
1100	(vec_merge:V48_AVX512VL
1101	  (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
1102	  (match_operand:V48_AVX512VL 2 "vector_move_operand" "0C,0C")
1103	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1104  "TARGET_AVX512F"
1105{
  /* Floating-point element modes use vmovu/vmova plus the mode suffix,
     integer element modes use vmovdqu/vmovdqa plus the element size.  The
     unaligned form is chosen when operand 1 is a misaligned operand.  */
1106  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1107    {
1108      if (misaligned_operand (operands[1], <MODE>mode))
1109	return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1110      else
1111	return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1112    }
1113  else
1114    {
1115      if (misaligned_operand (operands[1], <MODE>mode))
1116	return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1117      else
1118	return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1119    }
1120}
1121  [(set_attr "type" "ssemov")
1122   (set_attr "prefix" "evex")
1123   (set_attr "memory" "none,load")
1124   (set_attr "mode" "<sseinsnmode>")])
1125
;; Masked move/load for the VI12_AVX512VL modes.  Same RTL shape as the
;; V48_AVX512VL pattern of the same name, but it requires AVX512BW and
;; always emits the vmovdqu form, whatever the alignment of operand 1.
1126(define_insn "<avx512>_load<mode>_mask"
1127  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
1128	(vec_merge:VI12_AVX512VL
1129	  (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
1130	  (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C,0C")
1131	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1132  "TARGET_AVX512BW"
1133  "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1134  [(set_attr "type" "ssemov")
1135   (set_attr "prefix" "evex")
1136   (set_attr "memory" "none,load")
1137   (set_attr "mode" "<sseinsnmode>")])
1138
;; Mask-controlled blend for the V48_AVX512VL modes.  Note the operand
;; numbering: the first vec_merge arm is operand 2 (register or memory) and
;; the second arm is operand 1 (register); operand 3 is the mask register.
1139(define_insn "<avx512>_blendm<mode>"
1140  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1141	(vec_merge:V48_AVX512VL
1142	  (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
1143	  (match_operand:V48_AVX512VL 1 "register_operand" "v")
1144	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1145  "TARGET_AVX512F"
1146  "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1147  [(set_attr "type" "ssemov")
1148   (set_attr "prefix" "evex")
1149   (set_attr "mode" "<sseinsnmode>")])
1150
;; Mask-controlled blend for the VI12_AVX512VL modes; requires AVX512BW and
;; always uses the vpblendm mnemonic.  As in the V48_AVX512VL variant, the
;; first vec_merge arm is operand 2 and the second is operand 1.
1151(define_insn "<avx512>_blendm<mode>"
1152  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1153	(vec_merge:VI12_AVX512VL
1154	  (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
1155	  (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1156	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1157  "TARGET_AVX512BW"
1158  "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1159  [(set_attr "type" "ssemov")
1160   (set_attr "prefix" "evex")
1161   (set_attr "mode" "<sseinsnmode>")])
1162
;; Masked store for the V48_AVX512VL modes: memory operand 0 becomes the
;; vec_merge of register operand 1 with its own previous contents
;; (match_dup 0), under mask register operand 2.
1163(define_insn "<avx512>_store<mode>_mask"
1164  [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1165	(vec_merge:V48_AVX512VL
1166	  (match_operand:V48_AVX512VL 1 "register_operand" "v")
1167	  (match_dup 0)
1168	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1169  "TARGET_AVX512F"
1170{
  /* Same mnemonic selection as the masked load: float vs. integer element
     mode, and unaligned form when the destination is misaligned.  */
1171  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1172    {
1173      if (misaligned_operand (operands[0], <MODE>mode))
1174	return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1175      else
1176	return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1177    }
1178  else
1179    {
1180      if (misaligned_operand (operands[0], <MODE>mode))
1181	return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1182      else
1183	return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1184    }
1185}
1186  [(set_attr "type" "ssemov")
1187   (set_attr "prefix" "evex")
1188   (set_attr "memory" "store")
1189   (set_attr "mode" "<sseinsnmode>")])
1190
;; Masked store for the VI12_AVX512VL modes; requires AVX512BW and always
;; emits the vmovdqu form.
1191(define_insn "<avx512>_store<mode>_mask"
1192  [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1193	(vec_merge:VI12_AVX512VL
1194	  (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1195	  (match_dup 0)
1196	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1197  "TARGET_AVX512BW"
1198  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1199  [(set_attr "type" "ssemov")
1200   (set_attr "prefix" "evex")
1201   (set_attr "memory" "store")
1202   (set_attr "mode" "<sseinsnmode>")])
1203
;; Build a V2DI whose element 0 is element 0 of operand 1 and whose element 1
;; is zero; emitted as a single (v)movq.
1204(define_insn "sse2_movq128"
1205  [(set (match_operand:V2DI 0 "register_operand" "=v")
1206	(vec_concat:V2DI
1207	  (vec_select:DI
1208	    (match_operand:V2DI 1 "nonimmediate_operand" "vm")
1209	    (parallel [(const_int 0)]))
1210	  (const_int 0)))]
1211  "TARGET_SSE2"
1212  "%vmovq\t{%1, %0|%0, %q1}"
1213  [(set_attr "type" "ssemov")
1214   (set_attr "prefix" "maybe_vex")
1215   (set_attr "mode" "TI")])
1216
1217;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1218;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1219;; from memory, we'd prefer to load the memory directly into the %xmm
1220;; register.  To facilitate this happy circumstance, this pattern won't
1221;; split until after register allocation.  If the 64-bit value didn't
1222;; come from memory, this is the best we can do.  This is much better
1223;; than storing %edx:%eax into a stack temporary and loading an %xmm
1224;; from there.
1225
;; Alternative 0 takes the DImode value from integer registers and needs the
;; earlyclobber xmm scratch (operand 2); alternative 1 takes it from memory
;; and needs no scratch.  The insn is always split ("#"), and only once
;; reload has completed.
1226(define_insn_and_split "movdi_to_sse"
1227  [(parallel
1228    [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
1229	  (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
1230     (clobber (match_scratch:V4SI 2 "=&x,X"))])]
1231  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1232  "#"
1233  "&& reload_completed"
1234  [(const_int 0)]
1235{
1236 if (register_operand (operands[1], DImode))
1237   {
1238      /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1239	 Assemble the 64-bit DImode value in an xmm register.  */
1240      emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1241				  gen_lowpart (SImode, operands[1])));
1242      emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1243				  gen_highpart (SImode, operands[1])));
1244      emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1245					     operands[2]));
1246   }
1247 else if (memory_operand (operands[1], DImode))
1248   {
     /* This split only runs once reload has completed, when no new pseudo
	registers may be created, so a gen_reg_rtx temporary must not be
	used here.  Load the 64-bit value, with a zero upper half, directly
	into the V2DImode view of the destination register.  */
     emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
				    operands[1], const0_rtx));
1252   }
1253 else
1254   gcc_unreachable ();
1255 DONE;
1256})
1257
;; After reload, rewrite a V4SF register load whose source matches
;; zero_extended_scalar_load_operand as a vec_merge that places the SFmode
;; low part of the source in element 0 of an all-zero vector.
1258(define_split
1259  [(set (match_operand:V4SF 0 "register_operand")
1260	(match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1261  "TARGET_SSE && reload_completed"
1262  [(set (match_dup 0)
1263	(vec_merge:V4SF
1264	  (vec_duplicate:V4SF (match_dup 1))
1265	  (match_dup 2)
1266	  (const_int 1)))]
1267{
  /* Operand 1 becomes the scalar low part; operand 2 is the zero vector.  */
1268  operands[1] = gen_lowpart (SFmode, operands[1]);
1269  operands[2] = CONST0_RTX (V4SFmode);
1270})
1271
;; V2DF counterpart of the split above: after reload, the load becomes a
;; vec_concat of the DFmode low part of the source with a DFmode zero.
1272(define_split
1273  [(set (match_operand:V2DF 0 "register_operand")
1274	(match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1275  "TARGET_SSE2 && reload_completed"
1276  [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1277{
  /* Operand 1 becomes the scalar low part; operand 2 is a scalar zero.  */
1278  operands[1] = gen_lowpart (DFmode, operands[1]);
1279  operands[2] = CONST0_RTX (DFmode);
1280})
1281
;; Misaligned vector move expander for every VMOVE mode.  All the work is
;; delegated to ix86_expand_vector_move_misalign.
1282(define_expand "movmisalign<mode>"
1283  [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1284	(match_operand:VMOVE 1 "nonimmediate_operand"))]
1285  "TARGET_SSE"
1286{
1287  ix86_expand_vector_move_misalign (<MODE>mode, operands);
1288  DONE;
1289})
1290
1291;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
;; First insn: load DF memory operand 1 into element 0 of a V2DF register,
;; with a zero upper element.  Second insn: keep element 0 of operand 2 and
;; load DF memory operand 3 into element 1.  The pair is replaced by a single
;; V2DF load into operand 2 from the address of operand 1.  Whether the
;; registers and the two memory references are compatible is decided by
;; ix86_operands_ok_for_move_multiple.
1292(define_peephole2
1293  [(set (match_operand:V2DF 0 "sse_reg_operand")
1294	(vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1295			 (match_operand:DF 4 "const0_operand")))
1296   (set (match_operand:V2DF 2 "sse_reg_operand")
1297	(vec_concat:V2DF (vec_select:DF (match_dup 2)
1298					(parallel [(const_int 0)]))
1299			 (match_operand:DF 3 "memory_operand")))]
1300  "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1301   && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1302  [(set (match_dup 2) (match_dup 5))]
1303  "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1304
;; Variant of the load merge above in which the first insn is a plain DFmode
;; load and the second insn concatenates register operand 4 with DF memory
;; operand 3.  Operand 4 must be the same hard register as destination
;; operand 2; the remaining checks are left to
;; ix86_operands_ok_for_move_multiple.
1305(define_peephole2
1306  [(set (match_operand:DF 0 "sse_reg_operand")
1307	(match_operand:DF 1 "memory_operand"))
1308   (set (match_operand:V2DF 2 "sse_reg_operand")
1309	(vec_concat:V2DF (match_operand:DF 4 "sse_reg_operand")
1310			 (match_operand:DF 3 "memory_operand")))]
1311  "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1312   && REGNO (operands[4]) == REGNO (operands[2])
1313   && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1314  [(set (match_dup 2) (match_dup 5))]
1315  "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1316
1317;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
;; The two insns store element 0 of operand 1 and element 1 of operand 3 to
;; DF memory operands 0 and 2.  They are replaced by one V2DF store of
;; operand 1 to the address of operand 0.
;; NOTE(review): nothing in this pattern itself requires operands 1 and 3 to
;; be the same register; presumably ix86_operands_ok_for_move_multiple
;; checks that -- confirm.
1318(define_peephole2
1319  [(set (match_operand:DF 0 "memory_operand")
1320	(vec_select:DF (match_operand:V2DF 1 "sse_reg_operand")
1321		       (parallel [(const_int 0)])))
1322   (set (match_operand:DF 2 "memory_operand")
1323	(vec_select:DF (match_operand:V2DF 3 "sse_reg_operand")
1324		       (parallel [(const_int 1)])))]
1325  "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1326   && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1327  [(set (match_dup 4) (match_dup 1))]
1328  "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
1329
;; lddqu: load of a VI1 vector from memory, kept opaque as UNSPEC_LDDQU.
;; prefix_data16 and prefix_rep are given explicitly ("0" and "1") for the
;; non-AVX encoding and left at "*" when TARGET_AVX.
1330(define_insn "<sse3>_lddqu<avxsizesuffix>"
1331  [(set (match_operand:VI1 0 "register_operand" "=x")
1332	(unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1333		    UNSPEC_LDDQU))]
1334  "TARGET_SSE3"
1335  "%vlddqu\t{%1, %0|%0, %1}"
1336  [(set_attr "type" "ssemov")
1337   (set_attr "movu" "1")
1338   (set (attr "prefix_data16")
1339     (if_then_else
1340       (match_test "TARGET_AVX")
1341     (const_string "*")
1342     (const_string "0")))
1343   (set (attr "prefix_rep")
1344     (if_then_else
1345       (match_test "TARGET_AVX")
1346     (const_string "*")
1347     (const_string "1")))
1348   (set_attr "prefix" "maybe_vex")
1349   (set_attr "mode" "<sseinsnmode>")])
1350
;; movnti: UNSPEC_MOVNT store of an SWI48 general register to memory.
1351(define_insn "sse2_movnti<mode>"
1352  [(set (match_operand:SWI48 0 "memory_operand" "=m")
1353	(unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1354		      UNSPEC_MOVNT))]
1355  "TARGET_SSE2"
1356  "movnti\t{%1, %0|%0, %1}"
1357  [(set_attr "type" "ssemov")
1358   (set_attr "prefix_data16" "0")
1359   (set_attr "mode" "<MODE>")])
1360
;; (v)movntps / (v)movntpd: UNSPEC_MOVNT store of a VF vector register to
;; memory.
1361(define_insn "<sse>_movnt<mode>"
1362  [(set (match_operand:VF 0 "memory_operand" "=m")
1363	(unspec:VF
1364	  [(match_operand:VF 1 "register_operand" "v")]
1365	  UNSPEC_MOVNT))]
1366  "TARGET_SSE"
1367  "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1368  [(set_attr "type" "ssemov")
1369   (set_attr "prefix" "maybe_vex")
1370   (set_attr "mode" "<MODE>")])
1371
;; (v)movntdq: UNSPEC_MOVNT store of a VI8 vector register to memory.
;; NOTE(review): the "type" here is "ssecvt" whereas the other UNSPEC_MOVNT
;; store insns in this group use "ssemov" -- confirm that this is intended.
1372(define_insn "<sse2>_movnt<mode>"
1373  [(set (match_operand:VI8 0 "memory_operand" "=m")
1374	(unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1375		    UNSPEC_MOVNT))]
1376  "TARGET_SSE2"
1377  "%vmovntdq\t{%1, %0|%0, %1}"
1378  [(set_attr "type" "ssecvt")
1379   (set (attr "prefix_data16")
1380     (if_then_else
1381       (match_test "TARGET_AVX")
1382     (const_string "*")
1383     (const_string "1")))
1384   (set_attr "prefix" "maybe_vex")
1385   (set_attr "mode" "<sseinsnmode>")])
1386
1387; Expand patterns for non-temporal stores.  At the moment, only those
1388; that directly map to insns are defined; it would be possible to
1389; define patterns for other modes that would expand to several insns.
1390
1391;; Modes handled by storent patterns.
;; Each entry carries the target condition under which it is available;
;; V4SF has no extra condition beyond that of the pattern using the iterator.
1392(define_mode_iterator STORENT_MODE
1393  [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1394   (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1395   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1396   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1397   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
1398
;; Non-temporal store expander.  It has no preparation code, so it simply
;; emits the UNSPEC_MOVNT set for the given STORENT_MODE.
1399(define_expand "storent<mode>"
1400  [(set (match_operand:STORENT_MODE 0 "memory_operand")
1401	(unspec:STORENT_MODE
1402	  [(match_operand:STORENT_MODE 1 "register_operand")]
1403	  UNSPEC_MOVNT))]
1404  "TARGET_SSE")
1405
1406;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1407;;
1408;; Mask operations
1409;;
1410;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1411
1412;; All integer modes with AVX512BW/DQ.
;; QI needs AVX512DQ, SI and DI need AVX512BW, HI has no extra condition.
1413(define_mode_iterator SWI1248_AVX512BWDQ
1414  [(QI "TARGET_AVX512DQ") HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1415
1416;; All integer modes with AVX512BW, where HImode operation
1417;; can be used instead of QImode.
;; QI and HI have no extra condition; SI and DI need AVX512BW.
1418(define_mode_iterator SWI1248_AVX512BW
1419  [QI HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1420
1421;; All integer modes with AVX512BW/DQ, even HImode requires DQ.
;; QI and HI need AVX512DQ; SI and DI need AVX512BW.
1422(define_mode_iterator SWI1248_AVX512BWDQ2
1423  [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ")
1424   (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1425
;; Mask move expander.  It has no preparation code; the condition rejects
;; moves in which both operands are memory.
1426(define_expand "kmov<mskmodesuffix>"
1427  [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
1428	(match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
1429  "TARGET_AVX512F
1430   && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
1431
;; Mask-register logic operations (any_logic) on "k" registers.  The
;; UNSPEC_MASKOP element is part of the pattern to be matched.  For QImode
;; without AVX512DQ the "mode" attribute is HI and the word-sized mnemonic
;; is emitted instead of the byte-sized one.
1432(define_insn "k<code><mode>"
1433  [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1434	(any_logic:SWI1248_AVX512BW
1435	  (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1436	  (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1437   (unspec [(const_int 0)] UNSPEC_MASKOP)]
1438  "TARGET_AVX512F"
1439{
1440  if (get_attr_mode (insn) == MODE_HI)
1441    return "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
1442  else
1443    return "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1444}
1445  [(set_attr "type" "msklog")
1446   (set_attr "prefix" "vex")
1447   (set (attr "mode")
1448     (cond [(and (match_test "<MODE>mode == QImode")
1449		 (not (match_test "TARGET_AVX512DQ")))
1450	       (const_string "HI")
1451	   ]
1452	   (const_string "<MODE>")))])
1453
;; Mask and-not: (~operand 1) & operand 2 on "k" registers.  For QImode
;; without AVX512DQ the "mode" attribute is HI and kandnw is emitted.
1454(define_insn "kandn<mode>"
1455  [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1456	(and:SWI1248_AVX512BW
1457	  (not:SWI1248_AVX512BW
1458	    (match_operand:SWI1248_AVX512BW 1 "register_operand" "k"))
1459	  (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1460   (unspec [(const_int 0)] UNSPEC_MASKOP)]
1461  "TARGET_AVX512F"
1462{
1463  if (get_attr_mode (insn) == MODE_HI)
1464    return "kandnw\t{%2, %1, %0|%0, %1, %2}";
1465  else
1466    return "kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1467}
1468  [(set_attr "type" "msklog")
1469   (set_attr "prefix" "vex")
1470   (set (attr "mode")
1471     (cond [(and (match_test "<MODE>mode == QImode")
1472		 (not (match_test "TARGET_AVX512DQ")))
1473	      (const_string "HI")
1474	   ]
1475	   (const_string "<MODE>")))])
1476
;; Mask xnor: ~(operand 1 ^ operand 2) on "k" registers.  For QImode
;; without AVX512DQ the "mode" attribute is HI and kxnorw is emitted.
1477(define_insn "kxnor<mode>"
1478  [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1479	(not:SWI1248_AVX512BW
1480	  (xor:SWI1248_AVX512BW
1481	    (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1482	    (match_operand:SWI1248_AVX512BW 2 "register_operand" "k"))))
1483   (unspec [(const_int 0)] UNSPEC_MASKOP)]
1484  "TARGET_AVX512F"
1485{
1486  if (get_attr_mode (insn) == MODE_HI)
1487    return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
1488  else
1489    return "kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1490}
1491  [(set_attr "type" "msklog")
1492   (set_attr "prefix" "vex")
1493   (set (attr "mode")
1494     (cond [(and (match_test "<MODE>mode == QImode")
1495		 (not (match_test "TARGET_AVX512DQ")))
1496	      (const_string "HI")
1497	   ]
1498	   (const_string "<MODE>")))])
1499
;; Mask complement: ~operand 1 on "k" registers.  For QImode without
;; AVX512DQ the "mode" attribute is HI and knotw is emitted.
1500(define_insn "knot<mode>"
1501  [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1502	(not:SWI1248_AVX512BW
1503	  (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")))
1504   (unspec [(const_int 0)] UNSPEC_MASKOP)]
1505  "TARGET_AVX512F"
1506{
1507  if (get_attr_mode (insn) == MODE_HI)
1508    return "knotw\t{%1, %0|%0, %1}";
1509  else
1510    return "knot<mskmodesuffix>\t{%1, %0|%0, %1}";
1511}
1512  [(set_attr "type" "msklog")
1513   (set_attr "prefix" "vex")
1514   (set (attr "mode")
1515     (cond [(and (match_test "<MODE>mode == QImode")
1516		 (not (match_test "TARGET_AVX512DQ")))
1517	       (const_string "HI")
1518	   ]
1519	   (const_string "<MODE>")))])
1520
;; Mask addition on "k" registers.  It uses the SWI1248_AVX512BWDQ2
;; iterator, so there is no HImode fallback for QImode as in the logic
;; patterns: each mode always emits the mnemonic for its own size.
1521(define_insn "kadd<mode>"
1522  [(set (match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "=k")
1523	(plus:SWI1248_AVX512BWDQ2
1524	  (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")
1525	  (match_operand:SWI1248_AVX512BWDQ2 2 "register_operand" "k")))
1526   (unspec [(const_int 0)] UNSPEC_MASKOP)]
1527  "TARGET_AVX512F"
1528  "kadd<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1529  [(set_attr "type" "msklog")
1530   (set_attr "prefix" "vex")
1531   (set_attr "mode" "<MODE>")])
1532
1533;; Mask variant shift mnemonics
;; ashift -> "shiftl", lshiftrt -> "shiftr"; combined with the "k" prefix
;; and the mask-size suffix in the insn template below.
1534(define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
1535
;; Mask-register shift (any_lshift) of operand 1 by the immediate count in
;; operand 2.
1536(define_insn "k<code><mode>"
1537  [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
1538	(any_lshift:SWI1248_AVX512BWDQ
1539	  (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
1540	  (match_operand:QI 2 "immediate_operand" "n")))
1541   (unspec [(const_int 0)] UNSPEC_MASKOP)]
1542  "TARGET_AVX512F"
1543  "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1544  [(set_attr "type" "msklog")
1545   (set_attr "prefix" "vex")
1546   (set_attr "mode" "<MODE>")])
1547
;; ktest: sets FLAGS_REG from two mask registers; the flag computation is
;; kept opaque as UNSPEC_KTEST.
1548(define_insn "ktest<mode>"
1549  [(set (reg:CC FLAGS_REG)
1550	(unspec:CC
1551	  [(match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "k")
1552	   (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")]
1553	  UNSPEC_KTEST))]
1554  "TARGET_AVX512F"
1555  "ktest<mskmodesuffix>\t{%1, %0|%0, %1}"
1556  [(set_attr "mode" "<MODE>")
1557   (set_attr "type" "msklog")
1558   (set_attr "prefix" "vex")])
1559
;; kortest: sets FLAGS_REG from two mask registers; the flag computation is
;; kept opaque as UNSPEC_KORTEST.
1560(define_insn "kortest<mode>"
1561  [(set (reg:CC FLAGS_REG)
1562	(unspec:CC
1563	  [(match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "k")
1564	   (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")]
1565	  UNSPEC_KORTEST))]
1566  "TARGET_AVX512F"
1567  "kortest<mskmodesuffix>\t{%1, %0|%0, %1}"
1568  [(set_attr "mode" "<MODE>")
1569   (set_attr "type" "msklog")
1570   (set_attr "prefix" "vex")])
1571
;; Concatenate two QImode masks into an HImode mask: zero-extended operand 1
;; shifted left by 8 supplies the upper byte, zero-extended operand 2 the
;; lower byte.
1572(define_insn "kunpckhi"
1573  [(set (match_operand:HI 0 "register_operand" "=k")
1574	(ior:HI
1575	  (ashift:HI
1576	    (zero_extend:HI (match_operand:QI 1 "register_operand" "k"))
1577	    (const_int 8))
1578	  (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))]
1579  "TARGET_AVX512F"
1580  "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
1581  [(set_attr "mode" "HI")
1582   (set_attr "type" "msklog")
1583   (set_attr "prefix" "vex")])
1584
;; Concatenate two HImode masks into an SImode mask: zero-extended operand 1
;; shifted left by 16 supplies the upper half, zero-extended operand 2 the
;; lower half.  The "type" and "prefix" attributes are set like those of
;; kunpckhi, so the insn is not left with the default values.
1585(define_insn "kunpcksi"
1586  [(set (match_operand:SI 0 "register_operand" "=k")
1587	(ior:SI
1588	  (ashift:SI
1589	    (zero_extend:SI (match_operand:HI 1 "register_operand" "k"))
1590	    (const_int 16))
1591	  (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))]
1592  "TARGET_AVX512BW"
1593  "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
1594  [(set_attr "mode" "SI")
   (set_attr "type" "msklog")
   (set_attr "prefix" "vex")])
1595
;; Concatenate two SImode masks into a DImode mask: zero-extended operand 1
;; shifted left by 32 supplies the upper half, zero-extended operand 2 the
;; lower half.  The "type" and "prefix" attributes are set like those of
;; kunpckhi, so the insn is not left with the default values.
1596(define_insn "kunpckdi"
1597  [(set (match_operand:DI 0 "register_operand" "=k")
1598	(ior:DI
1599	  (ashift:DI
1600	    (zero_extend:DI (match_operand:SI 1 "register_operand" "k"))
1601	    (const_int 32))
1602	  (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))]
1603  "TARGET_AVX512BW"
1604  "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
1605  [(set_attr "mode" "DI")
   (set_attr "type" "msklog")
   (set_attr "prefix" "vex")])
1606
1607
1608;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1609;;
1610;; Parallel floating point arithmetic
1611;;
1612;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1613
;; Vector abs/neg expander (absneg code iterator) for the VF modes.  All the
;; work is delegated to ix86_expand_fp_absneg_operator.
1614(define_expand "<code><mode>2"
1615  [(set (match_operand:VF 0 "register_operand")
1616	(absneg:VF
1617	  (match_operand:VF 1 "register_operand")))]
1618  "TARGET_SSE"
1619  "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
1620
;; Vector abs/neg kept as one insn until after reload, then split into a
;; single logical operation: XOR with operand 2 for NEG, AND with operand 2
;; otherwise.  Operand 2 appears in a (use ...) and is the vector the input
;; is combined with.  Alternatives 0-1 are the non-AVX forms, in which one
;; input is tied to the destination; alternatives 2-3 are the AVX forms.
1621(define_insn_and_split "*absneg<mode>2"
1622  [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1623	(match_operator:VF 3 "absneg_operator"
1624	  [(match_operand:VF 1 "vector_operand" "0,  xBm,v, m")]))
1625   (use (match_operand:VF 2 "vector_operand"    "xBm,0,  vm,v"))]
1626  "TARGET_SSE"
1627  "#"
1628  "&& reload_completed"
1629  [(const_int 0)]
1630{
1631  enum rtx_code absneg_op;
1632  rtx op1, op2;
1633  rtx t;
1634
1635  if (TARGET_AVX)
1636    {
      /* Three-operand form: if operand 1 is memory, put it second.  */
1637      if (MEM_P (operands[1]))
1638	op1 = operands[2], op2 = operands[1];
1639      else
1640	op1 = operands[1], op2 = operands[2];
1641    }
1642  else
1643    {
      /* Two-operand form: the first source is the destination itself and
	 the second is whichever input is not tied to it.  */
1644      op1 = operands[0];
1645      if (rtx_equal_p (operands[0], operands[1]))
1646	op2 = operands[2];
1647      else
1648	op2 = operands[1];
1649    }
1650
1651  absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1652  t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1653  t = gen_rtx_SET (operands[0], t);
1654  emit_insn (t);
1655  DONE;
1656}
1657  [(set_attr "isa" "noavx,noavx,avx,avx")])
1658
;; Vector add/sub expander (plusminus code iterator) for the VF modes, with
;; the mask and rounding substitutions applied.  The preparation code only
;; runs ix86_fixup_binary_operands_no_copy; the RTL template is then emitted.
1659(define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1660  [(set (match_operand:VF 0 "register_operand")
1661	(plusminus:VF
1662	  (match_operand:VF 1 "<round_nimm_predicate>")
1663	  (match_operand:VF 2 "<round_nimm_predicate>")))]
1664  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1665  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1666
;; Vector add/sub insn for the VF modes.  Alternative 0 is the two-operand
;; non-AVX form, with operand 1 tied to the destination; alternative 1 is
;; the three-operand "v"-prefixed form, which also carries the mask and
;; rounding operands supplied by the substitutions.
1667(define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1668  [(set (match_operand:VF 0 "register_operand" "=x,v")
1669	(plusminus:VF
1670	  (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1671	  (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1672  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1673   && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1674  "@
1675   <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1676   v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1677  [(set_attr "isa" "noavx,avx")
1678   (set_attr "type" "sseadd")
1679   (set_attr "prefix" "<mask_prefix3>")
1680   (set_attr "mode" "<MODE>")])
1681
;; Scalar add/sub on the low element of a 128-bit vector: the vec_merge with
;; mask (const_int 1) takes element 0 from the arithmetic result and the
;; remaining elements from operand 1.
1682(define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name>"
1683  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1684	(vec_merge:VF_128
1685	  (plusminus:VF_128
1686	    (match_operand:VF_128 1 "register_operand" "0,v")
1687	    (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
1688	  (match_dup 1)
1689	  (const_int 1)))]
1690  "TARGET_SSE"
1691  "@
1692   <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1693   v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1694  [(set_attr "isa" "noavx,avx")
1695   (set_attr "type" "sseadd")
1696   (set_attr "prefix" "<round_scalar_prefix>")
1697   (set_attr "mode" "<ssescalarmode>")])
1698
;; Vector multiply expander for the VF modes, with the mask and rounding
;; substitutions applied.  The preparation code only runs
;; ix86_fixup_binary_operands_no_copy; the RTL template is then emitted.
1699(define_expand "mul<mode>3<mask_name><round_name>"
1700  [(set (match_operand:VF 0 "register_operand")
1701	(mult:VF
1702	  (match_operand:VF 1 "<round_nimm_predicate>")
1703	  (match_operand:VF 2 "<round_nimm_predicate>")))]
1704  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1705  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1706
;; Vector multiply insn for the VF modes.  Operand 1 is marked commutative
;; ("%"), and the condition rejects the case in which both inputs are
;; memory.  Alternative 0 is the two-operand non-AVX form, alternative 1 the
;; three-operand "v"-prefixed form.
1707(define_insn "*mul<mode>3<mask_name><round_name>"
1708  [(set (match_operand:VF 0 "register_operand" "=x,v")
1709	(mult:VF
1710	  (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1711	  (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1712  "TARGET_SSE
1713   && !(MEM_P (operands[1]) && MEM_P (operands[2]))
1714   && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1715  "@
1716   mul<ssemodesuffix>\t{%2, %0|%0, %2}
1717   vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1718  [(set_attr "isa" "noavx,avx")
1719   (set_attr "type" "ssemul")
1720   (set_attr "prefix" "<mask_prefix3>")
1721   (set_attr "btver2_decode" "direct,double")
1722   (set_attr "mode" "<MODE>")])
1723
;; Scalar multiply/divide (multdiv code iterator) on the low element of a
;; 128-bit vector: element 0 comes from the arithmetic result, the remaining
;; elements from operand 1.  The "type" attribute is built from the mnemonic
;; ("sse" followed by <multdiv_mnemonic>).
1724(define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
1725  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1726	(vec_merge:VF_128
1727	  (multdiv:VF_128
1728	    (match_operand:VF_128 1 "register_operand" "0,v")
1729	    (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
1730	  (match_dup 1)
1731	  (const_int 1)))]
1732  "TARGET_SSE"
1733  "@
1734   <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1735   v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1736  [(set_attr "isa" "noavx,avx")
1737   (set_attr "type" "sse<multdiv_mnemonic>")
1738   (set_attr "prefix" "<round_scalar_prefix>")
1739   (set_attr "btver2_decode" "direct,double")
1740   (set_attr "mode" "<ssescalarmode>")])
1741
;; Vector divide expander for the VF2 modes (requires SSE2).  The
;; preparation code only runs ix86_fixup_binary_operands_no_copy; the div
;; RTL template is then emitted.
1742(define_expand "div<mode>3"
1743  [(set (match_operand:VF2 0 "register_operand")
1744	(div:VF2 (match_operand:VF2 1 "register_operand")
1745		 (match_operand:VF2 2 "vector_operand")))]
1746  "TARGET_SSE2"
1747  "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1748
;; Vector divide expander for the VF1 modes.  When SSE math and
;; TARGET_RECIP_VEC_DIV are enabled, the insn is not optimized for size, and
;; the finite-math-only / no-trapping-math / unsafe-math flags are all in
;; effect, the division is emitted by ix86_emit_swdivsf.  Otherwise the
;; div RTL template is emitted.
1749(define_expand "div<mode>3"
1750  [(set (match_operand:VF1 0 "register_operand")
1751	(div:VF1 (match_operand:VF1 1 "register_operand")
1752		 (match_operand:VF1 2 "vector_operand")))]
1753  "TARGET_SSE"
1754{
1755  ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1756
1757  if (TARGET_SSE_MATH
1758      && TARGET_RECIP_VEC_DIV
1759      && !optimize_insn_for_size_p ()
1760      && flag_finite_math_only && !flag_trapping_math
1761      && flag_unsafe_math_optimizations)
1762    {
1763      ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
1764      DONE;
1765    }
1766})
1767
;; Vector divide insn for the VF modes.  Alternative 0 is the two-operand
;; non-AVX form, with the dividend tied to the destination; alternative 1 is
;; the three-operand "v"-prefixed form, which also carries the mask and
;; rounding operands supplied by the substitutions.
1768(define_insn "<sse>_div<mode>3<mask_name><round_name>"
1769  [(set (match_operand:VF 0 "register_operand" "=x,v")
1770	(div:VF
1771	  (match_operand:VF 1 "register_operand" "0,v")
1772	  (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1773  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1774  "@
1775   div<ssemodesuffix>\t{%2, %0|%0, %2}
1776   vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1777  [(set_attr "isa" "noavx,avx")
1778   (set_attr "type" "ssediv")
1779   (set_attr "prefix" "<mask_prefix3>")
1780   (set_attr "mode" "<MODE>")])
1781
;; Packed (v)rcpps for the VF1_128_256 modes; the operation is kept opaque
;; as UNSPEC_RCP.
1782(define_insn "<sse>_rcp<mode>2"
1783  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1784	(unspec:VF1_128_256
1785	  [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RCP))]
1786  "TARGET_SSE"
1787  "%vrcpps\t{%1, %0|%0, %1}"
1788  [(set_attr "type" "sse")
1789   (set_attr "atom_sse_attr" "rcp")
1790   (set_attr "btver2_sse_attr" "rcp")
1791   (set_attr "prefix" "maybe_vex")
1792   (set_attr "mode" "<MODE>")])
1793
;; Scalar reciprocal on V4SF.  The vec_merge mask (const_int 1) takes
;; element 0 from UNSPEC_RCP (operand 1) and every other element from
;; operand 2.
;;   Alternative 0 (noavx): operand 2 is tied to the destination; emits
;;     rcpss with operands 1 and 0 only.
;;   Alternative 1 (avx): emits the three-operand vrcpss.
;; In the Intel-syntax half of each template operand 1 is printed with
;; the %k modifier.
1794(define_insn "sse_vmrcpv4sf2"
1795  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1796	(vec_merge:V4SF
1797	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1798		       UNSPEC_RCP)
1799	  (match_operand:V4SF 2 "register_operand" "0,x")
1800	  (const_int 1)))]
1801  "TARGET_SSE"
1802  "@
1803   rcpss\t{%1, %0|%0, %k1}
1804   vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1805  [(set_attr "isa" "noavx,avx")
1806   (set_attr "type" "sse")
1807   (set_attr "atom_sse_attr" "rcp")
1808   (set_attr "btver2_sse_attr" "rcp")
1809   (set_attr "prefix" "orig,vex")
1810   (set_attr "mode" "SF")])
1811
;; Packed vrcp14 insn: operand 0 = UNSPEC_RCP14 (operand 1) over the
;; VF_AVX512VL modes, enabled under TARGET_AVX512F and marked EVEX-only.
;; The <mask_codefor>/<mask_name> pieces of the name and <mask_operand2>
;; in the template come from the masking substitution defined outside
;; this chunk.
1812(define_insn "<mask_codefor>rcp14<mode><mask_name>"
1813  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1814	(unspec:VF_AVX512VL
1815	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1816	  UNSPEC_RCP14))]
1817  "TARGET_AVX512F"
1818  "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1819  [(set_attr "type" "sse")
1820   (set_attr "prefix" "evex")
1821   (set_attr "mode" "<MODE>")])
1822
;; Scalar vrcp14 insn over the VF_128 modes (TARGET_AVX512F, EVEX).
;; The vec_merge mask (const_int 1) takes element 0 from
;; UNSPEC_RCP14 (operand 1) and the remaining elements from operand 2.
;; Emits vrcp14<ssescalarmodesuffix> with operands 1, 2 and 0.
1823(define_insn "srcp14<mode>"
1824  [(set (match_operand:VF_128 0 "register_operand" "=v")
1825	(vec_merge:VF_128
1826	  (unspec:VF_128
1827	    [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1828	    UNSPEC_RCP14)
1829	  (match_operand:VF_128 2 "register_operand" "v")
1830	  (const_int 1)))]
1831  "TARGET_AVX512F"
1832  "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1833  [(set_attr "type" "sse")
1834   (set_attr "prefix" "evex")
1835   (set_attr "mode" "<MODE>")])
1836
;; Masked scalar vrcp14 insn over the VF_128 modes (TARGET_AVX512F, EVEX).
;; Inner vec_merge: selects, under mask register operand 4 ("Yk"),
;;   between UNSPEC_RCP14 (operand 1) and operand 3, which is either tied
;;   to the destination or a constant accepted by "C".
;; Outer vec_merge (const_int 1): keeps element 0 of that result and
;;   takes the remaining elements from operand 2.
;; The template prints the mask as {%4} after the destination, followed
;; by %N3.
;; NOTE(review): %N3 presumably prints the zero-masking marker when
;; operand 3 is the zero constant -- confirm in the operand printer.
1837(define_insn "srcp14<mode>_mask"
1838  [(set (match_operand:VF_128 0 "register_operand" "=v")
1839	(vec_merge:VF_128
1840	  (vec_merge:VF_128
1841	    (unspec:VF_128
1842	      [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1843	    UNSPEC_RCP14)
1844	      (match_operand:VF_128 3 "vector_move_operand" "0C")
1845	    (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
1846	  (match_operand:VF_128 2 "register_operand" "v")
1847	  (const_int 1)))]
1848  "TARGET_AVX512F"
1849  "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
1850  [(set_attr "type" "sse")
1851   (set_attr "prefix" "evex")
1852   (set_attr "mode" "<MODE>")])
1853
;; Expander for "sqrt<mode>2" over the VF2 mode iterator, enabled under
;; TARGET_SSE2.  There is no preparation code: the SQRT template is
;; emitted as written.
1854(define_expand "sqrt<mode>2"
1855  [(set (match_operand:VF2 0 "register_operand")
1856	(sqrt:VF2 (match_operand:VF2 1 "vector_operand")))]
1857  "TARGET_SSE2")
1858
;; Expander for "sqrt<mode>2" over the VF1 mode iterator, enabled under
;; TARGET_SSE.  When TARGET_SSE_MATH, TARGET_RECIP_VEC_SQRT, not optimizing
;; for size, flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations all hold, the work is delegated to
;; ix86_emit_swsqrtsf with its last argument false and expansion ends
;; with DONE.  Otherwise the plain SQRT template is emitted.
;; NOTE(review): the final bool argument presumably selects sqrt (false)
;; versus reciprocal sqrt (true) -- the rsqrt expanders pass true; confirm
;; against the helper's definition.
1859(define_expand "sqrt<mode>2"
1860  [(set (match_operand:VF1 0 "register_operand")
1861	(sqrt:VF1 (match_operand:VF1 1 "vector_operand")))]
1862  "TARGET_SSE"
1863{
1864  if (TARGET_SSE_MATH
1865      && TARGET_RECIP_VEC_SQRT
1866      && !optimize_insn_for_size_p ()
1867      && flag_finite_math_only && !flag_trapping_math
1868      && flag_unsafe_math_optimizations)
1869    {
1870      ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
1871      DONE;
1872    }
1873})
1874
;; Packed square-root insn over the VF mode iterator.
;;   Alternative 0 (isa "noavx"): sqrt<ssemodesuffix> %1 -> %0.
;;   Alternative 1 (isa "avx"):   vsqrt<ssemodesuffix>, with the mask and
;;     rounding pieces (<mask_operand2>, <round_mask_op2>) spliced in by
;;     substitutions defined outside this chunk.
;; Being a unary operation, neither alternative ties an input to the
;; destination.
1875(define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1876  [(set (match_operand:VF 0 "register_operand" "=x,v")
1877	(sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1878  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1879  "@
1880   sqrt<ssemodesuffix>\t{%1, %0|%0, %1}
1881   vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1882  [(set_attr "isa" "noavx,avx")
1883   (set_attr "type" "sse")
1884   (set_attr "atom_sse_attr" "sqrt")
1885   (set_attr "btver2_sse_attr" "sqrt")
1886   (set_attr "prefix" "maybe_vex")
1887   (set_attr "mode" "<MODE>")])
1888
;; Scalar square-root insn over the VF_128 modes.  The vec_merge mask
;; (const_int 1) takes element 0 from sqrt (operand 1) and the remaining
;; elements from operand 2.
;;   Alternative 0 (noavx): operand 2 is tied to the destination; emits
;;     sqrt<ssescalarmodesuffix> with operands 1 and 0 only.
;;   Alternative 1 (avx): three-operand vsqrt<ssescalarmodesuffix>, with
;;     scalar mask/rounding pieces supplied by substitutions defined
;;     outside this chunk.
1889(define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
1890  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1891	(vec_merge:VF_128
1892	  (sqrt:VF_128
1893	    (match_operand:VF_128 1 "vector_operand" "xBm,<round_scalar_constraint>"))
1894	  (match_operand:VF_128 2 "register_operand" "0,v")
1895	  (const_int 1)))]
1896  "TARGET_SSE"
1897  "@
1898   sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1899   vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %<iptr>1<round_scalar_mask_op3>}"
1900  [(set_attr "isa" "noavx,avx")
1901   (set_attr "type" "sse")
1902   (set_attr "atom_sse_attr" "sqrt")
1903   (set_attr "prefix" "<round_scalar_prefix>")
1904   (set_attr "btver2_sse_attr" "sqrt")
1905   (set_attr "mode" "<ssescalarmode>")])
1906
;; Expander for "rsqrt<mode>2" over the VF1_128_256 modes, enabled under
;; TARGET_SSE_MATH.  The RTL template (an UNSPEC_RSQRT) is never emitted:
;; the body unconditionally calls ix86_emit_swsqrtsf with its last
;; argument true and then finishes with DONE.
;; NOTE(review): the true argument presumably requests the reciprocal
;; square root variant -- confirm against the helper's definition.
1907(define_expand "rsqrt<mode>2"
1908  [(set (match_operand:VF1_128_256 0 "register_operand")
1909	(unspec:VF1_128_256
1910	  [(match_operand:VF1_128_256 1 "vector_operand")] UNSPEC_RSQRT))]
1911  "TARGET_SSE_MATH"
1912{
1913  ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
1914  DONE;
1915})
1916
;; V16SF counterpart of the rsqrt expander, enabled only when both
;; TARGET_SSE_MATH and TARGET_AVX512ER hold.  The template uses
;; UNSPEC_RSQRT28 but is never emitted: the body unconditionally calls
;; ix86_emit_swsqrtsf for V16SFmode with its last argument true and then
;; finishes with DONE.
1917(define_expand "rsqrtv16sf2"
1918  [(set (match_operand:V16SF 0 "register_operand")
1919	(unspec:V16SF
1920	  [(match_operand:V16SF 1 "vector_operand")]
1921	  UNSPEC_RSQRT28))]
1922  "TARGET_SSE_MATH && TARGET_AVX512ER"
1923{
1924  ix86_emit_swsqrtsf (operands[0], operands[1], V16SFmode, true);
1925  DONE;
1926})
1927
;; Packed reciprocal-square-root insn: operand 0 = UNSPEC_RSQRT
;; (operand 1) over the VF1_128_256 modes, enabled under TARGET_SSE.
;; The single "%vrsqrtps" template with prefix "maybe_vex" covers both
;; the legacy and the VEX-encoded spelling.
1928(define_insn "<sse>_rsqrt<mode>2"
1929  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1930	(unspec:VF1_128_256
1931	  [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RSQRT))]
1932  "TARGET_SSE"
1933  "%vrsqrtps\t{%1, %0|%0, %1}"
1934  [(set_attr "type" "sse")
1935   (set_attr "prefix" "maybe_vex")
1936   (set_attr "mode" "<MODE>")])
1937
;; Packed vrsqrt14 insn: operand 0 = UNSPEC_RSQRT14 (operand 1) over the
;; VF_AVX512VL modes, enabled under TARGET_AVX512F and marked EVEX-only.
;; The <mask_codefor>/<mask_name> pieces of the name and <mask_operand2>
;; in the template come from the masking substitution defined outside
;; this chunk.
1938(define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1939  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1940	(unspec:VF_AVX512VL
1941	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1942	  UNSPEC_RSQRT14))]
1943  "TARGET_AVX512F"
1944  "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1945  [(set_attr "type" "sse")
1946   (set_attr "prefix" "evex")
1947   (set_attr "mode" "<MODE>")])
1948
;; Scalar vrsqrt14 insn over the VF_128 modes (TARGET_AVX512F, EVEX).
;; The vec_merge mask (const_int 1) takes element 0 from
;; UNSPEC_RSQRT14 (operand 1) and the remaining elements from operand 2.
;; Emits vrsqrt14<ssescalarmodesuffix> with operands 1, 2 and 0.
1949(define_insn "rsqrt14<mode>"
1950  [(set (match_operand:VF_128 0 "register_operand" "=v")
1951	(vec_merge:VF_128
1952	  (unspec:VF_128
1953	    [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1954	    UNSPEC_RSQRT14)
1955	  (match_operand:VF_128 2 "register_operand" "v")
1956	  (const_int 1)))]
1957  "TARGET_AVX512F"
1958  "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1959  [(set_attr "type" "sse")
1960   (set_attr "prefix" "evex")
1961   (set_attr "mode" "<MODE>")])
1962
;; Masked scalar vrsqrt14 insn over the VF_128 modes (TARGET_AVX512F,
;; EVEX).
;; Inner vec_merge: selects, under mask register operand 4 ("Yk"),
;;   between UNSPEC_RSQRT14 (operand 1) and operand 3, which is either
;;   tied to the destination or a constant accepted by "C".
;; Outer vec_merge (const_int 1): keeps element 0 of that result and
;;   takes the remaining elements from operand 2.
;; The template prints the mask as {%4} after the destination, followed
;; by %N3.
;; NOTE(review): %N3 presumably prints the zero-masking marker when
;; operand 3 is the zero constant -- confirm in the operand printer.
1963(define_insn "rsqrt14_<mode>_mask"
1964  [(set (match_operand:VF_128 0 "register_operand" "=v")
1965	(vec_merge:VF_128
1966	  (vec_merge:VF_128
1967	    (unspec:VF_128
1968	      [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1969	      UNSPEC_RSQRT14)
1970	      (match_operand:VF_128 3 "vector_move_operand" "0C")
1971	      (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
1972	  (match_operand:VF_128 2 "register_operand" "v")
1973	  (const_int 1)))]
1974  "TARGET_AVX512F"
1975  "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
1976  [(set_attr "type" "sse")
1977   (set_attr "prefix" "evex")
1978   (set_attr "mode" "<MODE>")])
1979
;; Scalar reciprocal square root on V4SF.  The vec_merge mask
;; (const_int 1) takes element 0 from UNSPEC_RSQRT (operand 1) and every
;; other element from operand 2.
;;   Alternative 0 (noavx): operand 2 is tied to the destination; emits
;;     rsqrtss with operands 1 and 0 only.
;;   Alternative 1 (avx): emits the three-operand vrsqrtss.
1980(define_insn "sse_vmrsqrtv4sf2"
1981  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1982	(vec_merge:V4SF
1983	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1984		       UNSPEC_RSQRT)
1985	  (match_operand:V4SF 2 "register_operand" "0,x")
1986	  (const_int 1)))]
1987  "TARGET_SSE"
1988  "@
1989   rsqrtss\t{%1, %0|%0, %k1}
1990   vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1991  [(set_attr "isa" "noavx,avx")
1992   (set_attr "type" "sse")
1993   (set_attr "prefix" "orig,vex")
1994   (set_attr "mode" "SF")])
1995
;; Expander for the signed min/max named patterns over the VF modes
;; (the name is built from the smaxmin code iterator).
;;   * If NaNs/infinities may occur (!flag_finite_math_only) or signed
;;     zeros matter (flag_signed_zeros), operand 1 is forced into a
;;     register and the non-commutative ieee_<maxmin_float> pattern is
;;     emitted instead, passing along the extra mask / SAE arguments
;;     supplied by the substitutions; expansion then ends with DONE.
;;   * Otherwise only ix86_fixup_binary_operands_no_copy is called and the
;;     SMAX/SMIN template is emitted as written.
1996(define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1997  [(set (match_operand:VF 0 "register_operand")
1998	(smaxmin:VF
1999	  (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
2000	  (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
2001  "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2002{
2003  if (!flag_finite_math_only || flag_signed_zeros)
2004    {
2005      operands[1] = force_reg (<MODE>mode, operands[1]);
2006      emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
2007		 (operands[0], operands[1], operands[2]
2008		  <mask_operand_arg34>
2009		  <round_saeonly_mask_arg3>));
2010      DONE;
2011    }
2012  else
2013    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
2014})
2015
2016;; These versions of the min/max patterns are intentionally ignorant of
2017;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
2018;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
2019;; are undefined in this condition, we're certain this is correct.
2020
;; Commutative packed min/max insn over the VF modes.  The "%" on
;; operand 1 marks operands 1 and 2 as interchangeable; the insn
;; condition additionally rejects the case where both are memory.
;;   Alternative 0 (noavx): two-operand <maxmin_float><ssemodesuffix>,
;;     operand 1 tied to the destination.
;;   Alternative 1 (avx): three-operand v<maxmin_float><ssemodesuffix>
;;     with mask / SAE pieces supplied by substitutions.
2021(define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
2022  [(set (match_operand:VF 0 "register_operand" "=x,v")
2023	(smaxmin:VF
2024	  (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
2025	  (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
2026  "TARGET_SSE
2027   && !(MEM_P (operands[1]) && MEM_P (operands[2]))
2028   && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2029  "@
2030   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
2031   v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2032  [(set_attr "isa" "noavx,avx")
2033   (set_attr "type" "sseadd")
2034   (set_attr "btver2_sse_attr" "maxmin")
2035   (set_attr "prefix" "<mask_prefix3>")
2036   (set_attr "mode" "<MODE>")])
2037
2038;; These versions of the min/max patterns implement exactly the operations
2039;;   min = (op1 < op2 ? op1 : op2)
2040;;   max = (!(op1 < op2) ? op1 : op2)
2041;; Their operands are not commutative, and thus they may be used in the
2042;; presence of -0.0 and NaN.
2043
;; Order-sensitive packed min/max insn over the VF modes, expressed as an
;; unspec from the IEEE_MAXMIN iterator so that the operands are never
;; swapped.  Operand 1 must be a register (no "%" commutative marker).
;;   Alternative 0 (noavx): two-operand <ieee_maxmin><ssemodesuffix>,
;;     operand 1 tied to the destination.
;;   Alternative 1 (avx): three-operand v<ieee_maxmin><ssemodesuffix>
;;     with mask / SAE pieces supplied by substitutions.
2044(define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
2045  [(set (match_operand:VF 0 "register_operand" "=x,v")
2046	(unspec:VF
2047	  [(match_operand:VF 1 "register_operand" "0,v")
2048	   (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")]
2049	  IEEE_MAXMIN))]
2050  "TARGET_SSE
2051   && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2052  "@
2053   <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
2054   v<ieee_maxmin><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2055  [(set_attr "isa" "noavx,avx")
2056   (set_attr "type" "sseadd")
2057   (set_attr "btver2_sse_attr" "maxmin")
2058   (set_attr "prefix" "<mask_prefix3>")
2059   (set_attr "mode" "<MODE>")])
2060
;; Scalar min/max insn over the VF_128 modes.  The vec_merge mask
;; (const_int 1) takes element 0 from smaxmin (operand 1, operand 2) and
;; the remaining elements from operand 1 itself (match_dup 1).
;;   Alternative 0 (noavx): operand 1 tied to the destination; emits
;;     <maxmin_float><ssescalarmodesuffix> with operands 2 and 0 only.
;;   Alternative 1 (avx): three-operand v-prefixed form with scalar
;;     mask / SAE pieces supplied by substitutions.
2061(define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
2062  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2063	(vec_merge:VF_128
2064	  (smaxmin:VF_128
2065	    (match_operand:VF_128 1 "register_operand" "0,v")
2066	    (match_operand:VF_128 2 "vector_operand" "xBm,<round_saeonly_scalar_constraint>"))
2067	 (match_dup 1)
2068	 (const_int 1)))]
2069  "TARGET_SSE"
2070  "@
2071   <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2072   v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
2073  [(set_attr "isa" "noavx,avx")
2074   (set_attr "type" "sse")
2075   (set_attr "btver2_sse_attr" "maxmin")
2076   (set_attr "prefix" "<round_saeonly_scalar_prefix>")
2077   (set_attr "mode" "<ssescalarmode>")])
2078
;; vaddsubpd on V4DF (TARGET_AVX, VEX only).  The vec_merge mask is 5
;; (binary 0101): elements 0 and 2 are taken from op1 - op2, elements 1
;; and 3 from op1 + op2.
2079(define_insn "avx_addsubv4df3"
2080  [(set (match_operand:V4DF 0 "register_operand" "=x")
2081	(vec_merge:V4DF
2082	  (minus:V4DF
2083	    (match_operand:V4DF 1 "register_operand" "x")
2084	    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
2085	  (plus:V4DF (match_dup 1) (match_dup 2))
2086	  (const_int 5)))]
2087  "TARGET_AVX"
2088  "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2089  [(set_attr "type" "sseadd")
2090   (set_attr "prefix" "vex")
2091   (set_attr "mode" "V4DF")])
2092
;; addsubpd / vaddsubpd on V2DF (TARGET_SSE3).  The vec_merge mask is 1:
;; element 0 is taken from op1 - op2, element 1 from op1 + op2.
;;   Alternative 0 (noavx): operand 1 tied to the destination.
;;   Alternative 1 (avx): three-operand VEX form.
2093(define_insn "sse3_addsubv2df3"
2094  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2095	(vec_merge:V2DF
2096	  (minus:V2DF
2097	    (match_operand:V2DF 1 "register_operand" "0,x")
2098	    (match_operand:V2DF 2 "vector_operand" "xBm,xm"))
2099	  (plus:V2DF (match_dup 1) (match_dup 2))
2100	  (const_int 1)))]
2101  "TARGET_SSE3"
2102  "@
2103   addsubpd\t{%2, %0|%0, %2}
2104   vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2105  [(set_attr "isa" "noavx,avx")
2106   (set_attr "type" "sseadd")
2107   (set_attr "atom_unit" "complex")
2108   (set_attr "prefix" "orig,vex")
2109   (set_attr "mode" "V2DF")])
2110
;; vaddsubps on V8SF (TARGET_AVX, VEX only).  The vec_merge mask is 85
;; (binary 01010101): even-numbered elements are taken from op1 - op2,
;; odd-numbered elements from op1 + op2.
2111(define_insn "avx_addsubv8sf3"
2112  [(set (match_operand:V8SF 0 "register_operand" "=x")
2113	(vec_merge:V8SF
2114	  (minus:V8SF
2115	    (match_operand:V8SF 1 "register_operand" "x")
2116	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2117	  (plus:V8SF (match_dup 1) (match_dup 2))
2118	  (const_int 85)))]
2119  "TARGET_AVX"
2120  "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2121  [(set_attr "type" "sseadd")
2122   (set_attr "prefix" "vex")
2123   (set_attr "mode" "V8SF")])
2124
;; addsubps / vaddsubps on V4SF (TARGET_SSE3).  The vec_merge mask is 5
;; (binary 0101): elements 0 and 2 are taken from op1 - op2, elements 1
;; and 3 from op1 + op2.
;;   Alternative 0 (noavx): operand 1 tied to the destination; this
;;     alternative sets prefix_rep to 1 for length computation.
;;   Alternative 1 (avx): three-operand VEX form.
2125(define_insn "sse3_addsubv4sf3"
2126  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2127	(vec_merge:V4SF
2128	  (minus:V4SF
2129	    (match_operand:V4SF 1 "register_operand" "0,x")
2130	    (match_operand:V4SF 2 "vector_operand" "xBm,xm"))
2131	  (plus:V4SF (match_dup 1) (match_dup 2))
2132	  (const_int 5)))]
2133  "TARGET_SSE3"
2134  "@
2135   addsubps\t{%2, %0|%0, %2}
2136   vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2137  [(set_attr "isa" "noavx,avx")
2138   (set_attr "type" "sseadd")
2139   (set_attr "prefix" "orig,vex")
2140   (set_attr "prefix_rep" "1,*")
2141   (set_attr "mode" "V4SF")])
2142
;; Canonicalizing split, merge form with MINUS as the first arm.
;; Matches an rtx accepted by addsub_vm_operator (operand 6; predicate
;; defined outside this chunk) whose arms are (minus op1 op2) and
;; (plus op3 op4) with a const_int selector in operand 5.  It applies
;; only before reload (can_create_pseudo_p) and only when the PLUS uses
;; the same two values as the MINUS, in either order (PLUS is
;; commutative, hence the two rtx_equal_p cases).
;; The replacement is the vec_merge shape the addsub insns above expect:
;; both arms use (match_dup 1)/(match_dup 2) and the mask is unchanged.
2143(define_split
2144  [(set (match_operand:VF_128_256 0 "register_operand")
2145	(match_operator:VF_128_256 6 "addsub_vm_operator"
2146	  [(minus:VF_128_256
2147	     (match_operand:VF_128_256 1 "register_operand")
2148	     (match_operand:VF_128_256 2 "vector_operand"))
2149	   (plus:VF_128_256
2150	     (match_operand:VF_128_256 3 "vector_operand")
2151	     (match_operand:VF_128_256 4 "vector_operand"))
2152	   (match_operand 5 "const_int_operand")]))]
2153  "TARGET_SSE3
2154   && can_create_pseudo_p ()
2155   && ((rtx_equal_p (operands[1], operands[3])
2156	&& rtx_equal_p (operands[2], operands[4]))
2157       || (rtx_equal_p (operands[1], operands[4])
2158	   && rtx_equal_p (operands[2], operands[3])))"
2159  [(set (match_dup 0)
2160	(vec_merge:VF_128_256
2161	  (minus:VF_128_256 (match_dup 1) (match_dup 2))
2162	  (plus:VF_128_256 (match_dup 1) (match_dup 2))
2163	  (match_dup 5)))])
2164
;; Canonicalizing split, merge form with PLUS as the first arm.
;; Same matching conditions as the MINUS-first variant, but here the
;; MINUS is the second arm (operands 3 and 4).  The replacement puts
;; MINUS first, built from (match_dup 3)/(match_dup 4) because MINUS is
;; not commutative.  Since the two arms trade places, the preparation
;; code replaces operand 5 with its bitwise complement, truncated to
;; GET_MODE_NUNITS (<MODE>mode) bits.
2165(define_split
2166  [(set (match_operand:VF_128_256 0 "register_operand")
2167	(match_operator:VF_128_256 6 "addsub_vm_operator"
2168	  [(plus:VF_128_256
2169	     (match_operand:VF_128_256 1 "vector_operand")
2170	     (match_operand:VF_128_256 2 "vector_operand"))
2171	   (minus:VF_128_256
2172	     (match_operand:VF_128_256 3 "register_operand")
2173	     (match_operand:VF_128_256 4 "vector_operand"))
2174	   (match_operand 5 "const_int_operand")]))]
2175  "TARGET_SSE3
2176   && can_create_pseudo_p ()
2177   && ((rtx_equal_p (operands[1], operands[3])
2178	&& rtx_equal_p (operands[2], operands[4]))
2179       || (rtx_equal_p (operands[1], operands[4])
2180	   && rtx_equal_p (operands[2], operands[3])))"
2181  [(set (match_dup 0)
2182	(vec_merge:VF_128_256
2183	  (minus:VF_128_256 (match_dup 3) (match_dup 4))
2184	  (plus:VF_128_256 (match_dup 3) (match_dup 4))
2185	  (match_dup 5)))]
2186{
2187  /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes.  */
2188  operands[5]
2189    = GEN_INT (~INTVAL (operands[5])
2190	       & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
2191})
2192
;; Canonicalizing split, select form with MINUS in the first half.
;; Matches an rtx accepted by addsub_vs_operator (operand 7) applied to a
;; double-width vec_concat of (minus op1 op2) and (plus op3 op4), with a
;; selector PARALLEL (operand 5) accepted by addsub_vs_parallel.  Both
;; predicates are defined outside this chunk.
;; NOTE(review): operator 7 is presumably a vec_select -- confirm in
;; predicates.md.
;; The pre-reload and operand-equality conditions are the same as in the
;; merge-form splits.  The preparation code turns the selector into a
;; vec_merge mask: bit i is set when selector element i is below
;; GET_MODE_NUNITS (<MODE>mode), i.e. when it indexes the first
;; (MINUS) half of the concatenation.  Operand 5 is then overwritten with
;; that const_int.
2193(define_split
2194  [(set (match_operand:VF_128_256 0 "register_operand")
2195	(match_operator:VF_128_256 7 "addsub_vs_operator"
2196	  [(vec_concat:<ssedoublemode>
2197	     (minus:VF_128_256
2198	       (match_operand:VF_128_256 1 "register_operand")
2199	       (match_operand:VF_128_256 2 "vector_operand"))
2200	     (plus:VF_128_256
2201	       (match_operand:VF_128_256 3 "vector_operand")
2202	       (match_operand:VF_128_256 4 "vector_operand")))
2203	   (match_parallel 5 "addsub_vs_parallel"
2204	     [(match_operand 6 "const_int_operand")])]))]
2205  "TARGET_SSE3
2206   && can_create_pseudo_p ()
2207   && ((rtx_equal_p (operands[1], operands[3])
2208	&& rtx_equal_p (operands[2], operands[4]))
2209       || (rtx_equal_p (operands[1], operands[4])
2210	   && rtx_equal_p (operands[2], operands[3])))"
2211  [(set (match_dup 0)
2212	(vec_merge:VF_128_256
2213	  (minus:VF_128_256 (match_dup 1) (match_dup 2))
2214	  (plus:VF_128_256 (match_dup 1) (match_dup 2))
2215	  (match_dup 5)))]
2216{
2217  int i, nelt = XVECLEN (operands[5], 0);
2218  HOST_WIDE_INT ival = 0;
2219
2220  for (i = 0; i < nelt; i++)
2221    if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
2222      ival |= HOST_WIDE_INT_1 << i;
2223
2224  operands[5] = GEN_INT (ival);
2225})
2226
;; Canonicalizing split, select form with PLUS in the first half.
;; Mirror image of the MINUS-first select-form split: the vec_concat
;; holds (plus op1 op2) first and (minus op3 op4) second.  The
;; replacement is built from (match_dup 3)/(match_dup 4) because MINUS is
;; not commutative.  The mask computation is inverted accordingly: bit i
;; is set when selector element i is at or above
;; GET_MODE_NUNITS (<MODE>mode), i.e. when it indexes the second (MINUS)
;; half of the concatenation.
2227(define_split
2228  [(set (match_operand:VF_128_256 0 "register_operand")
2229	(match_operator:VF_128_256 7 "addsub_vs_operator"
2230	  [(vec_concat:<ssedoublemode>
2231	     (plus:VF_128_256
2232	       (match_operand:VF_128_256 1 "vector_operand")
2233	       (match_operand:VF_128_256 2 "vector_operand"))
2234	     (minus:VF_128_256
2235	       (match_operand:VF_128_256 3 "register_operand")
2236	       (match_operand:VF_128_256 4 "vector_operand")))
2237	   (match_parallel 5 "addsub_vs_parallel"
2238	     [(match_operand 6 "const_int_operand")])]))]
2239  "TARGET_SSE3
2240   && can_create_pseudo_p ()
2241   && ((rtx_equal_p (operands[1], operands[3])
2242	&& rtx_equal_p (operands[2], operands[4]))
2243       || (rtx_equal_p (operands[1], operands[4])
2244	   && rtx_equal_p (operands[2], operands[3])))"
2245  [(set (match_dup 0)
2246	(vec_merge:VF_128_256
2247	  (minus:VF_128_256 (match_dup 3) (match_dup 4))
2248	  (plus:VF_128_256 (match_dup 3) (match_dup 4))
2249	  (match_dup 5)))]
2250{
2251  int i, nelt = XVECLEN (operands[5], 0);
2252  HOST_WIDE_INT ival = 0;
2253
2254  for (i = 0; i < nelt; i++)
2255    if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
2256      ival |= HOST_WIDE_INT_1 << i;
2257
2258  operands[5] = GEN_INT (ival);
2259})
2260
;; Horizontal add/subtract on V4DF (TARGET_AVX, VEX only), generated for
;; each code in the plusminus iterator.  With "op" standing for that
;; code, the result elements are, in order:
;;   [0] = op1[0] op op1[1]    [1] = op2[0] op op2[1]
;;   [2] = op1[2] op op1[3]    [3] = op2[2] op op2[3]
;; i.e. operands 1 and 2 alternate within each 128-bit half.
2261(define_insn "avx_h<plusminus_insn>v4df3"
2262  [(set (match_operand:V4DF 0 "register_operand" "=x")
2263	(vec_concat:V4DF
2264	  (vec_concat:V2DF
2265	    (plusminus:DF
2266	      (vec_select:DF
2267		(match_operand:V4DF 1 "register_operand" "x")
2268		(parallel [(const_int 0)]))
2269	      (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2270	    (plusminus:DF
2271	      (vec_select:DF
2272		(match_operand:V4DF 2 "nonimmediate_operand" "xm")
2273		(parallel [(const_int 0)]))
2274	      (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2275	  (vec_concat:V2DF
2276	    (plusminus:DF
2277	      (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2278	      (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2279	    (plusminus:DF
2280	      (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2281	      (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2282  "TARGET_AVX"
2283  "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2284  [(set_attr "type" "sseadd")
2285   (set_attr "prefix" "vex")
2286   (set_attr "mode" "V4DF")])
2287
;; Named expander for horizontal add on V2DF (TARGET_SSE3):
;;   result[0] = op1[0] + op1[1],  result[1] = op2[0] + op2[1].
;; It has no preparation code; it exists to provide the gen_sse3_haddv2df3
;; entry point, with lane indices fixed at 0 and 1.  The insn that
;; recognizes the emitted RTL is the anonymous "*sse3_haddv2df3" pattern.
2288(define_expand "sse3_haddv2df3"
2289  [(set (match_operand:V2DF 0 "register_operand")
2290	(vec_concat:V2DF
2291	  (plus:DF
2292	    (vec_select:DF
2293	      (match_operand:V2DF 1 "register_operand")
2294	      (parallel [(const_int 0)]))
2295	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2296	  (plus:DF
2297	    (vec_select:DF
2298	      (match_operand:V2DF 2 "vector_operand")
2299	      (parallel [(const_int 0)]))
2300	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2301  "TARGET_SSE3")
2302
;; haddpd / vhaddpd recognizer on V2DF (TARGET_SSE3).  Unlike the expander
;; of the same base name, the four lane indices are match_operands 3..6
;; restricted to 0..1, and the insn condition only requires that the two
;; indices within each sum differ.  Either lane order inside a PLUS
;; therefore matches, since the sum is the same both ways.
;;   Alternative 0 (noavx): operand 1 tied to the destination.
;;   Alternative 1 (avx): three-operand VEX form.
2303(define_insn "*sse3_haddv2df3"
2304  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2305	(vec_concat:V2DF
2306	  (plus:DF
2307	    (vec_select:DF
2308	      (match_operand:V2DF 1 "register_operand" "0,x")
2309	      (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2310	    (vec_select:DF
2311	      (match_dup 1)
2312	      (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2313	  (plus:DF
2314	    (vec_select:DF
2315	      (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2316	      (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2317	    (vec_select:DF
2318	      (match_dup 2)
2319	      (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2320  "TARGET_SSE3
2321   && INTVAL (operands[3]) != INTVAL (operands[4])
2322   && INTVAL (operands[5]) != INTVAL (operands[6])"
2323  "@
2324   haddpd\t{%2, %0|%0, %2}
2325   vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2326  [(set_attr "isa" "noavx,avx")
2327   (set_attr "type" "sseadd")
2328   (set_attr "prefix" "orig,vex")
2329   (set_attr "mode" "V2DF")])
2330
;; hsubpd / vhsubpd on V2DF (TARGET_SSE3):
;;   result[0] = op1[0] - op1[1],  result[1] = op2[0] - op2[1].
;; The lane indices are fixed constants here (0 then 1) because
;; subtraction is order-sensitive.
;;   Alternative 0 (noavx): operand 1 tied to the destination.
;;   Alternative 1 (avx): three-operand VEX form.
2331(define_insn "sse3_hsubv2df3"
2332  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2333	(vec_concat:V2DF
2334	  (minus:DF
2335	    (vec_select:DF
2336	      (match_operand:V2DF 1 "register_operand" "0,x")
2337	      (parallel [(const_int 0)]))
2338	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2339	  (minus:DF
2340	    (vec_select:DF
2341	      (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2342	      (parallel [(const_int 0)]))
2343	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2344  "TARGET_SSE3"
2345  "@
2346   hsubpd\t{%2, %0|%0, %2}
2347   vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2348  [(set_attr "isa" "noavx,avx")
2349   (set_attr "type" "sseadd")
2350   (set_attr "prefix" "orig,vex")
2351   (set_attr "mode" "V2DF")])
2352
;; Scalar DF result = sum of the two lanes of V2DF operand 1
;; (TARGET_SSE3).  The lane indices are operands 2 and 3, each in 0..1
;; and required to differ, so both lane orders match.  The template
;; feeds the same register as both sources of haddpd / vhaddpd; only the
;; DF-mode destination is described by the pattern.
2353(define_insn "*sse3_haddv2df3_low"
2354  [(set (match_operand:DF 0 "register_operand" "=x,x")
2355	(plus:DF
2356	  (vec_select:DF
2357	    (match_operand:V2DF 1 "register_operand" "0,x")
2358	    (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2359	  (vec_select:DF
2360	    (match_dup 1)
2361	    (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2362  "TARGET_SSE3
2363   && INTVAL (operands[2]) != INTVAL (operands[3])"
2364  "@
2365   haddpd\t{%0, %0|%0, %0}
2366   vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2367  [(set_attr "isa" "noavx,avx")
2368   (set_attr "type" "sseadd1")
2369   (set_attr "prefix" "orig,vex")
2370   (set_attr "mode" "V2DF")])
2371
;; Scalar DF result = op1[0] - op1[1] for V2DF operand 1 (TARGET_SSE3).
;; The lane indices are fixed (0 minus 1) because subtraction is
;; order-sensitive.  The template feeds the same register as both
;; sources of hsubpd / vhsubpd; only the DF-mode destination is
;; described by the pattern.
2372(define_insn "*sse3_hsubv2df3_low"
2373  [(set (match_operand:DF 0 "register_operand" "=x,x")
2374	(minus:DF
2375	  (vec_select:DF
2376	    (match_operand:V2DF 1 "register_operand" "0,x")
2377	    (parallel [(const_int 0)]))
2378	  (vec_select:DF
2379	    (match_dup 1)
2380	    (parallel [(const_int 1)]))))]
2381  "TARGET_SSE3"
2382  "@
2383   hsubpd\t{%0, %0|%0, %0}
2384   vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2385  [(set_attr "isa" "noavx,avx")
2386   (set_attr "type" "sseadd1")
2387   (set_attr "prefix" "orig,vex")
2388   (set_attr "mode" "V2DF")])
2389
;; Horizontal add/subtract on V8SF (TARGET_AVX, VEX only), generated for
;; each code in the plusminus iterator.  With "op" standing for that
;; code, the result elements are, in order:
;;   [0] = op1[0] op op1[1]    [1] = op1[2] op op1[3]
;;   [2] = op2[0] op op2[1]    [3] = op2[2] op op2[3]
;;   [4] = op1[4] op op1[5]    [5] = op1[6] op op1[7]
;;   [6] = op2[4] op op2[5]    [7] = op2[6] op op2[7]
;; i.e. each 128-bit half holds two results from operand 1 followed by
;; two from operand 2.
2390(define_insn "avx_h<plusminus_insn>v8sf3"
2391  [(set (match_operand:V8SF 0 "register_operand" "=x")
2392	(vec_concat:V8SF
2393	  (vec_concat:V4SF
2394	    (vec_concat:V2SF
2395	      (plusminus:SF
2396		(vec_select:SF
2397		  (match_operand:V8SF 1 "register_operand" "x")
2398		  (parallel [(const_int 0)]))
2399		(vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2400	      (plusminus:SF
2401		(vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2402		(vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2403	    (vec_concat:V2SF
2404	      (plusminus:SF
2405		(vec_select:SF
2406		  (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2407		  (parallel [(const_int 0)]))
2408		(vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2409	      (plusminus:SF
2410		(vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2411		(vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2412	  (vec_concat:V4SF
2413	    (vec_concat:V2SF
2414	      (plusminus:SF
2415		(vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2416		(vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2417	      (plusminus:SF
2418		(vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2419		(vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2420	    (vec_concat:V2SF
2421	      (plusminus:SF
2422		(vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2423		(vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2424	      (plusminus:SF
2425		(vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2426		(vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2427  "TARGET_AVX"
2428  "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2429  [(set_attr "type" "sseadd")
2430   (set_attr "prefix" "vex")
2431   (set_attr "mode" "V8SF")])
2432
;; Horizontal add/subtract on V4SF (TARGET_SSE3), generated for each code
;; in the plusminus iterator.  With "op" standing for that code:
;;   [0] = op1[0] op op1[1]    [1] = op1[2] op op1[3]
;;   [2] = op2[0] op op2[1]    [3] = op2[2] op op2[3]
;;   Alternative 0 (noavx): operand 1 tied to the destination; this
;;     alternative sets prefix_rep to 1 for length computation.
;;   Alternative 1 (avx): three-operand VEX form.
2433(define_insn "sse3_h<plusminus_insn>v4sf3"
2434  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2435	(vec_concat:V4SF
2436	  (vec_concat:V2SF
2437	    (plusminus:SF
2438	      (vec_select:SF
2439		(match_operand:V4SF 1 "register_operand" "0,x")
2440		(parallel [(const_int 0)]))
2441	      (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2442	    (plusminus:SF
2443	      (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2444	      (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2445	  (vec_concat:V2SF
2446	    (plusminus:SF
2447	      (vec_select:SF
2448		(match_operand:V4SF 2 "vector_operand" "xBm,xm")
2449		(parallel [(const_int 0)]))
2450	      (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2451	    (plusminus:SF
2452	      (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2453	      (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2454  "TARGET_SSE3"
2455  "@
2456   h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2457   vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2458  [(set_attr "isa" "noavx,avx")
2459   (set_attr "type" "sseadd")
2460   (set_attr "atom_unit" "complex")
2461   (set_attr "prefix" "orig,vex")
2462   (set_attr "prefix_rep" "1,*")
2463   (set_attr "mode" "V4SF")])
2464
;; Sum-reduction of a V8DF vector (operand 1) into a DF scalar
;; (operand 0), enabled under TARGET_AVX512F.  ix86_expand_reduc is
;; called with gen_addv8df3 as the combining operation and a fresh V8DF
;; temporary as its destination; element 0 of that temporary is then
;; extracted into operand 0.
;; NOTE(review): this relies on ix86_expand_reduc leaving the full
;; reduction in element 0 -- confirm against its definition.
2465(define_expand "reduc_plus_scal_v8df"
2466  [(match_operand:DF 0 "register_operand")
2467   (match_operand:V8DF 1 "register_operand")]
2468  "TARGET_AVX512F"
2469{
2470  rtx tmp = gen_reg_rtx (V8DFmode);
2471  ix86_expand_reduc (gen_addv8df3, tmp, operands[1]);
2472  emit_insn (gen_vec_extractv8dfdf (operands[0], tmp, const0_rtx));
2473  DONE;
2474})
2475
;; Sum-reduction of a V4DF vector (operand 1) into a DF scalar
;; (operand 0), enabled under TARGET_AVX.  Sequence emitted:
;;   1. tmp     = hadd (op1, op1)   -- per the avx_h...v4df3 pattern each
;;                                     128-bit half of tmp then holds the
;;                                     sum of that half's two lanes.
;;   2. tmp2    = vperm2f128 (tmp, tmp, imm 1)
;;   3. vec_res = tmp + tmp2
;;   4. op0     = element 0 of vec_res
;; NOTE(review): immediate 1 presumably makes step 2 swap the two 128-bit
;; halves so that step 3 adds the two partial sums -- confirm against the
;; vperm2f128 pattern.
2476(define_expand "reduc_plus_scal_v4df"
2477  [(match_operand:DF 0 "register_operand")
2478   (match_operand:V4DF 1 "register_operand")]
2479  "TARGET_AVX"
2480{
2481  rtx tmp = gen_reg_rtx (V4DFmode);
2482  rtx tmp2 = gen_reg_rtx (V4DFmode);
2483  rtx vec_res = gen_reg_rtx (V4DFmode);
2484  emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
2485  emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
2486  emit_insn (gen_addv4df3 (vec_res, tmp, tmp2));
2487  emit_insn (gen_vec_extractv4dfdf (operands[0], vec_res, const0_rtx));
2488  DONE;
2489})
2490
;; Sum-reduction of a V2DF vector (operand 1) into a DF scalar
;; (operand 0), enabled under TARGET_SSE3.  A single horizontal add of
;; operand 1 with itself puts op1[0] + op1[1] in element 0 of the
;; temporary (see the sse3_haddv2df3 expander), which is then extracted.
2491(define_expand "reduc_plus_scal_v2df"
2492  [(match_operand:DF 0 "register_operand")
2493   (match_operand:V2DF 1 "register_operand")]
2494  "TARGET_SSE3"
2495{
2496  rtx tmp = gen_reg_rtx (V2DFmode);
2497  emit_insn (gen_sse3_haddv2df3 (tmp, operands[1], operands[1]));
2498  emit_insn (gen_vec_extractv2dfdf (operands[0], tmp, const0_rtx));
2499  DONE;
2500})
2501
;; Sum-reduction of a V16SF vector (operand 1) into an SF scalar
;; (operand 0), enabled under TARGET_AVX512F.  ix86_expand_reduc is
;; called with gen_addv16sf3 as the combining operation and a fresh V16SF
;; temporary as its destination; element 0 of that temporary is then
;; extracted into operand 0.
;; NOTE(review): this relies on ix86_expand_reduc leaving the full
;; reduction in element 0 -- confirm against its definition.
2502(define_expand "reduc_plus_scal_v16sf"
2503  [(match_operand:SF 0 "register_operand")
2504   (match_operand:V16SF 1 "register_operand")]
2505  "TARGET_AVX512F"
2506{
2507  rtx tmp = gen_reg_rtx (V16SFmode);
2508  ix86_expand_reduc (gen_addv16sf3, tmp, operands[1]);
2509  emit_insn (gen_vec_extractv16sfsf (operands[0], tmp, const0_rtx));
2510  DONE;
2511})
2512
;; Sum-reduction of a V8SF vector (operand 1) into an SF scalar
;; (operand 0), enabled under TARGET_AVX.  Sequence emitted:
;;   1. tmp     = hadd (op1, op1)
;;   2. tmp2    = hadd (tmp, tmp)   -- after two horizontal adds every
;;                                     element of a 128-bit half of tmp2
;;                                     holds that half's four-lane sum
;;                                     (per the avx_h...v8sf3 pattern).
;;   3. tmp     = vperm2f128 (tmp2, tmp2, imm 1)   (tmp is reused)
;;   4. vec_res = tmp + tmp2
;;   5. op0     = element 0 of vec_res
;; NOTE(review): immediate 1 presumably makes step 3 swap the two 128-bit
;; halves so that step 4 adds the two partial sums -- confirm against the
;; vperm2f128 pattern.
2513(define_expand "reduc_plus_scal_v8sf"
2514  [(match_operand:SF 0 "register_operand")
2515   (match_operand:V8SF 1 "register_operand")]
2516  "TARGET_AVX"
2517{
2518  rtx tmp = gen_reg_rtx (V8SFmode);
2519  rtx tmp2 = gen_reg_rtx (V8SFmode);
2520  rtx vec_res = gen_reg_rtx (V8SFmode);
2521  emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2522  emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2523  emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2524  emit_insn (gen_addv8sf3 (vec_res, tmp, tmp2));
2525  emit_insn (gen_vec_extractv8sfsf (operands[0], vec_res, const0_rtx));
2526  DONE;
2527})
2528
;; Sum-reduction of a V4SF vector (operand 1) into an SF scalar
;; (operand 0), enabled under plain TARGET_SSE.
;;   * With TARGET_SSE3: two successive horizontal adds (op1 with itself,
;;     then the result with itself) leave the four-lane sum in vec_res.
;;   * Without SSE3: falls back to ix86_expand_reduc with gen_addv4sf3.
;; In both cases element 0 of vec_res is extracted into operand 0.
2529(define_expand "reduc_plus_scal_v4sf"
2530  [(match_operand:SF 0 "register_operand")
2531   (match_operand:V4SF 1 "register_operand")]
2532  "TARGET_SSE"
2533{
2534  rtx vec_res = gen_reg_rtx (V4SFmode);
2535  if (TARGET_SSE3)
2536    {
2537      rtx tmp = gen_reg_rtx (V4SFmode);
2538      emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2539      emit_insn (gen_sse3_haddv4sf3 (vec_res, tmp, tmp));
2540    }
2541  else
2542    ix86_expand_reduc (gen_addv4sf3, vec_res, operands[1]);
2543  emit_insn (gen_vec_extractv4sfsf (operands[0], vec_res, const0_rtx));
2544  DONE;
2545})
2546
2547;; Modes handled by reduc_sm{in,ax}* patterns.
;; Each entry pairs a vector mode with the target condition under which
;; it takes part: 256-bit integer modes need AVX2, 256-bit float modes
;; AVX, V4SF plain SSE, 512-bit byte/word modes AVX512BW and the other
;; 512-bit modes AVX512F.  The signed reduc expander that uses this
;; iterator has an empty condition string of its own, so these per-mode
;; conditions are its only gate.
2548(define_mode_iterator REDUC_SMINMAX_MODE
2549  [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2550   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2551   (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2552   (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW")
2553   (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2554   (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2555   (V8DF "TARGET_AVX512F")])
2556
;; Signed min/max reduction of a vector (operand 1) into a scalar of its
;; element mode (operand 0), instantiated for every smaxmin code and every
;; mode in REDUC_SMINMAX_MODE.  The condition string is empty; enabling
;; comes from the per-mode conditions of the iterator.
;; ix86_expand_reduc is called with the matching gen_<code><mode>3 as the
;; combining operation, and element 0 of the temporary it fills is
;; extracted into operand 0.  The RTL template only declares the operands;
;; the body always ends with DONE.
2557(define_expand "reduc_<code>_scal_<mode>"
2558  [(smaxmin:REDUC_SMINMAX_MODE
2559     (match_operand:<ssescalarmode> 0 "register_operand")
2560     (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2561  ""
2562{
2563  rtx tmp = gen_reg_rtx (<MODE>mode);
2564  ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2565  emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2566							const0_rtx));
2567  DONE;
2568})
2569
;; Unsigned min/max reduction of a vector (operand 1) into a scalar of
;; its element mode (operand 0), instantiated for every umaxmin code and
;; every mode in VI_AVX512BW, under TARGET_AVX512F.
;; ix86_expand_reduc is called with the matching gen_<code><mode>3 as the
;; combining operation, and element 0 of the temporary it fills is
;; extracted into operand 0.  The body always ends with DONE.
2570(define_expand "reduc_<code>_scal_<mode>"
2571  [(umaxmin:VI_AVX512BW
2572     (match_operand:<ssescalarmode> 0 "register_operand")
2573     (match_operand:VI_AVX512BW 1 "register_operand"))]
2574  "TARGET_AVX512F"
2575{
2576  rtx tmp = gen_reg_rtx (<MODE>mode);
2577  ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2578  emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2579  							const0_rtx));
2580  DONE;
2581})
2582
;; Unsigned min/max reduction of a vector (operand 1) into a scalar of
;; its element mode (operand 0), instantiated for every umaxmin code and
;; every mode in VI_256, under TARGET_AVX2.
;; ix86_expand_reduc is called with the matching gen_<code><mode>3 as the
;; combining operation, and element 0 of the temporary it fills is
;; extracted into operand 0.  The body always ends with DONE.
2583(define_expand "reduc_<code>_scal_<mode>"
2584  [(umaxmin:VI_256
2585     (match_operand:<ssescalarmode> 0 "register_operand")
2586     (match_operand:VI_256 1 "register_operand"))]
2587  "TARGET_AVX2"
2588{
2589  rtx tmp = gen_reg_rtx (<MODE>mode);
2590  ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2591  emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2592							const0_rtx));
2593  DONE;
2594})
2595
;; Unsigned minimum reduction of a V8HI vector (operand 1) into an HI
;; scalar (operand 0), enabled under TARGET_SSE4_1.  Only umin is
;; provided for this mode here.  ix86_expand_reduc is called with
;; gen_uminv8hi3 as the combining operation, and element 0 of the
;; temporary it fills is extracted into operand 0.
2596(define_expand "reduc_umin_scal_v8hi"
2597  [(umin:V8HI
2598     (match_operand:HI 0 "register_operand")
2599     (match_operand:V8HI 1 "register_operand"))]
2600  "TARGET_SSE4_1"
2601{
2602  rtx tmp = gen_reg_rtx (V8HImode);
2603  ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
2604  emit_insn (gen_vec_extractv8hihi (operands[0], tmp, const0_rtx));
2605  DONE;
2606})
2607
;; Packed vreduce insn: operand 0 = UNSPEC_REDUCE (operand 1, imm8
;; operand 2) over the VF_AVX512VL modes, enabled under TARGET_AVX512DQ
;; and marked EVEX-only.  Operand 2 must be a constant in 0..255.  The
;; <mask_codefor>/<mask_name> pieces of the name and <mask_operand3> in
;; the template come from the masking substitution defined outside this
;; chunk.
2608(define_insn "<mask_codefor>reducep<mode><mask_name>"
2609  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2610	(unspec:VF_AVX512VL
2611	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2612	   (match_operand:SI 2 "const_0_to_255_operand")]
2613	  UNSPEC_REDUCE))]
2614  "TARGET_AVX512DQ"
2615  "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2616  [(set_attr "type" "sse")
2617   (set_attr "prefix" "evex")
2618   (set_attr "mode" "<MODE>")])
2619
;; Scalar vreduce: UNSPEC_REDUCE of operands 1, 2 and immediate 3 is
;; merged into operand 0 under vec_merge mask 1; the elements not
;; selected by the mask are taken from operand 1.
(define_insn "reduces<mode><mask_scalar_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "register_operand" "v")
             (match_operand:VF_128 2 "nonimmediate_operand" "vm")
             (match_operand:SI 3 "const_0_to_255_operand")]
            UNSPEC_REDUCE)
          (match_dup 1)
          (const_int 1)))]
  "TARGET_AVX512DQ"
  "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2, %3}"
  [(set_attr "type" "sse")
   (set_attr "mode" "<MODE>")
   (set_attr "prefix" "evex")])
2635
2636;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2637;;
2638;; Parallel floating point comparisons
2639;;
2640;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2641
;; AVX packed floating-point compare with an explicit predicate
;; immediate: operand 0 = UNSPEC_PCMP (operand 1, operand 2, imm 3),
;; where the immediate must satisfy const_0_to_31_operand.
(define_insn "avx_cmp<mode>3"
  [(set (match_operand:VF_128_256 0 "register_operand" "=x")
        (unspec:VF_128_256
          [(match_operand:VF_128_256 1 "register_operand" "x")
           (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_31_operand" "n")]
          UNSPEC_PCMP))]
  "TARGET_AVX"
  "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "<MODE>")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")])
2655
;; AVX scalar floating-point compare with an explicit predicate
;; immediate.  The compare result is merged into operand 0 under
;; vec_merge mask 1; the remaining elements are copied from operand 1.
(define_insn "avx_vmcmp<mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "register_operand" "x")
             (match_operand:VF_128 2 "nonimmediate_operand" "xm")
             (match_operand:SI 3 "const_0_to_31_operand" "n")]
            UNSPEC_PCMP)
          (match_dup 1)
          (const_int 1)))]
  "TARGET_AVX"
  "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "<ssescalarmode>")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")])
2672
;; Packed floating-point compare for commutative comparison codes only
;; (the insn condition requires RTX_COMM_COMPARE), which is why
;; operand 1 carries the "%" commutativity marker.
;; Alternative 0 is the two-operand non-AVX form, alternative 1 the
;; three-operand VEX form.
(define_insn "*<sse>_maskcmp<mode>3_comm"
  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
        (match_operator:VF_128_256 3 "sse_comparison_operator"
          [(match_operand:VF_128_256 1 "register_operand" "%0,x")
           (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
  "TARGET_SSE
   && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
  "@
   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "mode" "<MODE>")
   (set_attr "prefix" "orig,vex")
   (set_attr "length_immediate" "1")])
2688
;; Packed floating-point compare for any code accepted by
;; sse_comparison_operator.  Operand order is fixed here (no "%"
;; marker on operand 1).  Alternative 0 is the two-operand non-AVX
;; form, alternative 1 the three-operand VEX form.
(define_insn "<sse>_maskcmp<mode>3"
  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
        (match_operator:VF_128_256 3 "sse_comparison_operator"
          [(match_operand:VF_128_256 1 "register_operand" "0,x")
           (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
  "TARGET_SSE"
  "@
   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "mode" "<MODE>")
   (set_attr "prefix" "orig,vex")
   (set_attr "length_immediate" "1")])
2703
;; Scalar floating-point compare: the comparison result is merged into
;; operand 0 under vec_merge mask 1, with the other elements taken from
;; operand 1.  Alternative 0 is the non-AVX form, alternative 1 the VEX
;; form.
(define_insn "<sse>_vmmaskcmp<mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (match_operator:VF_128 3 "sse_comparison_operator"
            [(match_operand:VF_128 1 "register_operand" "0,x")
             (match_operand:VF_128 2 "vector_operand" "xBm,xm")])
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "@
   cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
   vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "mode" "<ssescalarmode>")
   (set_attr "prefix" "orig,vex")
   (set_attr "length_immediate" "1,*")])
2721
;; Predicate applied to the comparison immediate, keyed by vector mode:
;; floating-point vector modes use const_0_to_31_operand, integer vector
;; modes use const_0_to_7_operand.
(define_mode_attr cmp_imm_predicate
  [;; Floating-point vector modes.
   (V16SF "const_0_to_31_operand")
   (V8DF "const_0_to_31_operand")
   (V8SF "const_0_to_31_operand")
   (V4DF "const_0_to_31_operand")
   (V4SF "const_0_to_31_operand")
   (V2DF "const_0_to_31_operand")
   ;; Integer vector modes with 32-bit and 64-bit elements.
   (V16SI "const_0_to_7_operand")
   (V8DI "const_0_to_7_operand")
   (V8SI "const_0_to_7_operand")
   (V4DI "const_0_to_7_operand")
   (V4SI "const_0_to_7_operand")
   (V2DI "const_0_to_7_operand")
   ;; Integer vector modes with 16-bit and 8-bit elements.
   (V32HI "const_0_to_7_operand")
   (V64QI "const_0_to_7_operand")
   (V16HI "const_0_to_7_operand")
   (V32QI "const_0_to_7_operand")
   (V8HI "const_0_to_7_operand")
   (V16QI "const_0_to_7_operand")])
2732
;; AVX-512 compare of V48_AVX512VL vectors into a mask register ("Yk").
;; The predicate on immediate operand 3 is chosen per mode through
;; cmp_imm_predicate.
(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (unspec:<avx512fmaskmode>
          [(match_operand:V48_AVX512VL 1 "register_operand" "v")
           (match_operand:V48_AVX512VL 2 "nonimmediate_operand"
                                         "<round_saeonly_constraint>")
           (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
          UNSPEC_PCMP))]
  "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
  "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")])
2746
;; AVX-512BW compare (vpcmp) of VI12_AVX512VL vectors into a mask
;; register ("Yk"); the immediate predicate comes from
;; cmp_imm_predicate.
(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (unspec:<avx512fmaskmode>
          [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
           (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
           (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
          UNSPEC_PCMP))]
  "TARGET_AVX512BW"
  "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")])
2760
;; AVX-512BW unsigned compare (vpcmpu) of VI12_AVX512VL vectors into a
;; mask register ("Yk"); immediate operand 3 must satisfy
;; const_0_to_7_operand.
(define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (unspec:<avx512fmaskmode>
          [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
           (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
           (match_operand:SI 3 "const_0_to_7_operand" "n")]
          UNSPEC_UNSIGNED_PCMP))]
  "TARGET_AVX512BW"
  "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")])
2774
;; AVX-512F unsigned compare (vpcmpu) of VI48_AVX512VL vectors into a
;; mask register ("Yk"); immediate operand 3 must satisfy
;; const_0_to_7_operand.
(define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (unspec:<avx512fmaskmode>
          [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
           (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
           (match_operand:SI 3 "const_0_to_7_operand" "n")]
          UNSPEC_UNSIGNED_PCMP))]
  "TARGET_AVX512F"
  "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")])
2788
;; AVX-512F scalar floating-point compare into a mask register.  The
;; UNSPEC_PCMP result is ANDed with constant 1, so only bit 0 of the
;; mask can be set.
(define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (and:<avx512fmaskmode>
          (unspec:<avx512fmaskmode>
            [(match_operand:VF_128 1 "register_operand" "v")
             (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>"
                                     "<round_saeonly_constraint>")
             (match_operand:SI 3 "const_0_to_31_operand" "n")]
            UNSPEC_PCMP)
          (const_int 1)))]
  "TARGET_AVX512F"
  "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "<ssescalarmode>")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")])
2804
;; Masked form of the AVX-512F scalar compare: the UNSPEC_PCMP result is
;; ANDed with (operand 4 & 1), where operand 4 is a mask register that
;; is printed as the write-mask of the destination in the template.
(define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (and:<avx512fmaskmode>
          (unspec:<avx512fmaskmode>
            [(match_operand:VF_128 1 "register_operand" "v")
             (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>"
                                     "<round_saeonly_constraint>")
             (match_operand:SI 3 "const_0_to_31_operand" "n")]
            UNSPEC_PCMP)
          (and:<avx512fmaskmode>
            (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
            (const_int 1))))]
  "TARGET_AVX512F"
  "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_saeonly_op5>, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "<ssescalarmode>")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")])
2822
;; AVX-512F packed floating-point compare whose comparison code is
;; given by an rtx operator (operand 3, sse_comparison_operator) and
;; whose result lands in a mask register.
(define_insn "avx512f_maskcmp<mode>3"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
          [(match_operand:VF 1 "register_operand" "v")
           (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
  "TARGET_AVX512F"
  "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")])
2834
;; Scalar (u)comi: compares element 0 of operand 0 against element 0 of
;; operand 1 and sets FLAGS_REG in CCFP mode.  prefix_data16 is 1 only
;; when the insn mode is DF.
(define_insn "<sse>_<unord>comi<round_saeonly_name>"
  [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (vec_select:MODEF
            (match_operand:<ssevecmode> 0 "register_operand" "v")
            (parallel [(const_int 0)]))
          (vec_select:MODEF
            (match_operand:<ssevecmode> 1
              "<round_saeonly_nimm_scalar_predicate>"
              "<round_saeonly_constraint>")
            (parallel [(const_int 0)]))))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
  "%v<unord>comi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "<MODE>")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_rep" "0")
   (set (attr "prefix_data16")
        (if_then_else (eq_attr "mode" "DF")
                      (const_string "1")
                      (const_string "0")))])
2854
;; Vector compare of V48_AVX512VL operands producing a mask-mode
;; result.  All work is delegated to ix86_expand_mask_vec_cmp, which
;; is asserted to succeed.
(define_expand "vec_cmp<mode><avx512fmaskmodelower>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
        (match_operator:<avx512fmaskmode> 1 ""
          [(match_operand:V48_AVX512VL 2 "register_operand")
           (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
  "TARGET_AVX512F"
{
  /* The call stays outside the assertion so that it is a plain
     statement of its own.  */
  bool expanded = ix86_expand_mask_vec_cmp (operands);

  gcc_assert (expanded);
  DONE;
})
2866
;; Vector compare of VI12_AVX512VL operands producing a mask-mode
;; result (requires AVX512BW).  Delegates to ix86_expand_mask_vec_cmp.
(define_expand "vec_cmp<mode><avx512fmaskmodelower>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
        (match_operator:<avx512fmaskmode> 1 ""
          [(match_operand:VI12_AVX512VL 2 "register_operand")
           (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
  "TARGET_AVX512BW"
{
  bool expanded = ix86_expand_mask_vec_cmp (operands);

  gcc_assert (expanded);
  DONE;
})
2878
;; Vector compare of 256-bit integer vectors (VI_256) producing an
;; integer-vector result (requires AVX2).  Delegates to
;; ix86_expand_int_vec_cmp.
(define_expand "vec_cmp<mode><sseintvecmodelower>"
  [(set (match_operand:<sseintvecmode> 0 "register_operand")
        (match_operator:<sseintvecmode> 1 ""
          [(match_operand:VI_256 2 "register_operand")
           (match_operand:VI_256 3 "nonimmediate_operand")]))]
  "TARGET_AVX2"
{
  bool expanded = ix86_expand_int_vec_cmp (operands);

  gcc_assert (expanded);
  DONE;
})
2890
;; Vector compare of VI124_128 integer vectors producing an
;; integer-vector result (requires SSE2).  Delegates to
;; ix86_expand_int_vec_cmp.
(define_expand "vec_cmp<mode><sseintvecmodelower>"
  [(set (match_operand:<sseintvecmode> 0 "register_operand")
        (match_operator:<sseintvecmode> 1 ""
          [(match_operand:VI124_128 2 "register_operand")
           (match_operand:VI124_128 3 "vector_operand")]))]
  "TARGET_SSE2"
{
  bool expanded = ix86_expand_int_vec_cmp (operands);

  gcc_assert (expanded);
  DONE;
})
2902
;; Vector compare of V2DI operands producing a V2DI result; enabled
;; only with SSE4.2.  Delegates to ix86_expand_int_vec_cmp.
(define_expand "vec_cmpv2div2di"
  [(set (match_operand:V2DI 0 "register_operand")
        (match_operator:V2DI 1 ""
          [(match_operand:V2DI 2 "register_operand")
           (match_operand:V2DI 3 "vector_operand")]))]
  "TARGET_SSE4_2"
{
  bool expanded = ix86_expand_int_vec_cmp (operands);

  gcc_assert (expanded);
  DONE;
})
2914
;; Vector compare of 256-bit floating-point vectors (VF_256) producing
;; an integer-vector result (requires AVX).  Delegates to
;; ix86_expand_fp_vec_cmp.
(define_expand "vec_cmp<mode><sseintvecmodelower>"
  [(set (match_operand:<sseintvecmode> 0 "register_operand")
        (match_operator:<sseintvecmode> 1 ""
          [(match_operand:VF_256 2 "register_operand")
           (match_operand:VF_256 3 "nonimmediate_operand")]))]
  "TARGET_AVX"
{
  bool expanded = ix86_expand_fp_vec_cmp (operands);

  gcc_assert (expanded);
  DONE;
})
2926
;; Vector compare of 128-bit floating-point vectors (VF_128) producing
;; an integer-vector result (requires SSE).  Delegates to
;; ix86_expand_fp_vec_cmp.
(define_expand "vec_cmp<mode><sseintvecmodelower>"
  [(set (match_operand:<sseintvecmode> 0 "register_operand")
        (match_operator:<sseintvecmode> 1 ""
          [(match_operand:VF_128 2 "register_operand")
           (match_operand:VF_128 3 "vector_operand")]))]
  "TARGET_SSE"
{
  bool expanded = ix86_expand_fp_vec_cmp (operands);

  gcc_assert (expanded);
  DONE;
})
2938
;; Unsigned vector compare (vec_cmpu) of VI48_AVX512VL operands
;; producing a mask-mode result.  Delegates to
;; ix86_expand_mask_vec_cmp.
(define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
        (match_operator:<avx512fmaskmode> 1 ""
          [(match_operand:VI48_AVX512VL 2 "register_operand")
           (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
  "TARGET_AVX512F"
{
  bool expanded = ix86_expand_mask_vec_cmp (operands);

  gcc_assert (expanded);
  DONE;
})
2950
;; Unsigned vector compare (vec_cmpu) of VI12_AVX512VL operands
;; producing a mask-mode result (requires AVX512BW).  Delegates to
;; ix86_expand_mask_vec_cmp.
(define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
        (match_operator:<avx512fmaskmode> 1 ""
          [(match_operand:VI12_AVX512VL 2 "register_operand")
           (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
  "TARGET_AVX512BW"
{
  bool expanded = ix86_expand_mask_vec_cmp (operands);

  gcc_assert (expanded);
  DONE;
})
2962
;; Unsigned vector compare (vec_cmpu) of 256-bit integer vectors
;; (VI_256) producing an integer-vector result (requires AVX2).
;; Delegates to ix86_expand_int_vec_cmp.
(define_expand "vec_cmpu<mode><sseintvecmodelower>"
  [(set (match_operand:<sseintvecmode> 0 "register_operand")
        (match_operator:<sseintvecmode> 1 ""
          [(match_operand:VI_256 2 "register_operand")
           (match_operand:VI_256 3 "nonimmediate_operand")]))]
  "TARGET_AVX2"
{
  bool expanded = ix86_expand_int_vec_cmp (operands);

  gcc_assert (expanded);
  DONE;
})
2974
;; Unsigned vector compare (vec_cmpu) of VI124_128 integer vectors
;; producing an integer-vector result (requires SSE2).  Delegates to
;; ix86_expand_int_vec_cmp.
(define_expand "vec_cmpu<mode><sseintvecmodelower>"
  [(set (match_operand:<sseintvecmode> 0 "register_operand")
        (match_operator:<sseintvecmode> 1 ""
          [(match_operand:VI124_128 2 "register_operand")
           (match_operand:VI124_128 3 "vector_operand")]))]
  "TARGET_SSE2"
{
  bool expanded = ix86_expand_int_vec_cmp (operands);

  gcc_assert (expanded);
  DONE;
})
2986
;; Unsigned vector compare (vec_cmpu) of V2DI operands producing a
;; V2DI result; enabled only with SSE4.2.  Delegates to
;; ix86_expand_int_vec_cmp.
(define_expand "vec_cmpuv2div2di"
  [(set (match_operand:V2DI 0 "register_operand")
        (match_operator:V2DI 1 ""
          [(match_operand:V2DI 2 "register_operand")
           (match_operand:V2DI 3 "vector_operand")]))]
  "TARGET_SSE4_2"
{
  bool expanded = ix86_expand_int_vec_cmp (operands);

  gcc_assert (expanded);
  DONE;
})
2998
;; Equality-only vector compare (vec_cmpeq) of V2DI operands producing
;; a V2DI result.  Unlike the general V2DI compares this one is enabled
;; from SSE4.1.  Delegates to ix86_expand_int_vec_cmp.
(define_expand "vec_cmpeqv2div2di"
  [(set (match_operand:V2DI 0 "register_operand")
        (match_operator:V2DI 1 ""
          [(match_operand:V2DI 2 "register_operand")
           (match_operand:V2DI 3 "vector_operand")]))]
  "TARGET_SSE4_1"
{
  bool expanded = ix86_expand_int_vec_cmp (operands);

  gcc_assert (expanded);
  DONE;
})
3010
;; Vector conditional select for 512-bit data (V_512) driven by a
;; floating-point comparison of VF_512 operands 4 and 5.  Only enabled
;; when the data and comparison modes have the same number of elements.
;; Delegates to ix86_expand_fp_vcond.
(define_expand "vcond<V_512:mode><VF_512:mode>"
  [(set (match_operand:V_512 0 "register_operand")
        (if_then_else:V_512
          (match_operator 3 ""
            [(match_operand:VF_512 4 "nonimmediate_operand")
             (match_operand:VF_512 5 "nonimmediate_operand")])
          (match_operand:V_512 1 "general_operand")
          (match_operand:V_512 2 "general_operand")))]
  "TARGET_AVX512F
   && (GET_MODE_NUNITS (<V_512:MODE>mode)
       == GET_MODE_NUNITS (<VF_512:MODE>mode))"
{
  bool expanded = ix86_expand_fp_vcond (operands);

  gcc_assert (expanded);
  DONE;
})
3027
;; Vector conditional select for 256-bit data (V_256) driven by a
;; floating-point comparison of VF_256 operands 4 and 5.  Only enabled
;; when the data and comparison modes have the same number of elements.
;; Delegates to ix86_expand_fp_vcond.
(define_expand "vcond<V_256:mode><VF_256:mode>"
  [(set (match_operand:V_256 0 "register_operand")
        (if_then_else:V_256
          (match_operator 3 ""
            [(match_operand:VF_256 4 "nonimmediate_operand")
             (match_operand:VF_256 5 "nonimmediate_operand")])
          (match_operand:V_256 1 "general_operand")
          (match_operand:V_256 2 "general_operand")))]
  "TARGET_AVX
   && (GET_MODE_NUNITS (<V_256:MODE>mode)
       == GET_MODE_NUNITS (<VF_256:MODE>mode))"
{
  bool expanded = ix86_expand_fp_vcond (operands);

  gcc_assert (expanded);
  DONE;
})
3044
;; Vector conditional select for 128-bit data (V_128) driven by a
;; floating-point comparison of VF_128 operands 4 and 5.  Only enabled
;; when the data and comparison modes have the same number of elements.
;; Delegates to ix86_expand_fp_vcond.
(define_expand "vcond<V_128:mode><VF_128:mode>"
  [(set (match_operand:V_128 0 "register_operand")
        (if_then_else:V_128
          (match_operator 3 ""
            [(match_operand:VF_128 4 "vector_operand")
             (match_operand:VF_128 5 "vector_operand")])
          (match_operand:V_128 1 "general_operand")
          (match_operand:V_128 2 "general_operand")))]
  "TARGET_SSE
   && (GET_MODE_NUNITS (<V_128:MODE>mode)
       == GET_MODE_NUNITS (<VF_128:MODE>mode))"
{
  bool expanded = ix86_expand_fp_vcond (operands);

  gcc_assert (expanded);
  DONE;
})
3061
;; Mask-register select for V48_AVX512VL vectors: a vec_merge of
;; operands 1 and 2 under mask operand 3.  There is no preparation
;; code, so the template below is emitted as written.
(define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
  [(set (match_operand:V48_AVX512VL 0 "register_operand")
        (vec_merge:V48_AVX512VL
          (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
          (match_operand:V48_AVX512VL 2 "vector_move_operand")
          (match_operand:<avx512fmaskmode> 3 "register_operand")))]
  "TARGET_AVX512F")
3069
;; Mask-register select for VI12_AVX512VL vectors (requires AVX512BW):
;; a vec_merge of operands 1 and 2 under mask operand 3.  There is no
;; preparation code, so the template below is emitted as written.
(define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
        (vec_merge:VI12_AVX512VL
          (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
          (match_operand:VI12_AVX512VL 2 "vector_move_operand")
          (match_operand:<avx512fmaskmode> 3 "register_operand")))]
  "TARGET_AVX512BW")
3077
;; Vector-mask select for 256-bit integer vectors (VI_256, AVX2).  The
;; mask (operand 3) is itself an integer vector; the expansion is
;; handed to ix86_expand_sse_movcc as (dest, mask, op1, op2).
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
  [(set (match_operand:VI_256 0 "register_operand")
        (vec_merge:VI_256
          (match_operand:VI_256 1 "nonimmediate_operand")
          (match_operand:VI_256 2 "vector_move_operand")
          (match_operand:<sseintvecmode> 3 "register_operand")))]
  "TARGET_AVX2"
{
  rtx dest = operands[0];
  rtx mask = operands[3];

  ix86_expand_sse_movcc (dest, mask, operands[1], operands[2]);
  DONE;
})
3090
;; Vector-mask select for VI124_128 integer vectors (SSE2).  The mask
;; (operand 3) is itself an integer vector; the expansion is handed to
;; ix86_expand_sse_movcc as (dest, mask, op1, op2).
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
  [(set (match_operand:VI124_128 0 "register_operand")
        (vec_merge:VI124_128
          (match_operand:VI124_128 1 "vector_operand")
          (match_operand:VI124_128 2 "vector_move_operand")
          (match_operand:<sseintvecmode> 3 "register_operand")))]
  "TARGET_SSE2"
{
  rtx dest = operands[0];
  rtx mask = operands[3];

  ix86_expand_sse_movcc (dest, mask, operands[1], operands[2]);
  DONE;
})
3103
;; Vector-mask select for V2DI with a V2DI mask; enabled only with
;; SSE4.2.  The expansion is handed to ix86_expand_sse_movcc as
;; (dest, mask, op1, op2).
(define_expand "vcond_mask_v2div2di"
  [(set (match_operand:V2DI 0 "register_operand")
        (vec_merge:V2DI
          (match_operand:V2DI 1 "vector_operand")
          (match_operand:V2DI 2 "vector_move_operand")
          (match_operand:V2DI 3 "register_operand")))]
  "TARGET_SSE4_2"
{
  rtx dest = operands[0];
  rtx mask = operands[3];

  ix86_expand_sse_movcc (dest, mask, operands[1], operands[2]);
  DONE;
})
3116
;; Vector-mask select for 256-bit floating-point vectors (VF_256, AVX)
;; with an integer-vector mask.  The expansion is handed to
;; ix86_expand_sse_movcc as (dest, mask, op1, op2).
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
  [(set (match_operand:VF_256 0 "register_operand")
        (vec_merge:VF_256
          (match_operand:VF_256 1 "nonimmediate_operand")
          (match_operand:VF_256 2 "vector_move_operand")
          (match_operand:<sseintvecmode> 3 "register_operand")))]
  "TARGET_AVX"
{
  rtx dest = operands[0];
  rtx mask = operands[3];

  ix86_expand_sse_movcc (dest, mask, operands[1], operands[2]);
  DONE;
})
3129
;; Vector-mask select for 128-bit floating-point vectors (VF_128, SSE)
;; with an integer-vector mask.  The expansion is handed to
;; ix86_expand_sse_movcc as (dest, mask, op1, op2).
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
  [(set (match_operand:VF_128 0 "register_operand")
        (vec_merge:VF_128
          (match_operand:VF_128 1 "vector_operand")
          (match_operand:VF_128 2 "vector_move_operand")
          (match_operand:<sseintvecmode> 3 "register_operand")))]
  "TARGET_SSE"
{
  rtx dest = operands[0];
  rtx mask = operands[3];

  ix86_expand_sse_movcc (dest, mask, operands[1], operands[2]);
  DONE;
})
3142
3143;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3144;;
3145;; Parallel floating point logical operations
3146;;
3147;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3148
;; Packed floating-point AND-NOT for 128/256-bit vectors:
;; operand 0 = (~operand 1) & operand 2.  The output text is built at
;; assembly time from the matched alternative and from the insn "mode"
;; attribute computed at the bottom of the pattern.
(define_insn "<sse>_andnot<mode>3<mask_name>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
        (and:VF_128_256
          (not:VF_128_256
            (match_operand:VF_128_256 1 "register_operand" "0,x,v,v"))
          (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
  "TARGET_SSE && <mask_avx512vl_condition>"
{
  static char asm_text[128];
  const char *fmt;
  const char *sfx;

  /* Alternative 0 uses the two-operand form; alternatives 1 to 3 use
     the three-operand v-prefixed form.  */
  if (which_alternative == 0)
    fmt = "andn%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative >= 1 && which_alternative <= 3)
    fmt = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
  else
    gcc_unreachable ();

  /* Start from the natural suffix of the vector mode and override it
     when the mode attribute asks for a different encoding.  */
  sfx = "<ssemodesuffix>";
  switch (get_attr_mode (insn))
    {
    case MODE_V4SF:
    case MODE_V8SF:
      sfx = "ps";
      break;

    case MODE_TI:
    case MODE_OI:
      /* There is no vandnp[sd] in avx512f.  Use vpandn[qd].  */
      fmt = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
      sfx = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
      break;

    default:
      break;
    }

  snprintf (asm_text, sizeof (asm_text), fmt, sfx);
  return asm_text;
}
  [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,maybe_vex,evex,evex")
   (set (attr "mode")
        (cond [;; Masked alternative 1 without AVX512DQ: integer mode.
               (and (match_test "<mask_applied>")
                    (and (eq_attr "alternative" "1")
                         (match_test "!TARGET_AVX512DQ")))
                 (const_string "<sseintvecmode2>")
               ;; Alternative 3 always takes the integer mode.
               (eq_attr "alternative" "3")
                 (const_string "<sseintvecmode2>")
               (and (match_test "<MODE_SIZE> == 16")
                    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
                 (const_string "<ssePSmode>")
               (match_test "TARGET_AVX")
                 (const_string "<MODE>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
               ]
               (const_string "<MODE>")))])
3213
3214
;; Packed floating-point AND-NOT for 512-bit vectors:
;; operand 0 = (~operand 1) & operand 2.  With AVX512DQ the
;; floating-point mnemonic is printed; otherwise the integer vpandn
;; form with a "d" or "q" suffix is used instead.
(define_insn "<sse>_andnot<mode>3<mask_name>"
  [(set (match_operand:VF_512 0 "register_operand" "=v")
        (and:VF_512
          (not:VF_512
            (match_operand:VF_512 1 "register_operand" "v"))
          (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
{
  static char asm_text[128];
  const char *int_prefix;
  const char *sfx;

  if (TARGET_AVX512DQ)
    {
      int_prefix = "";
      sfx = "<ssemodesuffix>";
    }
  else
    {
      /* There is no vandnp[sd] in avx512f.  Use vpandn[qd].  */
      int_prefix = "p";
      sfx = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
    }

  snprintf (asm_text, sizeof (asm_text),
            "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
            int_prefix, sfx);
  return asm_text;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set (attr "mode")
        (if_then_else (match_test "TARGET_AVX512DQ")
                      (const_string "<sseinsnmode>")
                      (const_string "XI")))])
3248
;; Expander for the any_logic operations on 128/256-bit floating-point
;; vectors.  The only preparation step is to pass the operands through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
(define_expand "<code><mode>3<mask_name>"
  [(set (match_operand:VF_128_256 0 "register_operand")
        (any_logic:VF_128_256
          (match_operand:VF_128_256 1 "vector_operand")
          (match_operand:VF_128_256 2 "vector_operand")))]
  "TARGET_SSE && <mask_avx512vl_condition>"
{
  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
3256
;; Expander for the any_logic operations on 512-bit floating-point
;; vectors.  The only preparation step is to pass the operands through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
(define_expand "<code><mode>3<mask_name>"
  [(set (match_operand:VF_512 0 "register_operand")
        (any_logic:VF_512
          (match_operand:VF_512 1 "nonimmediate_operand")
          (match_operand:VF_512 2 "nonimmediate_operand")))]
  "TARGET_AVX512F"
{
  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
3264
;; any_logic operations on 128/256-bit floating-point vectors.  The
;; insn condition rejects the case where both inputs are memory.  The
;; output text is built at assembly time from the matched alternative
;; and from the insn "mode" attribute computed at the bottom.
(define_insn "*<code><mode>3<mask_name>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
        (any_logic:VF_128_256
          (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v")
          (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
  "TARGET_SSE && <mask_avx512vl_condition>
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
{
  static char asm_text[128];
  const char *fmt;
  const char *sfx;

  /* Alternative 0 uses the two-operand form; alternatives 1 to 3 use
     the three-operand v-prefixed form.  */
  if (which_alternative == 0)
    fmt = "<logic>%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative >= 1 && which_alternative <= 3)
    fmt = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
  else
    gcc_unreachable ();

  /* Start from the natural suffix of the vector mode and override it
     when the mode attribute asks for a different encoding.  */
  sfx = "<ssemodesuffix>";
  switch (get_attr_mode (insn))
    {
    case MODE_V4SF:
    case MODE_V8SF:
      sfx = "ps";
      break;

    case MODE_TI:
    case MODE_OI:
      /* There is no v<logic>p[sd] in avx512f.  Use vp<logic>[qd].  */
      fmt = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
      sfx = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
      break;

    default:
      break;
    }

  snprintf (asm_text, sizeof (asm_text), fmt, sfx);
  return asm_text;
}
  [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,maybe_evex,evex,evex")
   (set (attr "mode")
        (cond [;; Masked alternative 1 without AVX512DQ: integer mode.
               (and (match_test "<mask_applied>")
                    (and (eq_attr "alternative" "1")
                         (match_test "!TARGET_AVX512DQ")))
                 (const_string "<sseintvecmode2>")
               ;; Alternative 3 always takes the integer mode.
               (eq_attr "alternative" "3")
                 (const_string "<sseintvecmode2>")
               (and (match_test "<MODE_SIZE> == 16")
                    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
                 (const_string "<ssePSmode>")
               (match_test "TARGET_AVX")
                 (const_string "<MODE>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
               ]
               (const_string "<MODE>")))])
3329
;; any_logic operations on 512-bit floating-point vectors.  The insn
;; condition rejects the case where both inputs are memory.  With
;; AVX512DQ the floating-point mnemonic is printed; otherwise the
;; integer vp<logic> form with a "d" or "q" suffix is used instead.
(define_insn "*<code><mode>3<mask_name>"
  [(set (match_operand:VF_512 0 "register_operand" "=v")
        (any_logic:VF_512
          (match_operand:VF_512 1 "nonimmediate_operand" "%v")
          (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
{
  static char asm_text[128];
  const char *int_prefix;
  const char *sfx;

  if (TARGET_AVX512DQ)
    {
      int_prefix = "";
      sfx = "<ssemodesuffix>";
    }
  else
    {
      /* There is no v<logic>p[sd] in avx512f.  Use vp<logic>[dq].  */
      int_prefix = "p";
      sfx = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
    }

  snprintf (asm_text, sizeof (asm_text),
            "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
            int_prefix, sfx);
  return asm_text;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set (attr "mode")
        (if_then_else (match_test "TARGET_AVX512DQ")
                      (const_string "<sseinsnmode>")
                      (const_string "XI")))])
3362
;; Vector copysign, expanded as three logical operations around a
;; sign-bit mask (operand 3, built by ix86_build_signbit_mask):
;;   operand 4 = ~mask & operand 1
;;   operand 5 =  mask & operand 2
;;   operand 0 = operand 4 | operand 5
(define_expand "copysign<mode>3"
  [(set (match_dup 4)
        (and:VF
          (not:VF (match_dup 3))
          (match_operand:VF 1 "vector_operand")))
   (set (match_dup 5)
        (and:VF (match_dup 3)
                (match_operand:VF 2 "vector_operand")))
   (set (match_operand:VF 0 "register_operand")
        (ior:VF (match_dup 4) (match_dup 5)))]
  "TARGET_SSE"
{
  /* Operand 3 is the mask shared by both AND steps.  */
  operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);

  /* Operands 4 and 5 are fresh pseudos for the two partial results.  */
  operands[4] = gen_reg_rtx (<MODE>mode);
  operands[5] = gen_reg_rtx (<MODE>mode);
})
3380
3381;; Also define scalar versions.  These are used for abs, neg, and
3382;; conditional move.  Using subregs into vector modes causes register
3383;; allocation lossage.  These patterns do not allow memory operands
3384;; because the native instructions read the full 128-bits.
3385
;; Scalar floating-point AND-NOT on registers only:
;; operand 0 = (~operand 1) & operand 2.  Alternatives 2 and 3 are the
;; EVEX ones; without AVX512DQ they print the integer vpandn form, and
;; alternative 3 prints its operands with the %g modifier.
(define_insn "*andnot<mode>3"
  [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
        (and:MODEF
          (not:MODEF
            (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
          (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
{
  static char asm_text[128];
  const char *fmt;
  const char *sfx;
  bool evex_int_form;

  if (get_attr_mode (insn) == MODE_V4SF)
    sfx = "ps";
  else
    sfx = "<ssevecmodesuffix>";

  /* Alternatives 2 and 3 switch to the integer mnemonic, with a d or q
     suffix, when AVX512DQ is not available.  */
  evex_int_form = (which_alternative == 2 || which_alternative == 3)
                  && !TARGET_AVX512DQ;
  if (evex_int_form)
    sfx = <MODE>mode == DFmode ? "q" : "d";

  switch (which_alternative)
    {
    case 0:
      fmt = "andn%s\t{%%2, %%0|%%0, %%2}";
      break;

    case 1:
      fmt = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
      break;

    case 2:
      if (evex_int_form)
        fmt = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
      else
        fmt = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
      break;

    case 3:
      if (evex_int_form)
        fmt = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
      else
        fmt = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
      break;

    default:
      gcc_unreachable ();
    }

  snprintf (asm_text, sizeof (asm_text), fmt, sfx);
  return asm_text;
}
  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex,evex,evex")
   (set (attr "mode")
        (cond [(eq_attr "alternative" "2")
                 (if_then_else (match_test "TARGET_AVX512DQ")
                               (const_string "<ssevecmode>")
                               (const_string "TI"))
               (eq_attr "alternative" "3")
                 (if_then_else (match_test "TARGET_AVX512DQ")
                               (const_string "<avx512fvecmode>")
                               (const_string "XI"))
               ;; NOTE(review): MODEF is defined outside this chunk; if it
               ;; only covers scalar SF/DF then this 16-byte size test can
               ;; never hold -- confirm before relying on this arm.
               (and (match_test "<MODE_SIZE> == 16")
                    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
                 (const_string "V4SF")
               (match_test "TARGET_AVX")
                 (const_string "<ssevecmode>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
               ]
               (const_string "<ssevecmode>")))])
3453
;; TFmode AND-NOT: operand 0 = (~operand 1) & operand 2.
;; Alternatives 2 and 3 always print pandnq; alternatives 0 and 1 print
;; andnps when the insn mode attribute is V4SF and pandn otherwise.
;; Alternative 3 prints its operands with the %g modifier.
(define_insn "*andnottf3"
  [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
        (and:TF
          (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v"))
          (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
  "TARGET_SSE"
{
  static char asm_text[128];
  const char *fmt;
  const char *mnemonic;

  /* The mode attribute is consulted only for alternatives 0 and 1.  */
  if (which_alternative >= 2)
    mnemonic = "pandnq";
  else if (get_attr_mode (insn) == MODE_V4SF)
    mnemonic = "andnps";
  else
    mnemonic = "pandn";

  if (which_alternative == 0)
    fmt = "%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1 || which_alternative == 2)
    fmt = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else if (which_alternative == 3)
    fmt = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
  else
    gcc_unreachable ();

  snprintf (asm_text, sizeof (asm_text), fmt, mnemonic);
  return asm_text;
}
  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex,evex,evex")
   ;; A data16 prefix is counted only for the non-AVX alternative when
   ;; the insn mode is TI.
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "0")
            (eq_attr "mode" "TI"))
       (const_string "1")
       (const_string "*")))
   (set (attr "mode")
        (cond [(eq_attr "alternative" "2")
                 (const_string "TI")
               (eq_attr "alternative" "3")
                 (const_string "XI")
               (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                 (const_string "V4SF")
               (match_test "TARGET_AVX")
                 (const_string "TI")
               (ior (not (match_test "TARGET_SSE2"))
                    (match_test "optimize_function_for_size_p (cfun)"))
                 (const_string "V4SF")
               ]
               (const_string "TI")))])
3509
;; Logic operation (any_logic) on scalar float modes (MODEF); all operands
;; are registers.  The mnemonic suffix is "ps" when the insn's mode attribute
;; is V4SF and <ssevecmodesuffix> otherwise.  For alternatives 2 and 3 the
;; output falls back to the integer "vp<logic>" mnemonic with a "q" (DFmode)
;; or "d" suffix when TARGET_AVX512DQ is not set.  Alternative 3 always
;; prints its operands through the %g modifier.
3510(define_insn "*<code><mode>3"
3511  [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3512	(any_logic:MODEF
3513	  (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
3514	  (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3515  "SSE_FLOAT_MODE_P (<MODE>mode)"
3516{
3517  static char buf[128];
3518  const char *ops;
3519  const char *suffix
3520    = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3521
3522  switch (which_alternative)
3523    {
3524    case 0:
3525      ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3526      break;
3527    case 2:
3528      if (!TARGET_AVX512DQ)
3529	{
3530	  suffix = <MODE>mode == DFmode ? "q" : "d";
3531	  ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3532	  break;
3533	}
3534      /* FALLTHRU */
3535    case 1:
3536      ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3537      break;
3538    case 3:
3539      if (TARGET_AVX512DQ)
3540	ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3541      else
3542	{
3543	  suffix = <MODE>mode == DFmode ? "q" : "d";
3544	  ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3545	}
3546      break;
3547    default:
3548      gcc_unreachable ();
3549    }
3550
3551  snprintf (buf, sizeof (buf), ops, suffix);
3552  return buf;
3553}
3554  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3555   (set_attr "type" "sselog")
3556   (set_attr "prefix" "orig,vex,evex,evex")
3557   (set (attr "mode")
3558	(cond [(eq_attr "alternative" "2")
3559		 (if_then_else (match_test "TARGET_AVX512DQ")
3560			       (const_string "<ssevecmode>")
3561			       (const_string "TI"))
3562	       (eq_attr "alternative" "3")
3563		 (if_then_else (match_test "TARGET_AVX512DQ")
3564			       (const_string "<avx512fvecmode>")
3565			       (const_string "XI"))
3566	       (and (match_test "<MODE_SIZE> == 16")
3567		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3568		 (const_string "V4SF")
3569	       (match_test "TARGET_AVX")
3570		 (const_string "<ssevecmode>")
3571	       (match_test "optimize_function_for_size_p (cfun)")
3572		 (const_string "V4SF")
3573	       ]
3574	       (const_string "<ssevecmode>")))])
3575
;; Expander for the TFmode logic operations (any_logic).  Its preparation
;; statement passes the operands through
;; ix86_fixup_binary_operands_no_copy.
3576(define_expand "<code>tf3"
3577  [(set (match_operand:TF 0 "register_operand")
3578	(any_logic:TF
3579	  (match_operand:TF 1 "vector_operand")
3580	  (match_operand:TF 2 "vector_operand")))]
3581  "TARGET_SSE"
3582  "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
3583
;; TFmode logic operation (any_logic); at most one of operands 1 and 2 may
;; be in memory.  Alternative 0 is the two-operand form, alternatives 1 and 2
;; the three-operand "v"-prefixed form, and alternative 3 the same form with
;; every operand printed through the %g modifier.  The mnemonic is
;; "p<logic>q" for alternatives 2 and 3; otherwise it is "<logic>ps" when the
;; insn's mode attribute is V4SF and "p<logic>" for any other mode.
;; The mode attribute of alternative 3 is XI, matching the parallel
;; alternative of *andnottf3.
3584(define_insn "*<code>tf3"
3585  [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3586	(any_logic:TF
3587	  (match_operand:TF 1 "vector_operand" "%0,x,v,v")
3588	  (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3589  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3590{
3591  static char buf[128];
3592  const char *ops;
3593  const char *tmp
3594    = (which_alternative >= 2 ? "p<logic>q"
3595       : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>");
3596
3597  switch (which_alternative)
3598    {
3599    case 0:
3600      ops = "%s\t{%%2, %%0|%%0, %%2}";
3601      break;
3602    case 1:
3603    case 2:
3604      ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3605      break;
3606    case 3:
3607      ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3608      break;
3609    default:
3610      gcc_unreachable ();
3611    }
3612
3613  snprintf (buf, sizeof (buf), ops, tmp);
3614  return buf;
3615}
3616  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3617   (set_attr "type" "sselog")
3618   (set (attr "prefix_data16")
3619     (if_then_else
3620       (and (eq_attr "alternative" "0")
3621	    (eq_attr "mode" "TI"))
3622       (const_string "1")
3623       (const_string "*")))
3624   (set_attr "prefix" "orig,vex,evex,evex")
3625   (set (attr "mode")
3626	(cond [(eq_attr "alternative" "2")
3627		 (const_string "TI")
3628	       (eq_attr "alternative" "3")
3629		 (const_string "XI")
3630	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3631		 (const_string "V4SF")
3632	       (match_test "TARGET_AVX")
3633		 (const_string "TI")
3634	       (ior (not (match_test "TARGET_SSE2"))
3635		    (match_test "optimize_function_for_size_p (cfun)"))
3636		 (const_string "V4SF")
3637	       ]
3638	       (const_string "TI")))])
3639
3640;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3641;;
3642;; FMA floating point multiply/accumulate instructions.  These include
3643;; scalar versions of the instructions as well as vector versions.
3644;;
3645;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3646
3647;; The standard names for scalar FMA are only available with SSE math enabled.
3648;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma.  It doesn't
3649;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
3650;; and TARGET_FMA4 are both false.
3651;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3652;; one must force the EVEX encoding of the fma insns.  Ideally we'd improve
3653;; GAS to allow proper prefix selection.  However, for the moment all hardware
3654;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes used by the standard-named fma/fms/fnma/fnms expanders that follow.
;; The scalar SF/DF entries additionally require TARGET_SSE_MATH.
3655(define_mode_iterator FMAMODEM
3656  [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3657   (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3658   (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3659   (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3660   (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3661   (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3662   (V16SF "TARGET_AVX512F")
3663   (V8DF "TARGET_AVX512F")])
3664
;; Standard-named fused multiply-add: op0 = op1 * op2 + op3.
3665(define_expand "fma<mode>4"
3666  [(set (match_operand:FMAMODEM 0 "register_operand")
3667	(fma:FMAMODEM
3668	  (match_operand:FMAMODEM 1 "nonimmediate_operand")
3669	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
3670	  (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3671
;; Standard-named fused multiply-subtract: op0 = op1 * op2 - op3
;; (operand 3 is negated inside the fma).
3672(define_expand "fms<mode>4"
3673  [(set (match_operand:FMAMODEM 0 "register_operand")
3674	(fma:FMAMODEM
3675	  (match_operand:FMAMODEM 1 "nonimmediate_operand")
3676	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
3677	  (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3678
;; Standard-named negated fused multiply-add: op0 = -op1 * op2 + op3
;; (operand 1 is negated inside the fma).
3679(define_expand "fnma<mode>4"
3680  [(set (match_operand:FMAMODEM 0 "register_operand")
3681	(fma:FMAMODEM
3682	  (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3683	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
3684	  (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3685
;; Standard-named negated fused multiply-subtract: op0 = -op1 * op2 - op3
;; (operands 1 and 3 are both negated inside the fma).
3686(define_expand "fnms<mode>4"
3687  [(set (match_operand:FMAMODEM 0 "register_operand")
3688	(fma:FMAMODEM
3689	  (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3690	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
3691	  (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3692
3693;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Same mode list as FMAMODEM, but the SF/DF entries carry no
;; TARGET_SSE_MATH requirement.  Used by the fma4i_fmadd_<mode> expander.
3694(define_mode_iterator FMAMODE_AVX512
3695 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3696  (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3697  (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3698  (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3699  (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3700  (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3701  (V16SF "TARGET_AVX512F")
3702  (V8DF "TARGET_AVX512F")])
3703
;; Unconditional scalar, 128-bit and 256-bit float modes; used by the
;; *fma_* insn patterns whose condition is TARGET_FMA || TARGET_FMA4.
3704(define_mode_iterator FMAMODE
3705  [SF DF V4SF V2DF V8SF V4DF])
3706
;; Expander producing a plain fma (op0 = op1 * op2 + op3) over the
;; FMAMODE_AVX512 modes; it has no extra condition beyond the iterator's.
3707(define_expand "fma4i_fmadd_<mode>"
3708  [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3709	(fma:FMAMODE_AVX512
3710	  (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3711	  (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3712	  (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3713
;; Zero-masking fmadd expander.  Operand 4 is the mask.  The expansion is
;; delegated to gen_fma_fmadd_<mode>_maskz_1, passing CONST0_RTX (<MODE>mode)
;; as the additional operand placed before the mask.
3714(define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
3715  [(match_operand:VF_AVX512VL 0 "register_operand")
3716   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3717   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3718   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3719   (match_operand:<avx512fmaskmode> 4 "register_operand")]
3720  "TARGET_AVX512F && <round_mode512bit_condition>"
3721{
3722  emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
3723    operands[0], operands[1], operands[2], operands[3],
3724    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
3725  DONE;
3726})
3727
;; fmadd for FMA3/FMA4: op0 = op1 * op2 + op3.
;; Alternatives 0-2 (isa fma) emit the 132/213/231 forms, chosen by which
;; input operand is tied to the destination; alternatives 3-4 (isa fma4)
;; emit the four-operand vfmadd form.
3728(define_insn "*fma_fmadd_<mode>"
3729  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3730	(fma:FMAMODE
3731	  (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3732	  (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3733	  (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3734  "TARGET_FMA || TARGET_FMA4"
3735  "@
3736   vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3737   vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3738   vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3739   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3740   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3741  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3742   (set_attr "type" "ssemuladd")
3743   (set_attr "mode" "<MODE>")])
3744
3745;; AVX-512F is assumed as the baseline: the scalar and 512-bit modes are
;; unconditional, the 128-bit and 256-bit modes require TARGET_AVX512VL.
3746(define_mode_iterator VF_SF_AVX512VL
3747  [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
3748   DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
3749
;; AVX-512 fmadd over VF_SF_AVX512VL: op0 = op1 * op2 + op3, emitted as the
;; 132/213/231 form depending on which input is tied to the destination.
;; The <sd_maskz_name> and <round_name> attributes in the name, together with
;; the <sd_mask_op4>/<round_sd_mask_op4> pieces of the templates, presumably
;; yield the zero-masked and embedded-rounding variants via substitutions
;; defined elsewhere -- not visible in this part of the file.
3750(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
3751  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3752	(fma:VF_SF_AVX512VL
3753	  (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3754	  (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3755	  (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3756  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3757  "@
3758   vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3759   vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3760   vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3761  [(set_attr "type" "ssemuladd")
3762   (set_attr "mode" "<MODE>")])
3763
;; Merge-masked fmadd: the fma result is vec_merge'd with operand 1 under
;; mask operand 4, and operand 1 is tied to the destination.  Only the 132
;; and 213 forms are emitted.
3764(define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
3765  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3766	(vec_merge:VF_AVX512VL
3767	  (fma:VF_AVX512VL
3768	    (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3769	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3770	    (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3771	  (match_dup 1)
3772	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3773  "TARGET_AVX512F && <round_mode512bit_condition>"
3774  "@
3775   vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3776   vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3777  [(set_attr "type" "ssemuladd")
3778   (set_attr "mode" "<MODE>")])
3779
;; Merge-masked fmadd where the addend (operand 3) is tied to the
;; destination and is also the vec_merge fallback; emitted as the 231 form.
;; NOTE(review): the name carries <round_name>, yet the condition omits
;; <round_mode512bit_condition>, unlike <avx512>_fmadd_<mode>_mask<round_name>
;; -- confirm whether the omission is intentional.
3780(define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
3781  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3782	(vec_merge:VF_AVX512VL
3783	  (fma:VF_AVX512VL
3784	    (match_operand:VF_AVX512VL 1 "register_operand" "v")
3785	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3786	    (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3787	  (match_dup 3)
3788	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3789  "TARGET_AVX512F"
3790  "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3791  [(set_attr "type" "ssemuladd")
3792   (set_attr "mode" "<MODE>")])
3793
;; fmsub for FMA3/FMA4: op0 = op1 * op2 - op3 (operand 3 is negated).
;; Alternatives 0-2 (isa fma) emit the 132/213/231 forms; alternatives 3-4
;; (isa fma4) emit the four-operand vfmsub form.
3794(define_insn "*fma_fmsub_<mode>"
3795  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3796	(fma:FMAMODE
3797	  (match_operand:FMAMODE   1 "nonimmediate_operand" "%0,0,v,x,x")
3798	  (match_operand:FMAMODE   2 "nonimmediate_operand" "vm,v,vm,x,m")
3799	  (neg:FMAMODE
3800	    (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3801  "TARGET_FMA || TARGET_FMA4"
3802  "@
3803   vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3804   vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3805   vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3806   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3807   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3808  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3809   (set_attr "type" "ssemuladd")
3810   (set_attr "mode" "<MODE>")])
3811
;; AVX-512 fmsub over VF_SF_AVX512VL: op0 = op1 * op2 - op3, emitted as the
;; 132/213/231 form depending on which input is tied to the destination.
;; The name carries <sd_maskz_name> and <round_name>; the matching
;; <sd_mask_op4>/<round_sd_mask_op4> pieces appear in every template.
3812(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
3813  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3814	(fma:VF_SF_AVX512VL
3815	  (match_operand:VF_SF_AVX512VL   1 "<round_nimm_predicate>" "%0,0,v")
3816	  (match_operand:VF_SF_AVX512VL   2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3817	  (neg:VF_SF_AVX512VL
3818	    (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3819  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3820  "@
3821   vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3822   vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3823   vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3824  [(set_attr "type" "ssemuladd")
3825   (set_attr "mode" "<MODE>")])
3826
;; Merge-masked fmsub: the result is vec_merge'd with operand 1 under mask
;; operand 4, and operand 1 is tied to the destination.  Only the 132 and
;; 213 forms are emitted.
;; NOTE(review): the name carries <round_name>, yet the condition omits
;; <round_mode512bit_condition>, unlike <avx512>_fmsub_<mode>_mask3<round_name>
;; -- confirm whether the omission is intentional.
3827(define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
3828  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3829	(vec_merge:VF_AVX512VL
3830	  (fma:VF_AVX512VL
3831	    (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3832	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3833	    (neg:VF_AVX512VL
3834	      (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3835	  (match_dup 1)
3836	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3837  "TARGET_AVX512F"
3838  "@
3839   vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3840   vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3841  [(set_attr "type" "ssemuladd")
3842   (set_attr "mode" "<MODE>")])
3843
;; Merge-masked fmsub where the (negated) operand 3 is tied to the
;; destination and is also the vec_merge fallback; emitted as the 231 form.
3844(define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
3845  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3846	(vec_merge:VF_AVX512VL
3847	  (fma:VF_AVX512VL
3848	    (match_operand:VF_AVX512VL 1 "register_operand" "v")
3849	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3850	    (neg:VF_AVX512VL
3851	      (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3852	  (match_dup 3)
3853	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3854  "TARGET_AVX512F && <round_mode512bit_condition>"
3855  "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3856  [(set_attr "type" "ssemuladd")
3857   (set_attr "mode" "<MODE>")])
3858
;; fnmadd for FMA3/FMA4: op0 = -op1 * op2 + op3 (operand 1 is negated).
;; Alternatives 0-2 (isa fma) emit the 132/213/231 forms; alternatives 3-4
;; (isa fma4) emit the four-operand vfnmadd form.
3859(define_insn "*fma_fnmadd_<mode>"
3860  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3861	(fma:FMAMODE
3862	  (neg:FMAMODE
3863	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3864	  (match_operand:FMAMODE   2 "nonimmediate_operand" "vm,v,vm,x,m")
3865	  (match_operand:FMAMODE   3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3866  "TARGET_FMA || TARGET_FMA4"
3867  "@
3868   vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3869   vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3870   vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3871   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3872   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3873  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3874   (set_attr "type" "ssemuladd")
3875   (set_attr "mode" "<MODE>")])
3876
;; AVX-512 fnmadd over VF_SF_AVX512VL: op0 = -op1 * op2 + op3, emitted as
;; the 132/213/231 form depending on which input is tied to the destination.
;; The name carries <sd_maskz_name> and <round_name>; the matching
;; <sd_mask_op4>/<round_sd_mask_op4> pieces appear in every template.
3877(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
3878  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3879	(fma:VF_SF_AVX512VL
3880	  (neg:VF_SF_AVX512VL
3881	    (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3882	  (match_operand:VF_SF_AVX512VL   2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3883	  (match_operand:VF_SF_AVX512VL   3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3884  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3885  "@
3886   vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3887   vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3888   vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3889  [(set_attr "type" "ssemuladd")
3890   (set_attr "mode" "<MODE>")])
3891
;; Merge-masked fnmadd: the result is vec_merge'd with operand 1 under mask
;; operand 4, and operand 1 is tied to the destination.  Only the 132 and
;; 213 forms are emitted.
3892(define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
3893  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3894	(vec_merge:VF_AVX512VL
3895	  (fma:VF_AVX512VL
3896	    (neg:VF_AVX512VL
3897	      (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3898	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3899	    (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3900	  (match_dup 1)
3901	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3902  "TARGET_AVX512F && <round_mode512bit_condition>"
3903  "@
3904   vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3905   vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3906  [(set_attr "type" "ssemuladd")
3907   (set_attr "mode" "<MODE>")])
3908
;; Merge-masked fnmadd where the addend (operand 3) is tied to the
;; destination and is also the vec_merge fallback; emitted as the 231 form.
3909(define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
3910  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3911	(vec_merge:VF_AVX512VL
3912	  (fma:VF_AVX512VL
3913	    (neg:VF_AVX512VL
3914	      (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3915	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3916	    (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3917	  (match_dup 3)
3918	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3919  "TARGET_AVX512F && <round_mode512bit_condition>"
3920  "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3921  [(set_attr "type" "ssemuladd")
3922   (set_attr "mode" "<MODE>")])
3923
;; fnmsub for FMA3/FMA4: op0 = -op1 * op2 - op3 (operands 1 and 3 are
;; negated).  Alternatives 0-2 (isa fma) emit the 132/213/231 forms;
;; alternatives 3-4 (isa fma4) emit the four-operand vfnmsub form.
;; The templates deliberately contain no mask/rounding subst attributes:
;; this pattern's name carries neither <sd_maskz_name> nor <round_name>, and
;; the masked/rounded forms are provided by the separate
;; <sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name> pattern.
3924(define_insn "*fma_fnmsub_<mode>"
3925  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3926	(fma:FMAMODE
3927	  (neg:FMAMODE
3928	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3929	  (match_operand:FMAMODE   2 "nonimmediate_operand" "vm,v,vm,x,m")
3930	  (neg:FMAMODE
3931	    (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3932  "TARGET_FMA || TARGET_FMA4"
3933  "@
3934   vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3935   vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3936   vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3937   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3938   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3939  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3940   (set_attr "type" "ssemuladd")
3941   (set_attr "mode" "<MODE>")])
3942
;; AVX-512 fnmsub over VF_SF_AVX512VL: op0 = -op1 * op2 - op3, emitted as
;; the 132/213/231 form depending on which input is tied to the destination.
;; The name carries <sd_maskz_name> and <round_name>; the matching
;; <sd_mask_op4>/<round_sd_mask_op4> pieces appear in every template.
3943(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
3944  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3945	(fma:VF_SF_AVX512VL
3946	  (neg:VF_SF_AVX512VL
3947	    (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3948	  (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3949	  (neg:VF_SF_AVX512VL
3950	    (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3951  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3952  "@
3953   vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3954   vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3955   vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3956  [(set_attr "type" "ssemuladd")
3957   (set_attr "mode" "<MODE>")])
3958
;; Merge-masked fnmsub: the result is vec_merge'd with operand 1 under mask
;; operand 4, and operand 1 is tied to the destination.  Only the 132 and
;; 213 forms are emitted.
3959(define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
3960  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3961	(vec_merge:VF_AVX512VL
3962	  (fma:VF_AVX512VL
3963	    (neg:VF_AVX512VL
3964	      (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3965	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3966	    (neg:VF_AVX512VL
3967	      (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3968	  (match_dup 1)
3969	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3970  "TARGET_AVX512F && <round_mode512bit_condition>"
3971  "@
3972   vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3973   vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3974  [(set_attr "type" "ssemuladd")
3975   (set_attr "mode" "<MODE>")])
3976
;; Merge-masked fnmsub where the (negated) operand 3 is tied to the
;; destination and is also the vec_merge fallback; emitted as the 231 form.
;; NOTE(review): the name carries <round_name>, yet the condition omits
;; <round_mode512bit_condition>, unlike <avx512>_fnmsub_<mode>_mask<round_name>
;; -- confirm whether the omission is intentional.
3977(define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
3978  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3979	(vec_merge:VF_AVX512VL
3980	  (fma:VF_AVX512VL
3981	    (neg:VF_AVX512VL
3982	      (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3983	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3984	    (neg:VF_AVX512VL
3985	      (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3986	  (match_dup 3)
3987	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3988  "TARGET_AVX512F"
3989  "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3990  [(set_attr "type" "ssemuladd")
3991   (set_attr "mode" "<MODE>")])
3992
3993;; FMA parallel floating point multiply addsub and subadd operations.
3994
3995;; It would be possible to represent these without the UNSPEC as
3996;;
3997;; (vec_merge
3998;;   (fma op1 op2 op3)
3999;;   (fma op1 op2 (neg op3))
4000;;   (merge-const))
4001;;
4002;; But this doesn't seem useful in practice.
4003
;; Expander for the fmaddsub operation over the VF modes, represented as an
;; UNSPEC_FMADDSUB of the three input operands.
4004(define_expand "fmaddsub_<mode>"
4005  [(set (match_operand:VF 0 "register_operand")
4006	(unspec:VF
4007	  [(match_operand:VF 1 "nonimmediate_operand")
4008	   (match_operand:VF 2 "nonimmediate_operand")
4009	   (match_operand:VF 3 "nonimmediate_operand")]
4010	  UNSPEC_FMADDSUB))]
4011  "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
4012
;; Zero-masking fmaddsub expander.  Operand 4 is the mask.  The expansion is
;; delegated to gen_fma_fmaddsub_<mode>_maskz_1, passing
;; CONST0_RTX (<MODE>mode) as the additional operand placed before the mask.
;; NOTE(review): the fmadd counterpart of this expander also tests
;; <round_mode512bit_condition>; this one tests only TARGET_AVX512F --
;; confirm whether the difference is intentional.
4013(define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
4014  [(match_operand:VF_AVX512VL 0 "register_operand")
4015   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4016   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4017   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4018   (match_operand:<avx512fmaskmode> 4 "register_operand")]
4019  "TARGET_AVX512F"
4020{
4021  emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
4022    operands[0], operands[1], operands[2], operands[3],
4023    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4024  DONE;
4025})
4026
;; fmaddsub for FMA3/FMA4 over the 128-bit and 256-bit float vector modes,
;; matched as UNSPEC_FMADDSUB.  Alternatives 0-2 (isa fma) emit the
;; 132/213/231 forms; alternatives 3-4 (isa fma4) emit the four-operand
;; vfmaddsub form.
4027(define_insn "*fma_fmaddsub_<mode>"
4028  [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4029	(unspec:VF_128_256
4030	  [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4031	   (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4032	   (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
4033	  UNSPEC_FMADDSUB))]
4034  "TARGET_FMA || TARGET_FMA4"
4035  "@
4036   vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4037   vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4038   vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4039   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4040   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4041  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4042   (set_attr "type" "ssemuladd")
4043   (set_attr "mode" "<MODE>")])
4044
;; AVX-512 fmaddsub over VF_SF_AVX512VL, matched as UNSPEC_FMADDSUB and
;; emitted as the 132/213/231 form depending on which input is tied to the
;; destination.  The name carries <sd_maskz_name> and <round_name>; the
;; matching <sd_mask_op4>/<round_sd_mask_op4> pieces appear in every template.
4045(define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
4046  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4047	(unspec:VF_SF_AVX512VL
4048	  [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4049	   (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4050	   (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
4051	  UNSPEC_FMADDSUB))]
4052  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4053  "@
4054   vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4055   vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4056   vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4057  [(set_attr "type" "ssemuladd")
4058   (set_attr "mode" "<MODE>")])
4059
;; Merge-masked fmaddsub: the UNSPEC_FMADDSUB result is vec_merge'd with
;; operand 1 under mask operand 4, and operand 1 is tied to the destination.
;; Only the 132 and 213 forms are emitted.
;; NOTE(review): the name carries <round_name>, yet the condition does not
;; test <round_mode512bit_condition> -- confirm whether that is intentional.
4060(define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
4061  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4062	(vec_merge:VF_AVX512VL
4063	  (unspec:VF_AVX512VL
4064	    [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4065	     (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
4066	     (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
4067	    UNSPEC_FMADDSUB)
4068	  (match_dup 1)
4069	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4070  "TARGET_AVX512F"
4071  "@
4072   vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4073   vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4074  [(set_attr "type" "ssemuladd")
4075   (set_attr "mode" "<MODE>")])
4076
;; Merge-masked fmaddsub where operand 3 is tied to the destination and is
;; also the vec_merge fallback; emitted as the 231 form.
;; NOTE(review): the name carries <round_name>, yet the condition does not
;; test <round_mode512bit_condition> -- confirm whether that is intentional.
4077(define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
4078  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4079	(vec_merge:VF_AVX512VL
4080	  (unspec:VF_AVX512VL
4081	    [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4082	     (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
4083	     (match_operand:VF_AVX512VL 3 "register_operand" "0")]
4084	    UNSPEC_FMADDSUB)
4085	  (match_dup 3)
4086	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4087  "TARGET_AVX512F"
4088  "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4089  [(set_attr "type" "ssemuladd")
4090   (set_attr "mode" "<MODE>")])
4091
;; fmsubadd for FMA3/FMA4 over the 128-bit and 256-bit float vector modes.
;; It is represented as UNSPEC_FMADDSUB with operand 3 negated.
;; Alternatives 0-2 (isa fma) emit the 132/213/231 forms; alternatives 3-4
;; (isa fma4) emit the four-operand vfmsubadd form.
4092(define_insn "*fma_fmsubadd_<mode>"
4093  [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4094	(unspec:VF_128_256
4095	  [(match_operand:VF_128_256   1 "nonimmediate_operand" "%0,0,v,x,x")
4096	   (match_operand:VF_128_256   2 "nonimmediate_operand" "vm,v,vm,x,m")
4097	   (neg:VF_128_256
4098	     (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
4099	  UNSPEC_FMADDSUB))]
4100  "TARGET_FMA || TARGET_FMA4"
4101  "@
4102   vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4103   vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4104   vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4105   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4106   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4107  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4108   (set_attr "type" "ssemuladd")
4109   (set_attr "mode" "<MODE>")])
4110
;; AVX-512 fmsubadd over VF_SF_AVX512VL, represented as UNSPEC_FMADDSUB with
;; operand 3 negated and emitted as the 132/213/231 form depending on which
;; input is tied to the destination.  The name carries <sd_maskz_name> and
;; <round_name>; the matching <sd_mask_op4>/<round_sd_mask_op4> pieces appear
;; in every template.
4111(define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
4112  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4113	(unspec:VF_SF_AVX512VL
4114	  [(match_operand:VF_SF_AVX512VL   1 "<round_nimm_predicate>" "%0,0,v")
4115	   (match_operand:VF_SF_AVX512VL   2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4116	   (neg:VF_SF_AVX512VL
4117	     (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
4118	  UNSPEC_FMADDSUB))]
4119  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4120  "@
4121   vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4122   vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4123   vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4124  [(set_attr "type" "ssemuladd")
4125   (set_attr "mode" "<MODE>")])
4126
;; Merge-masked fmsubadd: the UNSPEC_FMADDSUB result (operand 3 negated) is
;; vec_merge'd with operand 1 under mask operand 4, and operand 1 is tied to
;; the destination.  Only the 132 and 213 forms are emitted.
;; NOTE(review): the name carries <round_name>, yet the condition does not
;; test <round_mode512bit_condition> -- confirm whether that is intentional.
4127(define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
4128  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4129	(vec_merge:VF_AVX512VL
4130	  (unspec:VF_AVX512VL
4131	    [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4132	     (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
4133	     (neg:VF_AVX512VL
4134	       (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
4135	    UNSPEC_FMADDSUB)
4136	  (match_dup 1)
4137	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4138  "TARGET_AVX512F"
4139  "@
4140   vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4141   vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4142  [(set_attr "type" "ssemuladd")
4143   (set_attr "mode" "<MODE>")])
4144
;; Merge-masked fmsubadd where the (negated) operand 3 is tied to the
;; destination and is also the vec_merge fallback; emitted as the 231 form.
;; NOTE(review): the name carries <round_name>, yet the condition does not
;; test <round_mode512bit_condition> -- confirm whether that is intentional.
4145(define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
4146  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4147	(vec_merge:VF_AVX512VL
4148	  (unspec:VF_AVX512VL
4149	    [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4150	     (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
4151	     (neg:VF_AVX512VL
4152	       (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
4153	    UNSPEC_FMADDSUB)
4154	  (match_dup 3)
4155	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4156  "TARGET_AVX512F"
4157  "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4158  [(set_attr "type" "ssemuladd")
4159   (set_attr "mode" "<MODE>")])
4160
4161;; FMA3 floating point scalar intrinsics. These merge result with
4162;; high-order elements from the destination register.
4163
;; Scalar-intrinsic fmadd expander over VF_128: element 0 (vec_merge mask
;; const_int 1) takes the fma result, the remaining elements come from
;; operand 1.
4164(define_expand "fmai_vmfmadd_<mode><round_name>"
4165  [(set (match_operand:VF_128 0 "register_operand")
4166	(vec_merge:VF_128
4167	  (fma:VF_128
4168	    (match_operand:VF_128 1 "<round_nimm_predicate>")
4169	    (match_operand:VF_128 2 "<round_nimm_predicate>")
4170	    (match_operand:VF_128 3 "<round_nimm_predicate>"))
4171	  (match_dup 1)
4172	  (const_int 1)))]
4173  "TARGET_FMA")
4174
;; Scalar FMA3 multiply-add: fma (op1, op2, op3) merged with operand 1
;; under merge mask 1.  Operand 1 is tied to the destination in both
;; alternatives; alternative 0 prints vfmadd132 (operand 2 carries
;; <round_constraint>), alternative 1 prints vfmadd213 (operand 3 does).
;; Intel-syntax operands 2/3 are printed through the <iptr> modifier.
4175(define_insn "*fmai_fmadd_<mode>"
4176  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4177        (vec_merge:VF_128
4178	  (fma:VF_128
4179	    (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
4180	    (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
4181	    (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
4182	  (match_dup 1)
4183	  (const_int 1)))]
4184  "TARGET_FMA || TARGET_AVX512F"
4185  "@
4186   vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4187   vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4188  [(set_attr "type" "ssemuladd")
4189   (set_attr "mode" "<MODE>")])
4190
;; Scalar FMA3 multiply-subtract: fma (op1, op2, -op3) merged with
;; operand 1 under merge mask 1.  Operand 1 is tied to the destination.
;; Alternative 0 prints vfmsub132, alternative 1 prints vfmsub213.
4191(define_insn "*fmai_fmsub_<mode>"
4192  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4193        (vec_merge:VF_128
4194	  (fma:VF_128
4195	    (match_operand:VF_128   1 "<round_nimm_predicate>" "0,0")
4196	    (match_operand:VF_128   2 "<round_nimm_predicate>" "<round_constraint>,v")
4197	    (neg:VF_128
4198	      (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
4199	  (match_dup 1)
4200	  (const_int 1)))]
4201  "TARGET_FMA || TARGET_AVX512F"
4202  "@
4203   vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4204   vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4205  [(set_attr "type" "ssemuladd")
4206   (set_attr "mode" "<MODE>")])
4207
;; Scalar FMA3 negated multiply-add: fma (-op2, op1, op3) merged with
;; operand 1 under merge mask 1.  Note the operand order inside the fma:
;; the negated multiplicand is operand 2 and the second multiplicand is
;; operand 1, which is tied to the destination.  Alternative 0 prints
;; vfnmadd132, alternative 1 prints vfnmadd213.
4208(define_insn "*fmai_fnmadd_<mode><round_name>"
4209  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4210        (vec_merge:VF_128
4211	  (fma:VF_128
4212	    (neg:VF_128
4213	      (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4214	    (match_operand:VF_128   1 "<round_nimm_predicate>" "0,0")
4215	    (match_operand:VF_128   3 "<round_nimm_predicate>" "v,<round_constraint>"))
4216	  (match_dup 1)
4217	  (const_int 1)))]
4218  "TARGET_FMA || TARGET_AVX512F"
4219  "@
4220   vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4221   vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4222  [(set_attr "type" "ssemuladd")
4223   (set_attr "mode" "<MODE>")])
4224
;; Scalar FMA3 negated multiply-subtract: fma (-op2, op1, -op3) merged
;; with operand 1 under merge mask 1.  As in the fnmadd pattern, operand 2
;; is the negated multiplicand and operand 1 (tied to the destination) is
;; the second multiplicand.  Alternative 0 prints vfnmsub132, alternative 1
;; prints vfnmsub213.
4225(define_insn "*fmai_fnmsub_<mode><round_name>"
4226  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4227        (vec_merge:VF_128
4228	  (fma:VF_128
4229	    (neg:VF_128
4230	      (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
4231	    (match_operand:VF_128   1 "<round_nimm_predicate>" " 0, 0")
4232	    (neg:VF_128
4233	      (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
4234	  (match_dup 1)
4235	  (const_int 1)))]
4236  "TARGET_FMA || TARGET_AVX512F"
4237  "@
4238   vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4239   vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4240  [(set_attr "type" "ssemuladd")
4241   (set_attr "mode" "<MODE>")])
4242
4243;; FMA4 floating point scalar intrinsics.  These write the
4244;; entire destination register, with the high-order elements zeroed.
4245
;; Named expander for the scalar FMA4 multiply-add intrinsic.  The
;; preparation statement sets operands[4] to the zero vector
;; CONST0_RTX (<MODE>mode), so the generated RTL is
;;   vec_merge (fma (op1, op2, op3), 0, 1)
;; i.e. the elements not selected by merge mask 1 are zero.
4246(define_expand "fma4i_vmfmadd_<mode>"
4247  [(set (match_operand:VF_128 0 "register_operand")
4248	(vec_merge:VF_128
4249	  (fma:VF_128
4250	    (match_operand:VF_128 1 "nonimmediate_operand")
4251	    (match_operand:VF_128 2 "nonimmediate_operand")
4252	    (match_operand:VF_128 3 "nonimmediate_operand"))
4253	  (match_dup 4)
4254	  (const_int 1)))]
4255  "TARGET_FMA4"
4256  "operands[4] = CONST0_RTX (<MODE>mode);")
4257
;; Scalar FMA4 multiply-add: fma (op1, op2, op3) merged with a constant
;; zero vector (operand 4, const0_operand) under merge mask 1.  The
;; destination is a separate operand (four-operand output template).
;; The two alternatives allow memory for operand 3 or for operand 2.
4258(define_insn "*fma4i_vmfmadd_<mode>"
4259  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4260	(vec_merge:VF_128
4261	  (fma:VF_128
4262	    (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4263	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4264	    (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
4265	  (match_operand:VF_128 4 "const0_operand")
4266	  (const_int 1)))]
4267  "TARGET_FMA4"
4268  "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4269  [(set_attr "type" "ssemuladd")
4270   (set_attr "mode" "<MODE>")])
4271
;; Scalar FMA4 multiply-subtract: fma (op1, op2, -op3) merged with a
;; constant zero vector (operand 4) under merge mask 1.  Same operand
;; constraints and four-operand output layout as the vmfmadd pattern.
4272(define_insn "*fma4i_vmfmsub_<mode>"
4273  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4274	(vec_merge:VF_128
4275	  (fma:VF_128
4276	    (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4277	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4278	    (neg:VF_128
4279	      (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
4280	  (match_operand:VF_128 4 "const0_operand")
4281	  (const_int 1)))]
4282  "TARGET_FMA4"
4283  "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4284  [(set_attr "type" "ssemuladd")
4285   (set_attr "mode" "<MODE>")])
4286
;; Scalar FMA4 negated multiply-add: fma (-op1, op2, op3) merged with a
;; constant zero vector (operand 4) under merge mask 1.  Unlike the FMA3
;; fnmadd pattern, the negated multiplicand here is operand 1.
4287(define_insn "*fma4i_vmfnmadd_<mode>"
4288  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4289	(vec_merge:VF_128
4290	  (fma:VF_128
4291	    (neg:VF_128
4292	      (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4293	    (match_operand:VF_128   2 "nonimmediate_operand" " x,m")
4294	    (match_operand:VF_128   3 "nonimmediate_operand" "xm,x"))
4295	  (match_operand:VF_128 4 "const0_operand")
4296	  (const_int 1)))]
4297  "TARGET_FMA4"
4298  "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4299  [(set_attr "type" "ssemuladd")
4300   (set_attr "mode" "<MODE>")])
4301
;; Scalar FMA4 negated multiply-subtract: fma (-op1, op2, -op3) merged
;; with a constant zero vector (operand 4) under merge mask 1.
4302(define_insn "*fma4i_vmfnmsub_<mode>"
4303  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4304	(vec_merge:VF_128
4305	  (fma:VF_128
4306	    (neg:VF_128
4307	      (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4308	    (match_operand:VF_128   2 "nonimmediate_operand" " x,m")
4309	    (neg:VF_128
4310	      (match_operand:VF_128   3 "nonimmediate_operand" "xm,x")))
4311	  (match_operand:VF_128 4 "const0_operand")
4312	  (const_int 1)))]
4313  "TARGET_FMA4"
4314  "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4315  [(set_attr "type" "ssemuladd")
4316   (set_attr "mode" "<MODE>")])
4317
4318;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4319;;
4320;; Parallel single-precision floating point conversion operations
4321;;
4322;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4323
;; cvtpi2ps: converts the V2SI in operand 2 (MMX register "y" or memory)
;; to V2SF, duplicates it to V4SF and merges it with operand 1 under merge
;; mask 3.  Operand 1 is tied to the destination ("0"), so the elements
;; not covered by the mask keep their previous value.
4324(define_insn "sse_cvtpi2ps"
4325  [(set (match_operand:V4SF 0 "register_operand" "=x")
4326	(vec_merge:V4SF
4327	  (vec_duplicate:V4SF
4328	    (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
4329	  (match_operand:V4SF 1 "register_operand" "0")
4330	  (const_int 3)))]
4331  "TARGET_SSE"
4332  "cvtpi2ps\t{%2, %0|%0, %2}"
4333  [(set_attr "type" "ssecvt")
4334   (set_attr "mode" "V4SF")])
4335
;; cvtps2pi: float-to-int conversion expressed as UNSPEC_FIX_NOTRUNC on the
;; V4SF source, of which elements 0 and 1 are selected into a V2SI MMX
;; register ("=y").  The source may be an SSE register or memory.
4336(define_insn "sse_cvtps2pi"
4337  [(set (match_operand:V2SI 0 "register_operand" "=y")
4338	(vec_select:V2SI
4339	  (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
4340		       UNSPEC_FIX_NOTRUNC)
4341	  (parallel [(const_int 0) (const_int 1)])))]
4342  "TARGET_SSE"
4343  "cvtps2pi\t{%1, %0|%0, %q1}"
4344  [(set_attr "type" "ssecvt")
4345   (set_attr "unit" "mmx")
4346   (set_attr "mode" "DI")])
4347
;; cvttps2pi: same shape as sse_cvtps2pi, but the conversion is the RTL
;; `fix' code instead of UNSPEC_FIX_NOTRUNC.  Elements 0 and 1 of the
;; converted V4SI are selected into a V2SI MMX register.
4348(define_insn "sse_cvttps2pi"
4349  [(set (match_operand:V2SI 0 "register_operand" "=y")
4350	(vec_select:V2SI
4351	  (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
4352	  (parallel [(const_int 0) (const_int 1)])))]
4353  "TARGET_SSE"
4354  "cvttps2pi\t{%1, %0|%0, %q1}"
4355  [(set_attr "type" "ssecvt")
4356   (set_attr "unit" "mmx")
4357   (set_attr "prefix_rep" "0")
4358   (set_attr "mode" "SF")])
4359
;; cvtsi2ss: converts the SImode integer in operand 2 to SF, duplicates it
;; to V4SF and merges it with operand 1 under merge mask 1.
;; Alternatives 0 and 1 (isa "noavx") are the two-operand forms with a
;; register or memory source and operand 1 tied to the destination.
;; Alternative 2 (isa "avx") is the three-operand vcvtsi2ss form, which
;; also prints <round_op3>.
4360(define_insn "sse_cvtsi2ss<round_name>"
4361  [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4362	(vec_merge:V4SF
4363	  (vec_duplicate:V4SF
4364	    (float:SF (match_operand:SI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
4365	  (match_operand:V4SF 1 "register_operand" "0,0,v")
4366	  (const_int 1)))]
4367  "TARGET_SSE"
4368  "@
4369   cvtsi2ss\t{%2, %0|%0, %2}
4370   cvtsi2ss\t{%2, %0|%0, %2}
4371   vcvtsi2ss\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4372  [(set_attr "isa" "noavx,noavx,avx")
4373   (set_attr "type" "sseicvt")
4374   (set_attr "athlon_decode" "vector,double,*")
4375   (set_attr "amdfam10_decode" "vector,double,*")
4376   (set_attr "bdver1_decode" "double,direct,*")
4377   (set_attr "btver2_decode" "double,double,double")
4378   (set_attr "znver1_decode" "double,double,double")
4379   (set_attr "prefix" "orig,orig,maybe_evex")
4380   (set_attr "mode" "SF")])
4381
;; 64-bit source variant of sse_cvtsi2ss: operand 2 is DImode and the
;; pattern additionally requires TARGET_64BIT.  The non-AVX alternatives
;; set prefix_rex to "1"; the AVX alternative sets length_vex to "4".
4382(define_insn "sse_cvtsi2ssq<round_name>"
4383  [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4384	(vec_merge:V4SF
4385	  (vec_duplicate:V4SF
4386	    (float:SF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
4387	  (match_operand:V4SF 1 "register_operand" "0,0,v")
4388	  (const_int 1)))]
4389  "TARGET_SSE && TARGET_64BIT"
4390  "@
4391   cvtsi2ssq\t{%2, %0|%0, %2}
4392   cvtsi2ssq\t{%2, %0|%0, %2}
4393   vcvtsi2ssq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4394  [(set_attr "isa" "noavx,noavx,avx")
4395   (set_attr "type" "sseicvt")
4396   (set_attr "athlon_decode" "vector,double,*")
4397   (set_attr "amdfam10_decode" "vector,double,*")
4398   (set_attr "bdver1_decode" "double,direct,*")
4399   (set_attr "btver2_decode" "double,double,double")
4400   (set_attr "length_vex" "*,*,4")
4401   (set_attr "prefix_rex" "1,1,*")
4402   (set_attr "prefix" "orig,orig,maybe_evex")
4403   (set_attr "mode" "SF")])
4404
;; cvtss2si on a vector source: element 0 of the V4SF operand 1 is
;; converted to an SImode general register through UNSPEC_FIX_NOTRUNC.
;; Alternative 0 takes the source in a "v" register, alternative 1 through
;; <round_constraint2>.
4405(define_insn "sse_cvtss2si<round_name>"
4406  [(set (match_operand:SI 0 "register_operand" "=r,r")
4407	(unspec:SI
4408	  [(vec_select:SF
4409	     (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4410	     (parallel [(const_int 0)]))]
4411	  UNSPEC_FIX_NOTRUNC))]
4412  "TARGET_SSE"
4413  "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
4414  [(set_attr "type" "sseicvt")
4415   (set_attr "athlon_decode" "double,vector")
4416   (set_attr "bdver1_decode" "double,double")
4417   (set_attr "prefix_rep" "1")
4418   (set_attr "prefix" "maybe_vex")
4419   (set_attr "mode" "SI")])
4420
;; cvtss2si on a scalar source: same UNSPEC_FIX_NOTRUNC conversion to
;; SImode, but operand 1 is a plain SFmode value (register "v" or memory
;; "m") rather than element 0 of a V4SF.
4421(define_insn "sse_cvtss2si_2"
4422  [(set (match_operand:SI 0 "register_operand" "=r,r")
4423	(unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
4424		   UNSPEC_FIX_NOTRUNC))]
4425  "TARGET_SSE"
4426  "%vcvtss2si\t{%1, %0|%0, %k1}"
4427  [(set_attr "type" "sseicvt")
4428   (set_attr "athlon_decode" "double,vector")
4429   (set_attr "amdfam10_decode" "double,double")
4430   (set_attr "bdver1_decode" "double,double")
4431   (set_attr "prefix_rep" "1")
4432   (set_attr "prefix" "maybe_vex")
4433   (set_attr "mode" "SI")])
4434
;; DImode-result variant of sse_cvtss2si: element 0 of the V4SF operand 1
;; is converted through UNSPEC_FIX_NOTRUNC into a 64-bit general register.
;; Requires TARGET_64BIT; the AT&T mnemonic carries an optional {q} suffix.
4435(define_insn "sse_cvtss2siq<round_name>"
4436  [(set (match_operand:DI 0 "register_operand" "=r,r")
4437	(unspec:DI
4438	  [(vec_select:SF
4439	     (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4440	     (parallel [(const_int 0)]))]
4441	  UNSPEC_FIX_NOTRUNC))]
4442  "TARGET_SSE && TARGET_64BIT"
4443  "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
4444  [(set_attr "type" "sseicvt")
4445   (set_attr "athlon_decode" "double,vector")
4446   (set_attr "bdver1_decode" "double,double")
4447   (set_attr "prefix_rep" "1")
4448   (set_attr "prefix" "maybe_vex")
4449   (set_attr "mode" "DI")])
4450
;; DImode-result variant of sse_cvtss2si_2: UNSPEC_FIX_NOTRUNC conversion
;; of a plain SFmode operand (register or memory) into a 64-bit general
;; register.  Requires TARGET_64BIT.
4451(define_insn "sse_cvtss2siq_2"
4452  [(set (match_operand:DI 0 "register_operand" "=r,r")
4453	(unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
4454		   UNSPEC_FIX_NOTRUNC))]
4455  "TARGET_SSE && TARGET_64BIT"
4456  "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
4457  [(set_attr "type" "sseicvt")
4458   (set_attr "athlon_decode" "double,vector")
4459   (set_attr "amdfam10_decode" "double,double")
4460   (set_attr "bdver1_decode" "double,double")
4461   (set_attr "prefix_rep" "1")
4462   (set_attr "prefix" "maybe_vex")
4463   (set_attr "mode" "DI")])
4464
;; cvttss2si: element 0 of the V4SF operand 1 is converted to SImode with
;; the RTL `fix' code (not UNSPEC_FIX_NOTRUNC).  Uses the
;; <round_saeonly_*> substitution attributes instead of the <round_*> ones
;; used by the cvtss2si patterns.
4465(define_insn "sse_cvttss2si<round_saeonly_name>"
4466  [(set (match_operand:SI 0 "register_operand" "=r,r")
4467	(fix:SI
4468	  (vec_select:SF
4469	    (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4470	    (parallel [(const_int 0)]))))]
4471  "TARGET_SSE"
4472  "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4473  [(set_attr "type" "sseicvt")
4474   (set_attr "athlon_decode" "double,vector")
4475   (set_attr "amdfam10_decode" "double,double")
4476   (set_attr "bdver1_decode" "double,double")
4477   (set_attr "prefix_rep" "1")
4478   (set_attr "prefix" "maybe_vex")
4479   (set_attr "mode" "SI")])
4480
;; DImode-result variant of sse_cvttss2si: `fix' conversion of element 0
;; of the V4SF operand 1 into a 64-bit general register.  Requires
;; TARGET_64BIT.
;; NOTE(review): the second alternative here uses
;; <round_saeonly_constraint>, whereas the SImode sibling sse_cvttss2si
;; uses <round_saeonly_constraint2>.  Both attributes are defined outside
;; this chunk -- confirm the difference is intentional.
4481(define_insn "sse_cvttss2siq<round_saeonly_name>"
4482  [(set (match_operand:DI 0 "register_operand" "=r,r")
4483	(fix:DI
4484	  (vec_select:SF
4485	    (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
4486	    (parallel [(const_int 0)]))))]
4487  "TARGET_SSE && TARGET_64BIT"
4488  "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4489  [(set_attr "type" "sseicvt")
4490   (set_attr "athlon_decode" "double,vector")
4491   (set_attr "amdfam10_decode" "double,double")
4492   (set_attr "bdver1_decode" "double,double")
4493   (set_attr "prefix_rep" "1")
4494   (set_attr "prefix" "maybe_vex")
4495   (set_attr "mode" "DI")])
4496
;; vcvtusi2ss / vcvtusi2sd with a 32-bit source: the SImode operand 2 is
;; converted with unsigned_float to the scalar mode of VF_128, duplicated
;; across the vector and merged with operand 1 under merge mask 1.
;; Operand 1 is a separate "v" register (three-operand form).  The insn
;; condition additionally tests <round_modev4sf_condition>.
4497(define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
4498  [(set (match_operand:VF_128 0 "register_operand" "=v")
4499	(vec_merge:VF_128
4500	  (vec_duplicate:VF_128
4501	    (unsigned_float:<ssescalarmode>
4502	      (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4503	  (match_operand:VF_128 1 "register_operand" "v")
4504	  (const_int 1)))]
4505  "TARGET_AVX512F && <round_modev4sf_condition>"
4506  "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4507  [(set_attr "type" "sseicvt")
4508   (set_attr "prefix" "evex")
4509   (set_attr "mode" "<ssescalarmode>")])
4510
;; 64-bit source variant of the cvtusi2 pattern: operand 2 is DImode and
;; the condition is TARGET_AVX512F && TARGET_64BIT.  The RTL shape and the
;; output template are otherwise the same as the 32-bit pattern.
4511(define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
4512  [(set (match_operand:VF_128 0 "register_operand" "=v")
4513	(vec_merge:VF_128
4514	  (vec_duplicate:VF_128
4515	    (unsigned_float:<ssescalarmode>
4516	      (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4517	  (match_operand:VF_128 1 "register_operand" "v")
4518	  (const_int 1)))]
4519  "TARGET_AVX512F && TARGET_64BIT"
4520  "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4521  [(set_attr "type" "sseicvt")
4522   (set_attr "prefix" "evex")
4523   (set_attr "mode" "<ssescalarmode>")])
4524
;; Packed signed integer -> single-precision float conversion over the
;; VF1 modes.  Alternative 0 (isa "noavx") prints plain cvtdq2ps;
;; alternative 1 (isa "avx") prints vcvtdq2ps together with the mask and
;; rounding substitutions.
4525(define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
4526  [(set (match_operand:VF1 0 "register_operand" "=x,v")
4527	(float:VF1
4528	  (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
4529  "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
4530  "@
4531   cvtdq2ps\t{%1, %0|%0, %1}
4532   vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4533  [(set_attr "isa" "noavx,avx")
4534   (set_attr "type" "ssecvt")
4535   (set_attr "prefix" "maybe_vex")
4536   (set_attr "mode" "<sseinsnmode>")])
4537
;; Packed unsigned integer -> single-precision float conversion
;; (unsigned_float) over the VF1_AVX512VL modes, printed as vcvtudq2ps.
;; Single alternative, EVEX-prefixed, enabled for TARGET_AVX512F.
4538(define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
4539  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
4540	(unsigned_float:VF1_AVX512VL
4541	  (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4542  "TARGET_AVX512F"
4543  "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4544  [(set_attr "type" "ssecvt")
4545   (set_attr "prefix" "evex")
4546   (set_attr "mode" "<MODE>")])
4547
;; Expander for unsigned integer vector -> float vector conversion.
;; Dispatch performed by the C body:
;;   - V16SFmode: emit gen_ufloatv16siv16sf2;
;;   - otherwise, if TARGET_AVX512VL: emit gen_ufloatv4siv4sf2 for
;;     V4SFmode, else gen_ufloatv8siv8sf2;
;;   - otherwise: call ix86_expand_vector_convert_uns_vsivsf.
;; Note that the last `else' in the body is indented like the outer
;; `if' but, by C's rules, pairs with the inner `if (TARGET_AVX512VL)';
;; the dispatch listed above reflects that binding.
4548(define_expand "floatuns<sseintvecmodelower><mode>2"
4549  [(match_operand:VF1 0 "register_operand")
4550   (match_operand:<sseintvecmode> 1 "register_operand")]
4551  "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
4552{
4553  if (<MODE>mode == V16SFmode)
4554    emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
4555  else
4556    if (TARGET_AVX512VL)
4557      {
4558	if (<MODE>mode == V4SFmode)
4559	  emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
4560	else
4561	  emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
4562      }
4563  else
4564    ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
4565
4566  DONE;
4567})
4568
4569
;; Mode attribute mapping each 32-bit-integer vector mode (V16SI, V8SI,
;; V4SI) to the lowercase name of the SF vector mode with the same number
;; of elements, for splicing into pattern names.
4570;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
4571(define_mode_attr sf2simodelower
4572  [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
4573
;; Packed single-precision float -> signed 32-bit integer conversion
;; expressed as UNSPEC_FIX_NOTRUNC over the VI4_AVX modes, printed as
;; cvtps2dq through the %v-prefixed template.  prefix_data16 is "*" when
;; TARGET_AVX is set and "1" otherwise.
4574(define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
4575  [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
4576	(unspec:VI4_AVX
4577	  [(match_operand:<ssePSmode> 1 "vector_operand" "vBm")]
4578	  UNSPEC_FIX_NOTRUNC))]
4579  "TARGET_SSE2 && <mask_mode512bit_condition>"
4580  "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4581  [(set_attr "type" "ssecvt")
4582   (set (attr "prefix_data16")
4583     (if_then_else
4584       (match_test "TARGET_AVX")
4585     (const_string "*")
4586     (const_string "1")))
4587   (set_attr "prefix" "maybe_vex")
4588   (set_attr "mode" "<sseinsnmode>")])
4589
;; 512-bit vcvtps2dq: V16SF -> V16SI through UNSPEC_FIX_NOTRUNC, with the
;; mask and rounding substitutions applied in the output template.
;; EVEX-prefixed, mode XI.
4590(define_insn "avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
4591  [(set (match_operand:V16SI 0 "register_operand" "=v")
4592	(unspec:V16SI
4593	  [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
4594	  UNSPEC_FIX_NOTRUNC))]
4595  "TARGET_AVX512F"
4596  "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4597  [(set_attr "type" "ssecvt")
4598   (set_attr "prefix" "evex")
4599   (set_attr "mode" "XI")])
4600
;; vcvtps2udq: packed single-precision float -> unsigned 32-bit integer
;; conversion expressed as UNSPEC_UNSIGNED_FIX_NOTRUNC over the
;; VI4_AVX512VL modes.  EVEX-prefixed, enabled for TARGET_AVX512F.
4601(define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
4602  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
4603	(unspec:VI4_AVX512VL
4604	  [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
4605	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4606  "TARGET_AVX512F"
4607  "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4608  [(set_attr "type" "ssecvt")
4609   (set_attr "prefix" "evex")
4610   (set_attr "mode" "<sseinsnmode>")])
4611
;; vcvtps2qq for the 256- and 512-bit destinations (VI8_256_512):
;; UNSPEC_FIX_NOTRUNC conversion of a <ssePSmode2> source into 64-bit
;; integer elements.  Requires TARGET_AVX512DQ.
4612(define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
4613  [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4614	(unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4615		     UNSPEC_FIX_NOTRUNC))]
4616  "TARGET_AVX512DQ && <round_mode512bit_condition>"
4617  "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4618  [(set_attr "type" "ssecvt")
4619   (set_attr "prefix" "evex")
4620   (set_attr "mode" "<sseinsnmode>")])
4621
;; 128-bit vcvtps2qq: only elements 0 and 1 of the V4SF source are
;; selected (as V2SF) and converted through UNSPEC_FIX_NOTRUNC to V2DI.
;; Requires both TARGET_AVX512DQ and TARGET_AVX512VL; no rounding
;; substitution is applied to this pattern.
4622(define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
4623  [(set (match_operand:V2DI 0 "register_operand" "=v")
4624	(unspec:V2DI
4625	  [(vec_select:V2SF
4626	     (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4627	     (parallel [(const_int 0) (const_int 1)]))]
4628	  UNSPEC_FIX_NOTRUNC))]
4629  "TARGET_AVX512DQ && TARGET_AVX512VL"
4630  "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4631  [(set_attr "type" "ssecvt")
4632   (set_attr "prefix" "evex")
4633   (set_attr "mode" "TI")])
4634
;; vcvtps2uqq for the 256- and 512-bit destinations: the unsigned
;; counterpart of the cvtps2qq pattern, using
;; UNSPEC_UNSIGNED_FIX_NOTRUNC.  Requires TARGET_AVX512DQ.
4635(define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
4636  [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4637	(unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4638		     UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4639  "TARGET_AVX512DQ && <round_mode512bit_condition>"
4640  "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4641  [(set_attr "type" "ssecvt")
4642   (set_attr "prefix" "evex")
4643   (set_attr "mode" "<sseinsnmode>")])
4644
;; 128-bit vcvtps2uqq: elements 0 and 1 of the V4SF source are selected
;; and converted through UNSPEC_UNSIGNED_FIX_NOTRUNC to V2DI.  Requires
;; both TARGET_AVX512DQ and TARGET_AVX512VL.
4645(define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
4646  [(set (match_operand:V2DI 0 "register_operand" "=v")
4647	(unspec:V2DI
4648	  [(vec_select:V2SF
4649	     (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4650	     (parallel [(const_int 0) (const_int 1)]))]
4651	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4652  "TARGET_AVX512DQ && TARGET_AVX512VL"
4653  "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4654  [(set_attr "type" "ssecvt")
4655   (set_attr "prefix" "evex")
4656   (set_attr "mode" "TI")])
4657
;; 512-bit V16SF -> V16SI conversion iterated over the any_fix code
;; iterator; <fixsuffix> is spliced into both the pattern name and the
;; mnemonic (vcvttps2<fixsuffix>dq).  Uses the <round_saeonly_*>
;; substitutions.  EVEX-prefixed, mode XI.
4658(define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
4659  [(set (match_operand:V16SI 0 "register_operand" "=v")
4660	(any_fix:V16SI
4661	  (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4662  "TARGET_AVX512F"
4663  "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4664  [(set_attr "type" "ssecvt")
4665   (set_attr "prefix" "evex")
4666   (set_attr "mode" "XI")])
4667
;; 256-bit vcvttps2dq: `fix' conversion V8SF -> V8SI.  Requires
;; TARGET_AVX plus <mask_avx512vl_condition>; the prefix attribute comes
;; from <mask_prefix>.
4668(define_insn "fix_truncv8sfv8si2<mask_name>"
4669  [(set (match_operand:V8SI 0 "register_operand" "=v")
4670	(fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
4671  "TARGET_AVX && <mask_avx512vl_condition>"
4672  "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4673  [(set_attr "type" "ssecvt")
4674   (set_attr "prefix" "<mask_prefix>")
4675   (set_attr "mode" "OI")])
4676
;; 128-bit cvttps2dq: `fix' conversion V4SF -> V4SI, printed through the
;; %v-prefixed template.  prefix_rep is "*" under TARGET_AVX and "1"
;; otherwise.
;; NOTE(review): prefix_data16 is specified twice in the attribute list
;; below -- first conditionally ("*" under TARGET_AVX, else "0") and then
;; unconditionally as "0".  One of the two looks redundant; confirm which
;; setting the attribute generator honours before removing either.
4677(define_insn "fix_truncv4sfv4si2<mask_name>"
4678  [(set (match_operand:V4SI 0 "register_operand" "=v")
4679	(fix:V4SI (match_operand:V4SF 1 "vector_operand" "vBm")))]
4680  "TARGET_SSE2 && <mask_avx512vl_condition>"
4681  "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4682  [(set_attr "type" "ssecvt")
4683   (set (attr "prefix_rep")
4684     (if_then_else
4685       (match_test "TARGET_AVX")
4686     (const_string "*")
4687     (const_string "1")))
4688   (set (attr "prefix_data16")
4689     (if_then_else
4690       (match_test "TARGET_AVX")
4691     (const_string "*")
4692     (const_string "0")))
4693   (set_attr "prefix_data16" "0")
4694   (set_attr "prefix" "<mask_prefix2>")
4695   (set_attr "mode" "TI")])
4696
;; Expander for float vector -> unsigned integer vector conversion.
;;   - V16SFmode: emit gen_ufix_truncv16sfv16si2 directly.
;;   - Other VF1 modes: call ix86_expand_adjust_ufix_to_sfix_si on the
;;     source, which returns an adjusted value (tmp[0]) and stores a second
;;     rtx through its pointer argument (tmp[2]); run the signed fix_trunc
;;     pattern on tmp[0] into a fresh pseudo (tmp[1]); finally XOR tmp[1]
;;     with tmp[2] into the destination.
;; NOTE(review): the helper is defined outside this file chunk; presumably
;; tmp[2] is the correction that the XOR applies for inputs outside the
;; signed range -- confirm against its definition.
4697(define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
4698  [(match_operand:<sseintvecmode> 0 "register_operand")
4699   (match_operand:VF1 1 "register_operand")]
4700  "TARGET_SSE2"
4701{
4702  if (<MODE>mode == V16SFmode)
4703    emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
4704					  operands[1]));
4705  else
4706    {
4707      rtx tmp[3];
4708      tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4709      tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
4710      emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
4711      emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
4712    }
4713  DONE;
4714})
4715
4716;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4717;;
4718;; Parallel double-precision floating point conversion operations
4719;;
4720;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4721
;; cvtpi2pd: V2SI -> V2DF conversion (`float').  Alternative 0 takes the
;; source in an MMX register ("y"), alternative 1 from memory; the "unit"
;; and "prefix_data16" attributes are set to "mmx"/"1" only for the
;; register alternative.
4722(define_insn "sse2_cvtpi2pd"
4723  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4724	(float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
4725  "TARGET_SSE2"
4726  "cvtpi2pd\t{%1, %0|%0, %1}"
4727  [(set_attr "type" "ssecvt")
4728   (set_attr "unit" "mmx,*")
4729   (set_attr "prefix_data16" "1,*")
4730   (set_attr "mode" "V2DF")])
4731
;; cvtpd2pi: V2DF -> V2SI conversion expressed as UNSPEC_FIX_NOTRUNC, with
;; the result in an MMX register ("=y").  The source may be an SSE
;; register or memory.
4732(define_insn "sse2_cvtpd2pi"
4733  [(set (match_operand:V2SI 0 "register_operand" "=y")
4734	(unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4735		     UNSPEC_FIX_NOTRUNC))]
4736  "TARGET_SSE2"
4737  "cvtpd2pi\t{%1, %0|%0, %1}"
4738  [(set_attr "type" "ssecvt")
4739   (set_attr "unit" "mmx")
4740   (set_attr "bdver1_decode" "double")
4741   (set_attr "btver2_decode" "direct")
4742   (set_attr "prefix_data16" "1")
4743   (set_attr "mode" "DI")])
4744
;; cvttpd2pi: V2DF -> V2SI conversion using the RTL `fix' code, with the
;; result in an MMX register ("=y").
4745(define_insn "sse2_cvttpd2pi"
4746  [(set (match_operand:V2SI 0 "register_operand" "=y")
4747	(fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
4748  "TARGET_SSE2"
4749  "cvttpd2pi\t{%1, %0|%0, %1}"
4750  [(set_attr "type" "ssecvt")
4751   (set_attr "unit" "mmx")
4752   (set_attr "bdver1_decode" "double")
4753   (set_attr "prefix_data16" "1")
4754   (set_attr "mode" "TI")])
4755
;; cvtsi2sd: converts the SImode integer in operand 2 to DF, duplicates it
;; to V2DF and merges it with operand 1 under merge mask 1.
;; Alternatives 0 and 1 (isa "noavx") are the two-operand forms with
;; operand 1 tied to the destination; alternative 2 (isa "avx") is the
;; three-operand vcvtsi2sd form.  This pattern has no <round_name>
;; substitution, unlike the DImode-source sse2_cvtsi2sdq.
4756(define_insn "sse2_cvtsi2sd"
4757  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4758	(vec_merge:V2DF
4759	  (vec_duplicate:V2DF
4760	    (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
4761	  (match_operand:V2DF 1 "register_operand" "0,0,v")
4762	  (const_int 1)))]
4763  "TARGET_SSE2"
4764  "@
4765   cvtsi2sd\t{%2, %0|%0, %2}
4766   cvtsi2sd\t{%2, %0|%0, %2}
4767   vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
4768  [(set_attr "isa" "noavx,noavx,avx")
4769   (set_attr "type" "sseicvt")
4770   (set_attr "athlon_decode" "double,direct,*")
4771   (set_attr "amdfam10_decode" "vector,double,*")
4772   (set_attr "bdver1_decode" "double,direct,*")
4773   (set_attr "btver2_decode" "double,double,double")
4774   (set_attr "znver1_decode" "double,double,double")
4775   (set_attr "prefix" "orig,orig,maybe_evex")
4776   (set_attr "mode" "DF")])
4777
;; 64-bit source variant of sse2_cvtsi2sd: operand 2 is DImode, the
;; pattern requires TARGET_64BIT, and the AVX alternative prints
;; <round_op3>.  The non-AVX alternatives set prefix_rex to "1"; the AVX
;; alternative sets length_vex to "4".
4778(define_insn "sse2_cvtsi2sdq<round_name>"
4779  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4780	(vec_merge:V2DF
4781	  (vec_duplicate:V2DF
4782	    (float:DF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
4783	  (match_operand:V2DF 1 "register_operand" "0,0,v")
4784	  (const_int 1)))]
4785  "TARGET_SSE2 && TARGET_64BIT"
4786  "@
4787   cvtsi2sdq\t{%2, %0|%0, %2}
4788   cvtsi2sdq\t{%2, %0|%0, %2}
4789   vcvtsi2sdq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4790  [(set_attr "isa" "noavx,noavx,avx")
4791   (set_attr "type" "sseicvt")
4792   (set_attr "athlon_decode" "double,direct,*")
4793   (set_attr "amdfam10_decode" "vector,double,*")
4794   (set_attr "bdver1_decode" "double,direct,*")
4795   (set_attr "length_vex" "*,*,4")
4796   (set_attr "prefix_rex" "1,1,*")
4797   (set_attr "prefix" "orig,orig,maybe_evex")
4798   (set_attr "mode" "DF")])
4799
;; vcvtss2usi, 32-bit result: element 0 of the V4SF operand 1 is converted
;; through UNSPEC_UNSIGNED_FIX_NOTRUNC into an SImode general register.
;; Single alternative, EVEX-prefixed.
4800(define_insn "avx512f_vcvtss2usi<round_name>"
4801  [(set (match_operand:SI 0 "register_operand" "=r")
4802	(unspec:SI
4803	  [(vec_select:SF
4804	     (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4805	     (parallel [(const_int 0)]))]
4806	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4807  "TARGET_AVX512F"
4808  "vcvtss2usi\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
4809  [(set_attr "type" "sseicvt")
4810   (set_attr "prefix" "evex")
4811   (set_attr "mode" "SI")])
4812
;; vcvtss2usi, 64-bit result: same as avx512f_vcvtss2usi but the
;; destination is DImode and TARGET_64BIT is required.  The mnemonic in
;; the template is unchanged; only the destination operand's mode differs.
4813(define_insn "avx512f_vcvtss2usiq<round_name>"
4814  [(set (match_operand:DI 0 "register_operand" "=r")
4815	(unspec:DI
4816	  [(vec_select:SF
4817	     (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4818	     (parallel [(const_int 0)]))]
4819	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4820  "TARGET_AVX512F && TARGET_64BIT"
4821  "vcvtss2usi\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
4822  [(set_attr "type" "sseicvt")
4823   (set_attr "prefix" "evex")
4824   (set_attr "mode" "DI")])
4825
;; vcvttss2usi, 32-bit result: element 0 of the V4SF operand 1 is
;; converted with the RTL `unsigned_fix' code into an SImode general
;; register.  Uses the <round_saeonly_*> substitutions.
4826(define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
4827  [(set (match_operand:SI 0 "register_operand" "=r")
4828	(unsigned_fix:SI
4829	  (vec_select:SF
4830	    (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4831	    (parallel [(const_int 0)]))))]
4832  "TARGET_AVX512F"
4833  "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4834  [(set_attr "type" "sseicvt")
4835   (set_attr "prefix" "evex")
4836   (set_attr "mode" "SI")])
4837
;; vcvttss2usi, 64-bit result: `unsigned_fix' conversion of element 0 of
;; the V4SF operand 1 into a DImode general register.  Requires
;; TARGET_64BIT.
4838(define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
4839  [(set (match_operand:DI 0 "register_operand" "=r")
4840	(unsigned_fix:DI
4841	  (vec_select:SF
4842	    (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4843	    (parallel [(const_int 0)]))))]
4844  "TARGET_AVX512F && TARGET_64BIT"
4845  "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4846  [(set_attr "type" "sseicvt")
4847   (set_attr "prefix" "evex")
4848   (set_attr "mode" "DI")])
4849
;; vcvtsd2usi, 32-bit result: element 0 of the V2DF operand 1 is converted
;; through UNSPEC_UNSIGNED_FIX_NOTRUNC into an SImode general register.
;; Single alternative, EVEX-prefixed.
4850(define_insn "avx512f_vcvtsd2usi<round_name>"
4851  [(set (match_operand:SI 0 "register_operand" "=r")
4852	(unspec:SI
4853	  [(vec_select:DF
4854	     (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4855	     (parallel [(const_int 0)]))]
4856	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4857  "TARGET_AVX512F"
4858  "vcvtsd2usi\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4859  [(set_attr "type" "sseicvt")
4860   (set_attr "prefix" "evex")
4861   (set_attr "mode" "SI")])
4862
;; vcvtsd2usi, 64-bit result: same as avx512f_vcvtsd2usi but the
;; destination is DImode and TARGET_64BIT is required.
4863(define_insn "avx512f_vcvtsd2usiq<round_name>"
4864  [(set (match_operand:DI 0 "register_operand" "=r")
4865	(unspec:DI
4866	  [(vec_select:DF
4867	     (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4868	     (parallel [(const_int 0)]))]
4869	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4870  "TARGET_AVX512F && TARGET_64BIT"
4871  "vcvtsd2usi\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4872  [(set_attr "type" "sseicvt")
4873   (set_attr "prefix" "evex")
4874   (set_attr "mode" "DI")])
4875
;; vcvttsd2usi, 32-bit result: element 0 of the V2DF operand 1 is
;; converted with the RTL `unsigned_fix' code into an SImode general
;; register.  Uses the <round_saeonly_*> substitutions.
4876(define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
4877  [(set (match_operand:SI 0 "register_operand" "=r")
4878	(unsigned_fix:SI
4879	  (vec_select:DF
4880	    (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4881	    (parallel [(const_int 0)]))))]
4882  "TARGET_AVX512F"
4883  "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4884  [(set_attr "type" "sseicvt")
4885   (set_attr "prefix" "evex")
4886   (set_attr "mode" "SI")])
4887
;; vcvttsd2usi, 64-bit result: `unsigned_fix' conversion of element 0 of
;; the V2DF operand 1 into a DImode general register.  Requires
;; TARGET_64BIT.
4888(define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
4889  [(set (match_operand:DI 0 "register_operand" "=r")
4890	(unsigned_fix:DI
4891	  (vec_select:DF
4892	    (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4893	    (parallel [(const_int 0)]))))]
4894  "TARGET_AVX512F && TARGET_64BIT"
4895  "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4896  [(set_attr "type" "sseicvt")
4897   (set_attr "prefix" "evex")
4898   (set_attr "mode" "DI")])
4899
;; cvtsd2si on a vector source: element 0 of the V2DF operand 1 is
;; converted to an SImode general register through UNSPEC_FIX_NOTRUNC.
;; Alternative 0 takes the source in a "v" register, alternative 1 through
;; <round_constraint2>.
4900(define_insn "sse2_cvtsd2si<round_name>"
4901  [(set (match_operand:SI 0 "register_operand" "=r,r")
4902	(unspec:SI
4903	  [(vec_select:DF
4904	     (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4905	     (parallel [(const_int 0)]))]
4906	  UNSPEC_FIX_NOTRUNC))]
4907  "TARGET_SSE2"
4908  "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4909  [(set_attr "type" "sseicvt")
4910   (set_attr "athlon_decode" "double,vector")
4911   (set_attr "bdver1_decode" "double,double")
4912   (set_attr "btver2_decode" "double,double")
4913   (set_attr "prefix_rep" "1")
4914   (set_attr "prefix" "maybe_vex")
4915   (set_attr "mode" "SI")])
4916
;; cvtsd2si on a scalar source: same UNSPEC_FIX_NOTRUNC conversion to
;; SImode, but operand 1 is a plain DFmode value (register "v" or memory
;; "m") rather than element 0 of a V2DF.
4917(define_insn "sse2_cvtsd2si_2"
4918  [(set (match_operand:SI 0 "register_operand" "=r,r")
4919	(unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4920		   UNSPEC_FIX_NOTRUNC))]
4921  "TARGET_SSE2"
4922  "%vcvtsd2si\t{%1, %0|%0, %q1}"
4923  [(set_attr "type" "sseicvt")
4924   (set_attr "athlon_decode" "double,vector")
4925   (set_attr "amdfam10_decode" "double,double")
4926   (set_attr "bdver1_decode" "double,double")
4927   (set_attr "prefix_rep" "1")
4928   (set_attr "prefix" "maybe_vex")
4929   (set_attr "mode" "SI")])
4930
;; DImode-result variant of sse2_cvtsd2si: element 0 of the V2DF operand 1
;; is converted through UNSPEC_FIX_NOTRUNC into a 64-bit general register.
;; Requires TARGET_64BIT; the AT&T mnemonic carries an optional {q} suffix.
4931(define_insn "sse2_cvtsd2siq<round_name>"
4932  [(set (match_operand:DI 0 "register_operand" "=r,r")
4933	(unspec:DI
4934	  [(vec_select:DF
4935	     (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4936	     (parallel [(const_int 0)]))]
4937	  UNSPEC_FIX_NOTRUNC))]
4938  "TARGET_SSE2 && TARGET_64BIT"
4939  "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4940  [(set_attr "type" "sseicvt")
4941   (set_attr "athlon_decode" "double,vector")
4942   (set_attr "bdver1_decode" "double,double")
4943   (set_attr "prefix_rep" "1")
4944   (set_attr "prefix" "maybe_vex")
4945   (set_attr "mode" "DI")])
4946
;; DImode counterpart of sse2_cvtsd2si_2: plain DF source (register or
;; memory) under UNSPEC_FIX_NOTRUNC, DImode register destination.
;; Requires TARGET_SSE2 && TARGET_64BIT.
4947(define_insn "sse2_cvtsd2siq_2"
4948  [(set (match_operand:DI 0 "register_operand" "=r,r")
4949	(unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4950		   UNSPEC_FIX_NOTRUNC))]
4951  "TARGET_SSE2 && TARGET_64BIT"
4952  "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
4953  [(set_attr "type" "sseicvt")
4954   (set_attr "athlon_decode" "double,vector")
4955   (set_attr "amdfam10_decode" "double,double")
4956   (set_attr "bdver1_decode" "double,double")
4957   (set_attr "prefix_rep" "1")
4958   (set_attr "prefix" "maybe_vex")
4959   (set_attr "mode" "DI")])
4960
;; Apply the RTL `fix' operation to element 0 of a V2DF operand and
;; store the SImode result in a register.  Enabled for TARGET_SSE2;
;; uses the cvttsd2si template with the <round_saeonly_op2> operand.
4961(define_insn "sse2_cvttsd2si<round_saeonly_name>"
4962  [(set (match_operand:SI 0 "register_operand" "=r,r")
4963	(fix:SI
4964	  (vec_select:DF
4965	    (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4966	    (parallel [(const_int 0)]))))]
4967  "TARGET_SSE2"
4968  "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4969  [(set_attr "type" "sseicvt")
4970   (set_attr "athlon_decode" "double,vector")
4971   (set_attr "amdfam10_decode" "double,double")
4972   (set_attr "bdver1_decode" "double,double")
4973   (set_attr "btver2_decode" "double,double")
4974   (set_attr "prefix_rep" "1")
4975   (set_attr "prefix" "maybe_vex")
4976   (set_attr "mode" "SI")])
4977
;; DImode counterpart of sse2_cvttsd2si<round_saeonly_name>: `fix:DI'
;; of element 0 of a V2DF operand.  Requires TARGET_SSE2 && TARGET_64BIT.
4978(define_insn "sse2_cvttsd2siq<round_saeonly_name>"
4979  [(set (match_operand:DI 0 "register_operand" "=r,r")
4980	(fix:DI
4981	  (vec_select:DF
4982	    (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4983	    (parallel [(const_int 0)]))))]
4984  "TARGET_SSE2 && TARGET_64BIT"
4985  "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4986  [(set_attr "type" "sseicvt")
4987   (set_attr "athlon_decode" "double,vector")
4988   (set_attr "amdfam10_decode" "double,double")
4989   (set_attr "bdver1_decode" "double,double")
4990   (set_attr "prefix_rep" "1")
4991   (set_attr "prefix" "maybe_vex")
4992   (set_attr "mode" "DI")])
4993
4994;; For float<si2dfmodelower><mode>2 insn pattern
;; Maps a DF vector mode to the SI vector mode with the same element
;; count: V8DF -> V8SI, V4DF -> V4SI.
4995(define_mode_attr si2dfmode
4996  [(V8DF "V8SI") (V4DF "V4SI")])
;; Lower-case spelling of the si2dfmode mapping, used when building
;; pattern names: V8DF -> v8si, V4DF -> v4si.
4997(define_mode_attr si2dfmodelower
4998  [(V8DF "v8si") (V4DF "v4si")])
4999
;; `float' conversion from the <si2dfmode> integer vector (register or
;; memory) to a VF2_512_256 vector register; emits vcvtdq2pd.
;; Enabled for TARGET_AVX && <mask_mode512bit_condition>.
5000(define_insn "float<si2dfmodelower><mode>2<mask_name>"
5001  [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
5002	(float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5003  "TARGET_AVX && <mask_mode512bit_condition>"
5004  "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5005  [(set_attr "type" "ssecvt")
5006   (set_attr "prefix" "maybe_vex")
5007   (set_attr "mode" "<MODE>")])
5008
;; `any_float' conversion from the <sseintvecmode> integer vector to a
;; VF2_AVX512VL vector; emits vcvt<floatsuffix>qq2pd.
;; Enabled for TARGET_AVX512DQ.
5009(define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
5010  [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
5011	(any_float:VF2_AVX512VL
5012	  (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5013  "TARGET_AVX512DQ"
5014  "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5015  [(set_attr "type" "ssecvt")
5016   (set_attr "prefix" "evex")
5017   (set_attr "mode" "<MODE>")])
5018
5019;; For <floatsuffix>float<sselongvecmodelower><mode>2 insn patterns
;; Mnemonic suffix appended to vcvt*qq2ps below: empty for V8SF,
;; "{y}" for V4SF.
5020(define_mode_attr qq2pssuff
5021  [(V8SF "") (V4SF "{y}")])
5022
;; Maps an SF vector mode to the DI vector mode with the same element
;; count: V8SF -> V8DI, V4SF -> V4DI.
5023(define_mode_attr sselongvecmode
5024  [(V8SF "V8DI") (V4SF  "V4DI")])
5025
;; Lower-case spelling of the sselongvecmode mapping, used when
;; building pattern names: V8SF -> v8di, V4SF -> v4di.
5026(define_mode_attr sselongvecmodelower
5027  [(V8SF "v8di") (V4SF  "v4di")])
5028
;; Integer mode name used for the "mode" attribute by a pattern below:
;; V8SF -> XI, V4SF -> OI, V8DF -> OI, V4DF -> TI.
5029(define_mode_attr sseintvecmode3
5030  [(V8SF "XI") (V4SF "OI")
5031   (V8DF "OI") (V4DF "TI")])
5032
;; `any_float' conversion from the <sselongvecmode> (DI-element) vector
;; to a VF1_128_256VL vector; emits vcvt<floatsuffix>qq2ps with the
;; <qq2pssuff> suffix.  Enabled for
;; TARGET_AVX512DQ && <round_modev8sf_condition>.
5033(define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
5034  [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
5035	 (any_float:VF1_128_256VL
5036	   (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5037  "TARGET_AVX512DQ && <round_modev8sf_condition>"
5038  "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5039  [(set_attr "type" "ssecvt")
5040   (set_attr "prefix" "evex")
5041   (set_attr "mode" "<MODE>")])
5042
;; Convert a V2DI operand to V2SF with `any_float' and concatenate the
;; result with a zero V2SF to form the V4SF destination.  Emits
;; vcvt<floatsuffix>qq2ps{x}; requires AVX512DQ and AVX512VL.
5043(define_insn "*<floatsuffix>floatv2div2sf2"
5044  [(set (match_operand:V4SF 0 "register_operand" "=v")
5045    (vec_concat:V4SF
5046	    (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5047	    (const_vector:V2SF [(const_int 0) (const_int 0)])))]
5048  "TARGET_AVX512DQ && TARGET_AVX512VL"
5049  "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
5050  [(set_attr "type" "ssecvt")
5051   (set_attr "prefix" "evex")
5052   (set_attr "mode" "V4SF")])
5053
;; Masked form of the V2DI -> V2SF conversion.  The converted V2SF is
;; vec_merge'd, under QImode mask operand 3 (constraint "Yk"), with
;; elements 0 and 1 of V4SF operand 2 (constraint "0C"); the merged
;; half is then concatenated with a zero V2SF.
5054(define_insn "<floatsuffix>floatv2div2sf2_mask"
5055  [(set (match_operand:V4SF 0 "register_operand" "=v")
5056    (vec_concat:V4SF
5057        (vec_merge:V2SF
5058	        (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5059            (vec_select:V2SF
5060                (match_operand:V4SF 2 "vector_move_operand" "0C")
5061                (parallel [(const_int 0) (const_int 1)]))
5062            (match_operand:QI 3 "register_operand" "Yk"))
5063	    (const_vector:V2SF [(const_int 0) (const_int 0)])))]
5064  "TARGET_AVX512DQ && TARGET_AVX512VL"
5065  "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
5066  [(set_attr "type" "ssecvt")
5067   (set_attr "prefix" "evex")
5068   (set_attr "mode" "V4SF")])
5069
;; Masked V2DI -> V2SF conversion whose merge source is a zero V2SF
;; constant instead of a register operand.  The mask is QImode
;; operand 2; the template prints it followed by a literal {z}.
5070(define_insn "*<floatsuffix>floatv2div2sf2_mask_1"
5071  [(set (match_operand:V4SF 0 "register_operand" "=v")
5072    (vec_concat:V4SF
5073	(vec_merge:V2SF
5074		(any_float:V2SF (match_operand:V2DI 1
5075				  "nonimmediate_operand" "vm"))
5076	    (const_vector:V2SF [(const_int 0) (const_int 0)])
5077	    (match_operand:QI 2 "register_operand" "Yk"))
5078	    (const_vector:V2SF [(const_int 0) (const_int 0)])))]
5079  "TARGET_AVX512DQ && TARGET_AVX512VL"
5080  "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
5081  [(set_attr "type" "ssecvt")
5082   (set_attr "prefix" "evex")
5083   (set_attr "mode" "V4SF")])
5084
;; `unsigned_float' conversion from the <si2dfmode> integer vector to a
;; VF2_512_256VL vector; emits vcvtudq2pd.  Enabled for TARGET_AVX512F.
5085(define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
5086  [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
5087	(unsigned_float:VF2_512_256VL
5088	  (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5089   "TARGET_AVX512F"
5090   "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5091   [(set_attr "type" "ssecvt")
5092    (set_attr "prefix" "evex")
5093    (set_attr "mode" "<MODE>")])
5094
;; `unsigned_float' of elements 0 and 1 of a V4SI operand, producing
;; V2DF; emits vcvtudq2pd.  Enabled for TARGET_AVX512VL.
5095(define_insn "ufloatv2siv2df2<mask_name>"
5096  [(set (match_operand:V2DF 0 "register_operand" "=v")
5097	(unsigned_float:V2DF
5098	  (vec_select:V2SI
5099	    (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5100	    (parallel [(const_int 0) (const_int 1)]))))]
5101  "TARGET_AVX512VL"
5102  "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5103  [(set_attr "type" "ssecvt")
5104   (set_attr "prefix" "evex")
5105   (set_attr "mode" "V2DF")])
5106
;; `float' of elements 0..7 of a V16SI operand, producing V8DF; emits
;; vcvtdq2pd with operand 1 printed through the %t modifier.
;; Enabled for TARGET_AVX512F.
5107(define_insn "avx512f_cvtdq2pd512_2"
5108  [(set (match_operand:V8DF 0 "register_operand" "=v")
5109	(float:V8DF
5110	  (vec_select:V8SI
5111	    (match_operand:V16SI 1 "nonimmediate_operand" "vm")
5112	    (parallel [(const_int 0) (const_int 1)
5113		       (const_int 2) (const_int 3)
5114		       (const_int 4) (const_int 5)
5115		       (const_int 6) (const_int 7)]))))]
5116  "TARGET_AVX512F"
5117  "vcvtdq2pd\t{%t1, %0|%0, %t1}"
5118  [(set_attr "type" "ssecvt")
5119   (set_attr "prefix" "evex")
5120   (set_attr "mode" "V8DF")])
5121
;; `float' of elements 0..3 of a V8SI operand, producing V4DF; emits
;; vcvtdq2pd with operand 1 printed through the %x modifier.
;; Enabled for TARGET_AVX.
5122(define_insn "avx_cvtdq2pd256_2"
5123  [(set (match_operand:V4DF 0 "register_operand" "=v")
5124	(float:V4DF
5125	  (vec_select:V4SI
5126	    (match_operand:V8SI 1 "nonimmediate_operand" "vm")
5127	    (parallel [(const_int 0) (const_int 1)
5128		       (const_int 2) (const_int 3)]))))]
5129  "TARGET_AVX"
5130  "vcvtdq2pd\t{%x1, %0|%0, %x1}"
5131  [(set_attr "type" "ssecvt")
5132   (set_attr "prefix" "maybe_evex")
5133   (set_attr "mode" "V4DF")])
5134
;; `float' of elements 0 and 1 of a V4SI operand, producing V2DF; uses
;; the %vcvtdq2pd template.  Enabled for
;; TARGET_SSE2 && <mask_avx512vl_condition>.
5135(define_insn "sse2_cvtdq2pd<mask_name>"
5136  [(set (match_operand:V2DF 0 "register_operand" "=v")
5137	(float:V2DF
5138	  (vec_select:V2SI
5139	    (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5140	    (parallel [(const_int 0) (const_int 1)]))))]
5141  "TARGET_SSE2 && <mask_avx512vl_condition>"
5142  "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5143  [(set_attr "type" "ssecvt")
5144   (set_attr "prefix" "maybe_vex")
5145   (set_attr "mode" "V2DF")])
5146
;; V8DF -> V8SI conversion expressed as UNSPEC_FIX_NOTRUNC; emits
;; vcvtpd2dq.  Enabled for TARGET_AVX512F.
5147(define_insn "avx512f_cvtpd2dq512<mask_name><round_name>"
5148  [(set (match_operand:V8SI 0 "register_operand" "=v")
5149	(unspec:V8SI
5150	  [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
5151	  UNSPEC_FIX_NOTRUNC))]
5152  "TARGET_AVX512F"
5153  "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5154  [(set_attr "type" "ssecvt")
5155   (set_attr "prefix" "evex")
5156   (set_attr "mode" "OI")])
5157
;; V4DF -> V4SI conversion expressed as UNSPEC_FIX_NOTRUNC; emits
;; vcvtpd2dq{y}.  Enabled for TARGET_AVX && <mask_avx512vl_condition>.
5158(define_insn "avx_cvtpd2dq256<mask_name>"
5159  [(set (match_operand:V4SI 0 "register_operand" "=v")
5160	(unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5161		     UNSPEC_FIX_NOTRUNC))]
5162  "TARGET_AVX && <mask_avx512vl_condition>"
5163  "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5164  [(set_attr "type" "ssecvt")
5165   (set_attr "prefix" "<mask_prefix>")
5166   (set_attr "mode" "OI")])
5167
;; Expander: build a V8SI by concatenating the UNSPEC_FIX_NOTRUNC
;; conversion of a V4DF operand with operand 2, which the preparation
;; statement sets to the V4SI zero constant.
5168(define_expand "avx_cvtpd2dq256_2"
5169  [(set (match_operand:V8SI 0 "register_operand")
5170	(vec_concat:V8SI
5171	  (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
5172		       UNSPEC_FIX_NOTRUNC)
5173	  (match_dup 2)))]
5174  "TARGET_AVX"
5175  "operands[2] = CONST0_RTX (V4SImode);")
5176
;; Matches the V8SI concat of a V4DF UNSPEC_FIX_NOTRUNC conversion with
;; a const0 V4SI operand.  Emits vcvtpd2dq{y} with the destination
;; printed through the %x modifier.
5177(define_insn "*avx_cvtpd2dq256_2"
5178  [(set (match_operand:V8SI 0 "register_operand" "=v")
5179	(vec_concat:V8SI
5180	  (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5181		       UNSPEC_FIX_NOTRUNC)
5182	  (match_operand:V4SI 2 "const0_operand")))]
5183  "TARGET_AVX"
5184  "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
5185  [(set_attr "type" "ssecvt")
5186   (set_attr "prefix" "vex")
5187   (set_attr "btver2_decode" "vector")
5188   (set_attr "mode" "OI")])
5189
;; V2DF -> V2SI conversion (UNSPEC_FIX_NOTRUNC) concatenated with a
;; zero V2SI to form the V4SI destination.  The C output block selects
;; the vcvtpd2dq{x} template when TARGET_AVX is set and the plain
;; cvtpd2dq template (without mask operand) otherwise.
5190(define_insn "sse2_cvtpd2dq<mask_name>"
5191  [(set (match_operand:V4SI 0 "register_operand" "=v")
5192	(vec_concat:V4SI
5193	  (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
5194		       UNSPEC_FIX_NOTRUNC)
5195	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5196  "TARGET_SSE2 && <mask_avx512vl_condition>"
5197{
5198  if (TARGET_AVX)
5199    return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
5200  else
5201    return "cvtpd2dq\t{%1, %0|%0, %1}";
5202}
5203  [(set_attr "type" "ssecvt")
5204   (set_attr "prefix_rep" "1")
5205   (set_attr "prefix_data16" "0")
5206   (set_attr "prefix" "maybe_vex")
5207   (set_attr "mode" "TI")
5208   (set_attr "amdfam10_decode" "double")
5209   (set_attr "athlon_decode" "vector")
5210   (set_attr "bdver1_decode" "double")])
5211
5212;; For ufix_notrunc* insn patterns
;; Mnemonic suffix appended to vcvtpd2udq below: empty for V8DF,
;; "{y}" for V4DF.
5213(define_mode_attr pd2udqsuff
5214  [(V8DF "") (V4DF "{y}")])
5215
;; VF2_512_256VL -> <si2dfmode> conversion expressed as
;; UNSPEC_UNSIGNED_FIX_NOTRUNC; emits vcvtpd2udq with the <pd2udqsuff>
;; suffix.  Enabled for TARGET_AVX512F.
5216(define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
5217  [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
5218	(unspec:<si2dfmode>
5219	  [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
5220	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5221  "TARGET_AVX512F"
5222  "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5223  [(set_attr "type" "ssecvt")
5224   (set_attr "prefix" "evex")
5225   (set_attr "mode" "<sseinsnmode>")])
5226
;; V2DF -> V2SI conversion (UNSPEC_UNSIGNED_FIX_NOTRUNC) concatenated
;; with a zero V2SI to form the V4SI destination; emits vcvtpd2udq{x}.
;; Enabled for TARGET_AVX512VL.
5227(define_insn "ufix_notruncv2dfv2si2<mask_name>"
5228  [(set (match_operand:V4SI 0 "register_operand" "=v")
5229	(vec_concat:V4SI
5230	  (unspec:V2SI
5231	    [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
5232	    UNSPEC_UNSIGNED_FIX_NOTRUNC)
5233	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5234  "TARGET_AVX512VL"
5235  "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5236  [(set_attr "type" "ssecvt")
5237   (set_attr "prefix" "evex")
5238   (set_attr "mode" "TI")])
5239
;; `any_fix' conversion V8DF -> V8SI; emits vcvttpd2<fixsuffix>dq.
;; Enabled for TARGET_AVX512F.
5240(define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
5241  [(set (match_operand:V8SI 0 "register_operand" "=v")
5242	(any_fix:V8SI
5243	  (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5244  "TARGET_AVX512F"
5245  "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5246  [(set_attr "type" "ssecvt")
5247   (set_attr "prefix" "evex")
5248   (set_attr "mode" "OI")])
5249
;; `unsigned_fix' conversion V2DF -> V2SI, concatenated with a zero
;; V2SI to form the V4SI destination; emits vcvttpd2udq{x}.
;; Enabled for TARGET_AVX512VL.
5250(define_insn "ufix_truncv2dfv2si2<mask_name>"
5251  [(set (match_operand:V4SI 0 "register_operand" "=v")
5252	(vec_concat:V4SI
5253	  (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
5254	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5255  "TARGET_AVX512VL"
5256  "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5257  [(set_attr "type" "ssecvt")
5258   (set_attr "prefix" "evex")
5259   (set_attr "mode" "TI")])
5260
;; `fix' conversion V4DF -> V4SI; emits vcvttpd2dq{y}.  Enabled for
;; TARGET_AVX, or for TARGET_AVX512VL together with TARGET_AVX512F.
5261(define_insn "fix_truncv4dfv4si2<mask_name>"
5262  [(set (match_operand:V4SI 0 "register_operand" "=v")
5263	(fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5264  "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
5265  "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5266  [(set_attr "type" "ssecvt")
5267   (set_attr "prefix" "maybe_evex")
5268   (set_attr "mode" "OI")])
5269
;; `unsigned_fix' conversion V4DF -> V4SI; emits vcvttpd2udq{y}.
;; Enabled for TARGET_AVX512VL && TARGET_AVX512F.
5270(define_insn "ufix_truncv4dfv4si2<mask_name>"
5271  [(set (match_operand:V4SI 0 "register_operand" "=v")
5272	(unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5273  "TARGET_AVX512VL && TARGET_AVX512F"
5274  "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5275  [(set_attr "type" "ssecvt")
5276   (set_attr "prefix" "maybe_evex")
5277   (set_attr "mode" "OI")])
5278
;; `any_fix' conversion from a VF2_AVX512VL vector to <sseintvecmode>;
;; emits vcvttpd2<fixsuffix>qq.  Enabled for
;; TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>.
5279(define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
5280  [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5281	(any_fix:<sseintvecmode>
5282	  (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5283  "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
5284  "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5285  [(set_attr "type" "ssecvt")
5286   (set_attr "prefix" "evex")
5287   (set_attr "mode" "<sseintvecmode2>")])
5288
;; VF2_AVX512VL -> <sseintvecmode> conversion expressed as
;; UNSPEC_FIX_NOTRUNC; emits vcvtpd2qq.  Enabled for
;; TARGET_AVX512DQ && <round_mode512bit_condition>.
5289(define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5290  [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5291	(unspec:<sseintvecmode>
5292	  [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
5293	  UNSPEC_FIX_NOTRUNC))]
5294  "TARGET_AVX512DQ && <round_mode512bit_condition>"
5295  "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5296  [(set_attr "type" "ssecvt")
5297   (set_attr "prefix" "evex")
5298   (set_attr "mode" "<sseintvecmode2>")])
5299
;; VF2_AVX512VL -> <sseintvecmode> conversion expressed as
;; UNSPEC_UNSIGNED_FIX_NOTRUNC; emits vcvtpd2uqq.  Enabled for
;; TARGET_AVX512DQ && <round_mode512bit_condition>.
5300(define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5301  [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5302	(unspec:<sseintvecmode>
5303	  [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
5304	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5305  "TARGET_AVX512DQ && <round_mode512bit_condition>"
5306  "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5307  [(set_attr "type" "ssecvt")
5308   (set_attr "prefix" "evex")
5309   (set_attr "mode" "<sseintvecmode2>")])
5310
;; `any_fix' conversion from a VF1_128_256VL vector to the
;; <sselongvecmode> (DI-element) vector; emits vcvttps2<fixsuffix>qq.
;; Enabled for TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>.
5311(define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
5312  [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
5313	(any_fix:<sselongvecmode>
5314	  (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5315  "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
5316  "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5317  [(set_attr "type" "ssecvt")
5318   (set_attr "prefix" "evex")
5319   (set_attr "mode" "<sseintvecmode3>")])
5320
;; `any_fix' of elements 0 and 1 of a V4SF operand, producing V2DI;
;; emits vcvttps2<fixsuffix>qq.  Requires AVX512DQ and AVX512VL.
5321(define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>"
5322  [(set (match_operand:V2DI 0 "register_operand" "=v")
5323	(any_fix:V2DI
5324	  (vec_select:V2SF
5325	    (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5326	    (parallel [(const_int 0) (const_int 1)]))))]
5327  "TARGET_AVX512DQ && TARGET_AVX512VL"
5328  "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5329  [(set_attr "type" "ssecvt")
5330   (set_attr "prefix" "evex")
5331   (set_attr "mode" "TI")])
5332
;; `unsigned_fix' conversion from a VF1_128_256VL vector to
;; <sseintvecmode>; emits vcvttps2udq.  Enabled for TARGET_AVX512VL.
5333(define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
5334  [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5335	(unsigned_fix:<sseintvecmode>
5336	  (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
5337  "TARGET_AVX512VL"
5338  "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5339  [(set_attr "type" "ssecvt")
5340   (set_attr "prefix" "evex")
5341   (set_attr "mode" "<sseintvecmode2>")])
5342
;; Expander: build a V8SI by concatenating the `fix' conversion of a
;; V4DF operand with operand 2, which the preparation statement sets
;; to the V4SI zero constant.
5343(define_expand "avx_cvttpd2dq256_2"
5344  [(set (match_operand:V8SI 0 "register_operand")
5345	(vec_concat:V8SI
5346	  (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
5347	  (match_dup 2)))]
5348  "TARGET_AVX"
5349  "operands[2] = CONST0_RTX (V4SImode);")
5350
;; `fix' conversion V2DF -> V2SI concatenated with a zero V2SI to form
;; the V4SI destination.  The C output block selects the
;; vcvttpd2dq{x} template when TARGET_AVX is set and the plain
;; cvttpd2dq template (without mask operand) otherwise.
5351(define_insn "sse2_cvttpd2dq<mask_name>"
5352  [(set (match_operand:V4SI 0 "register_operand" "=v")
5353	(vec_concat:V4SI
5354	  (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
5355	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5356  "TARGET_SSE2 && <mask_avx512vl_condition>"
5357{
5358  if (TARGET_AVX)
5359    return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
5360  else
5361    return "cvttpd2dq\t{%1, %0|%0, %1}";
5362}
5363  [(set_attr "type" "ssecvt")
5364   (set_attr "amdfam10_decode" "double")
5365   (set_attr "athlon_decode" "vector")
5366   (set_attr "bdver1_decode" "double")
5367   (set_attr "prefix" "maybe_vex")
5368   (set_attr "mode" "TI")])
5369
;; Merge, under mask (const_int 1), the duplicated float_truncate of
;; V2DF operand 2 into V4SF operand 1.  Three alternatives per the
;; "isa" attribute: two non-AVX forms that tie operand 1 to the
;; destination ("0"), and one AVX form with a separate operand 1.
5370(define_insn "sse2_cvtsd2ss<round_name>"
5371  [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5372	(vec_merge:V4SF
5373	  (vec_duplicate:V4SF
5374	    (float_truncate:V2SF
5375	      (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
5376	  (match_operand:V4SF 1 "register_operand" "0,0,v")
5377	  (const_int 1)))]
5378  "TARGET_SSE2"
5379  "@
5380   cvtsd2ss\t{%2, %0|%0, %2}
5381   cvtsd2ss\t{%2, %0|%0, %q2}
5382   vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
5383  [(set_attr "isa" "noavx,noavx,avx")
5384   (set_attr "type" "ssecvt")
5385   (set_attr "athlon_decode" "vector,double,*")
5386   (set_attr "amdfam10_decode" "vector,double,*")
5387   (set_attr "bdver1_decode" "direct,direct,*")
5388   (set_attr "btver2_decode" "double,double,double")
5389   (set_attr "prefix" "orig,orig,<round_prefix>")
5390   (set_attr "mode" "SF")])
5391
;; Variant of the cvtsd2ss merge pattern whose source is a scalar DF
;; operand: float_truncate to SF, vec_duplicate to V4SF, then merge
;; into V4SF operand 1 under mask (const_int 1).
5392(define_insn "*sse2_vd_cvtsd2ss"
5393  [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5394	(vec_merge:V4SF
5395	  (vec_duplicate:V4SF
5396	    (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
5397	  (match_operand:V4SF 1 "register_operand" "0,0,v")
5398	  (const_int 1)))]
5399  "TARGET_SSE2"
5400  "@
5401   cvtsd2ss\t{%2, %0|%0, %2}
5402   cvtsd2ss\t{%2, %0|%0, %2}
5403   vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
5404  [(set_attr "isa" "noavx,noavx,avx")
5405   (set_attr "type" "ssecvt")
5406   (set_attr "athlon_decode" "vector,double,*")
5407   (set_attr "amdfam10_decode" "vector,double,*")
5408   (set_attr "bdver1_decode" "direct,direct,*")
5409   (set_attr "btver2_decode" "double,double,double")
5410   (set_attr "prefix" "orig,orig,vex")
5411   (set_attr "mode" "SF")])
5412
;; Merge, under mask (const_int 1), the float_extend of elements 0 and
;; 1 of V4SF operand 2 into V2DF operand 1.  Three alternatives per
;; the "isa" attribute: two non-AVX forms that tie operand 1 to the
;; destination, and one AVX form with a separate operand 1.
5413(define_insn "sse2_cvtss2sd<round_saeonly_name>"
5414  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5415	(vec_merge:V2DF
5416	  (float_extend:V2DF
5417	    (vec_select:V2SF
5418	      (match_operand:V4SF 2 "<round_saeonly_nimm_scalar_predicate>" "x,m,<round_saeonly_constraint>")
5419	      (parallel [(const_int 0) (const_int 1)])))
5420	  (match_operand:V2DF 1 "register_operand" "0,0,v")
5421	  (const_int 1)))]
5422  "TARGET_SSE2"
5423  "@
5424   cvtss2sd\t{%2, %0|%0, %2}
5425   cvtss2sd\t{%2, %0|%0, %k2}
5426   vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
5427  [(set_attr "isa" "noavx,noavx,avx")
5428   (set_attr "type" "ssecvt")
5429   (set_attr "amdfam10_decode" "vector,double,*")
5430   (set_attr "athlon_decode" "direct,direct,*")
5431   (set_attr "bdver1_decode" "direct,direct,*")
5432   (set_attr "btver2_decode" "double,double,double")
5433   (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
5434   (set_attr "mode" "DF")])
5435
;; Variant of the cvtss2sd merge pattern whose source is a scalar SF
;; operand: float_extend to DF, vec_duplicate to V2DF, then merge into
;; V2DF operand 1 under mask (const_int 1).
5436(define_insn "*sse2_vd_cvtss2sd"
5437  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5438	(vec_merge:V2DF
5439	  (vec_duplicate:V2DF
5440	    (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
5441	  (match_operand:V2DF 1 "register_operand" "0,0,v")
5442	  (const_int 1)))]
5443  "TARGET_SSE2"
5444  "@
5445   cvtss2sd\t{%2, %0|%0, %2}
5446   cvtss2sd\t{%2, %0|%0, %2}
5447   vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
5448  [(set_attr "isa" "noavx,noavx,avx")
5449   (set_attr "type" "ssecvt")
5450   (set_attr "amdfam10_decode" "vector,double,*")
5451   (set_attr "athlon_decode" "direct,direct,*")
5452   (set_attr "bdver1_decode" "direct,direct,*")
5453   (set_attr "btver2_decode" "double,double,double")
5454   (set_attr "prefix" "orig,orig,vex")
5455   (set_attr "mode" "DF")])
5456
;; float_truncate conversion V8DF -> V8SF; emits vcvtpd2ps.
;; Enabled for TARGET_AVX512F.
5457(define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
5458  [(set (match_operand:V8SF 0 "register_operand" "=v")
5459	(float_truncate:V8SF
5460	  (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
5461  "TARGET_AVX512F"
5462  "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5463  [(set_attr "type" "ssecvt")
5464   (set_attr "prefix" "evex")
5465   (set_attr "mode" "V8SF")])
5466
;; float_truncate conversion V4DF -> V4SF; emits vcvtpd2ps{y}.
;; Enabled for TARGET_AVX && <mask_avx512vl_condition>.
5467(define_insn "avx_cvtpd2ps256<mask_name>"
5468  [(set (match_operand:V4SF 0 "register_operand" "=v")
5469	(float_truncate:V4SF
5470	  (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5471  "TARGET_AVX && <mask_avx512vl_condition>"
5472  "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5473  [(set_attr "type" "ssecvt")
5474   (set_attr "prefix" "maybe_evex")
5475   (set_attr "btver2_decode" "vector")
5476   (set_attr "mode" "V4SF")])
5477
;; Expander: build a V4SF by concatenating the float_truncate of a
;; V2DF operand with operand 2, which the preparation statement sets
;; to the V2SF zero constant.
5478(define_expand "sse2_cvtpd2ps"
5479  [(set (match_operand:V4SF 0 "register_operand")
5480	(vec_concat:V4SF
5481	  (float_truncate:V2SF
5482	    (match_operand:V2DF 1 "vector_operand"))
5483	  (match_dup 2)))]
5484  "TARGET_SSE2"
5485  "operands[2] = CONST0_RTX (V2SFmode);")
5486
;; Masked expander: the V4SF concat of the truncated V2DF operand and
;; a zero V2SF (operand 4, set by the preparation statement) is
;; vec_merge'd with V4SF operand 2 under QImode mask operand 3.
5487(define_expand "sse2_cvtpd2ps_mask"
5488  [(set (match_operand:V4SF 0 "register_operand")
5489	(vec_merge:V4SF
5490	  (vec_concat:V4SF
5491	    (float_truncate:V2SF
5492	      (match_operand:V2DF 1 "vector_operand"))
5493	    (match_dup 4))
5494	  (match_operand:V4SF 2 "register_operand")
5495	  (match_operand:QI 3 "register_operand")))]
5496  "TARGET_SSE2"
5497  "operands[4] = CONST0_RTX (V2SFmode);")
5498
;; Matches the V4SF concat of a truncated V2DF operand with a const0
;; V2SF operand.  The C output block selects the vcvtpd2ps{x} template
;; (using <mask_operand3>) when TARGET_AVX is set and the plain
;; cvtpd2ps template otherwise.
5499(define_insn "*sse2_cvtpd2ps<mask_name>"
5500  [(set (match_operand:V4SF 0 "register_operand" "=v")
5501	(vec_concat:V4SF
5502	  (float_truncate:V2SF
5503	    (match_operand:V2DF 1 "vector_operand" "vBm"))
5504	  (match_operand:V2SF 2 "const0_operand")))]
5505  "TARGET_SSE2 && <mask_avx512vl_condition>"
5506{
5507  if (TARGET_AVX)
5508    return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
5509  else
5510    return "cvtpd2ps\t{%1, %0|%0, %1}";
5511}
5512  [(set_attr "type" "ssecvt")
5513   (set_attr "amdfam10_decode" "double")
5514   (set_attr "athlon_decode" "vector")
5515   (set_attr "bdver1_decode" "double")
5516   (set_attr "prefix_data16" "1")
5517   (set_attr "prefix" "maybe_vex")
5518   (set_attr "mode" "V4SF")])
5519
5520;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; Maps a DF vector mode to the SF vector mode with the same element
;; count: V8DF -> V8SF, V4DF -> V4SF.
5521(define_mode_attr sf2dfmode
5522  [(V8DF "V8SF") (V4DF "V4SF")])
5523
;; float_extend conversion from the <sf2dfmode> vector to a
;; VF2_512_256 vector; emits vcvtps2pd.  Enabled for TARGET_AVX
;; together with the mask and round-saeonly 512-bit conditions.
5524(define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
5525  [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
5526	(float_extend:VF2_512_256
5527	  (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5528  "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
5529  "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5530  [(set_attr "type" "ssecvt")
5531   (set_attr "prefix" "maybe_vex")
5532   (set_attr "mode" "<MODE>")])
5533
;; float_extend of elements 0..3 of a V8SF operand, producing V4DF;
;; emits vcvtps2pd with operand 1 printed through the %x modifier.
;; Enabled for TARGET_AVX.
5534(define_insn "*avx_cvtps2pd256_2"
5535  [(set (match_operand:V4DF 0 "register_operand" "=v")
5536	(float_extend:V4DF
5537	  (vec_select:V4SF
5538	    (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5539	    (parallel [(const_int 0) (const_int 1)
5540		       (const_int 2) (const_int 3)]))))]
5541  "TARGET_AVX"
5542  "vcvtps2pd\t{%x1, %0|%0, %x1}"
5543  [(set_attr "type" "ssecvt")
5544   (set_attr "prefix" "vex")
5545   (set_attr "mode" "V4DF")])
5546
;; float_extend of elements 0..7 of a V16SF operand, producing V8DF;
;; emits vcvtps2pd with operand 1 printed through the %t modifier.
;; Enabled for TARGET_AVX512F.
5547(define_insn "vec_unpacks_lo_v16sf"
5548  [(set (match_operand:V8DF 0 "register_operand" "=v")
5549	(float_extend:V8DF
5550	  (vec_select:V8SF
5551	    (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5552	    (parallel [(const_int 0) (const_int 1)
5553		       (const_int 2) (const_int 3)
5554		       (const_int 4) (const_int 5)
5555		       (const_int 6) (const_int 7)]))))]
5556  "TARGET_AVX512F"
5557  "vcvtps2pd\t{%t1, %0|%0, %t1}"
5558  [(set_attr "type" "ssecvt")
5559   (set_attr "prefix" "evex")
5560   (set_attr "mode" "V8DF")])
5561
;; Produce an <avx512fmaskmode> value (constraint "Yk") from a
;; VI12_AVX512VL vector register via UNSPEC_CVTINT2MASK; emits
;; vpmov<ssemodesuffix>2m.  Enabled for TARGET_AVX512BW.
5562(define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5563  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5564	(unspec:<avx512fmaskmode>
5565	 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
5566	 UNSPEC_CVTINT2MASK))]
5567  "TARGET_AVX512BW"
5568  "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5569  [(set_attr "prefix" "evex")
5570   (set_attr "mode" "<sseinsnmode>")])
5571
;; Same vector-to-mask pattern (UNSPEC_CVTINT2MASK) for the
;; VI48_AVX512VL modes; this variant is enabled for TARGET_AVX512DQ.
5572(define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5573  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5574	(unspec:<avx512fmaskmode>
5575	 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
5576	 UNSPEC_CVTINT2MASK))]
5577  "TARGET_AVX512DQ"
5578  "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5579  [(set_attr "prefix" "evex")
5580   (set_attr "mode" "<sseinsnmode>")])
5581
;; Expander for mask-to-vector conversion on VI12_AVX512VL modes:
;; vec_merge of an all-ones vector (operand 2) and a zero vector
;; (operand 3), selected by mask operand 1.  Both constants are
;; created by the preparation statements.
5582(define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5583  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
5584	(vec_merge:VI12_AVX512VL
5585	  (match_dup 2)
5586	  (match_dup 3)
5587	  (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5588  "TARGET_AVX512BW"
5589  {
5590    operands[2] = CONSTM1_RTX (<MODE>mode);
5591    operands[3] = CONST0_RTX (<MODE>mode);
5592  })
5593
;; Matches the vec_merge of an all-ones vector and a zero vector under
;; mask operand 1 (constraint "Yk") for VI12_AVX512VL modes; emits
;; vpmovm2<ssemodesuffix>.  Enabled for TARGET_AVX512BW.
5594(define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5595  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
5596	(vec_merge:VI12_AVX512VL
5597	  (match_operand:VI12_AVX512VL 2 "vector_all_ones_operand")
5598	  (match_operand:VI12_AVX512VL 3 "const0_operand")
5599	  (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5600  "TARGET_AVX512BW"
5601  "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5602  [(set_attr "prefix" "evex")
5603   (set_attr "mode" "<sseinsnmode>")])
5604
;; Expander for mask-to-vector conversion on VI48_AVX512VL modes:
;; vec_merge of an all-ones vector (operand 2) and a zero vector
;; (operand 3), selected by mask operand 1.  Enabled for
;; TARGET_AVX512DQ.
5605(define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5606  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
5607	(vec_merge:VI48_AVX512VL
5608	  (match_dup 2)
5609	  (match_dup 3)
5610	  (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5611  "TARGET_AVX512DQ"
5612  "{
5613    operands[2] = CONSTM1_RTX (<MODE>mode);
5614    operands[3] = CONST0_RTX (<MODE>mode);
5615  }")
5616
;; Matches the vec_merge of an all-ones vector and a zero vector under
;; mask operand 1 (constraint "Yk") for VI48_AVX512VL modes; emits
;; vpmovm2<ssemodesuffix>.  Enabled for TARGET_AVX512DQ.
5617(define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5618  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
5619	(vec_merge:VI48_AVX512VL
5620	  (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
5621	  (match_operand:VI48_AVX512VL 3 "const0_operand")
5622	  (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5623  "TARGET_AVX512DQ"
5624  "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5625  [(set_attr "prefix" "evex")
5626   (set_attr "mode" "<sseinsnmode>")])
5627
;; float_extend of elements 0 and 1 of a V4SF operand, producing V2DF;
;; uses the %vcvtps2pd template.  Enabled for
;; TARGET_SSE2 && <mask_avx512vl_condition>.
5628(define_insn "sse2_cvtps2pd<mask_name>"
5629  [(set (match_operand:V2DF 0 "register_operand" "=v")
5630	(float_extend:V2DF
5631	  (vec_select:V2SF
5632	    (match_operand:V4SF 1 "vector_operand" "vm")
5633	    (parallel [(const_int 0) (const_int 1)]))))]
5634  "TARGET_SSE2 && <mask_avx512vl_condition>"
5635  "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5636  [(set_attr "type" "ssecvt")
5637   (set_attr "amdfam10_decode" "direct")
5638   (set_attr "athlon_decode" "double")
5639   (set_attr "bdver1_decode" "double")
5640   (set_attr "prefix_data16" "0")
5641   (set_attr "prefix" "maybe_vex")
5642   (set_attr "mode" "V2DF")])
5643
;; Two-step expander.  Step 1 sets the fresh V4SF temporary (operand 2)
;; to elements {6,7,2,3} of the V8SF concat of the temporary and
;; operand 1; indices 6 and 7 of that concat are elements 2 and 3 of
;; operand 1.  Step 2 float_extends elements 0 and 1 of the temporary
;; into the V2DF destination.
5644(define_expand "vec_unpacks_hi_v4sf"
5645  [(set (match_dup 2)
5646   (vec_select:V4SF
5647     (vec_concat:V8SF
5648       (match_dup 2)
5649       (match_operand:V4SF 1 "vector_operand"))
5650     (parallel [(const_int 6) (const_int 7)
5651		(const_int 2) (const_int 3)])))
5652  (set (match_operand:V2DF 0 "register_operand")
5653   (float_extend:V2DF
5654     (vec_select:V2SF
5655       (match_dup 2)
5656       (parallel [(const_int 0) (const_int 1)]))))]
5657  "TARGET_SSE2"
5658  "operands[2] = gen_reg_rtx (V4SFmode);")
5659
;; Two-step expander: select elements 4..7 of V8SF operand 1 into a
;; fresh V4SF temporary (operand 2), then float_extend that temporary
;; into the V4DF destination.
5660(define_expand "vec_unpacks_hi_v8sf"
5661  [(set (match_dup 2)
5662	(vec_select:V4SF
5663	  (match_operand:V8SF 1 "register_operand")
5664	  (parallel [(const_int 4) (const_int 5)
5665		     (const_int 6) (const_int 7)])))
5666   (set (match_operand:V4DF 0 "register_operand")
5667	(float_extend:V4DF
5668	  (match_dup 2)))]
5669  "TARGET_AVX"
5670  "operands[2] = gen_reg_rtx (V4SFmode);")
5671
;; Two-step expander: select elements 8..15 of V16SF operand 1 into a
;; fresh V8SF temporary (operand 2), then float_extend that temporary
;; into the V8DF destination.
5672(define_expand "vec_unpacks_hi_v16sf"
5673  [(set (match_dup 2)
5674	(vec_select:V8SF
5675	  (match_operand:V16SF 1 "register_operand")
5676	  (parallel [(const_int 8) (const_int 9)
5677		     (const_int 10) (const_int 11)
5678		     (const_int 12) (const_int 13)
5679		     (const_int 14) (const_int 15)])))
5680   (set (match_operand:V8DF 0 "register_operand")
5681	(float_extend:V8DF
5682	  (match_dup 2)))]
5683"TARGET_AVX512F"
5684"operands[2] = gen_reg_rtx (V8SFmode);")
5685
;; Expander with no preparation code: float_extend of elements 0 and 1
;; of V4SF operand 1 into the V2DF destination.
5686(define_expand "vec_unpacks_lo_v4sf"
5687  [(set (match_operand:V2DF 0 "register_operand")
5688	(float_extend:V2DF
5689	  (vec_select:V2SF
5690	    (match_operand:V4SF 1 "vector_operand")
5691	    (parallel [(const_int 0) (const_int 1)]))))]
5692  "TARGET_SSE2")
5693
;; Expander with no preparation code: float_extend of elements 0..3 of
;; V8SF operand 1 into the V4DF destination.
5694(define_expand "vec_unpacks_lo_v8sf"
5695  [(set (match_operand:V4DF 0 "register_operand")
5696	(float_extend:V4DF
5697	  (vec_select:V4SF
5698	    (match_operand:V8SF 1 "nonimmediate_operand")
5699	    (parallel [(const_int 0) (const_int 1)
5700		       (const_int 2) (const_int 3)]))))]
5701  "TARGET_AVX")
5702
;; Maps an integer vector mode to a float vector mode with half as
;; many elements of twice the width (e.g. V8HI -> V4SF, V4SI -> V2DF).
;; Used as the result mode of the vec_unpack*_float_* expanders below.
5703(define_mode_attr sseunpackfltmode
5704  [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
5705  (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
5706
;; Expander for VI2_AVX512F inputs: run gen_vec_unpacks_hi_<mode> into
;; an <sseunpackmode> temporary, then set operand 0 to the FLOAT of
;; that temporary in <sseunpackfltmode>.
(define_expand "vec_unpacks_float_hi_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand")
   (match_operand:VI2_AVX512F 1 "register_operand")]
  "TARGET_SSE2"
{
  /* Intermediate register that receives the unpacked integers.  */
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx as_float;

  emit_insn (gen_vec_unpacks_hi_<mode> (widened, operands[1]));

  /* Convert the intermediate to the floating-point result mode.  */
  as_float = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (operands[0], as_float));
  DONE;
})
5719
;; Expander for VI2_AVX512F inputs: run gen_vec_unpacks_lo_<mode> into
;; an <sseunpackmode> temporary, then set operand 0 to the FLOAT of
;; that temporary in <sseunpackfltmode>.
(define_expand "vec_unpacks_float_lo_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand")
   (match_operand:VI2_AVX512F 1 "register_operand")]
  "TARGET_SSE2"
{
  /* Intermediate register that receives the unpacked integers.  */
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx as_float;

  emit_insn (gen_vec_unpacks_lo_<mode> (widened, operands[1]));

  /* Convert the intermediate to the floating-point result mode.  */
  as_float = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (operands[0], as_float));
  DONE;
})
5732
;; Expander "vec_unpacku_float_hi_<mode>" for the VI2_AVX512F modes.
;; Emits gen_vec_unpacku_hi_<mode> from operand 1 into a fresh
;; <sseunpackmode> pseudo, then sets operand 0 to the FLOAT conversion of
;; that pseudo in <sseunpackfltmode>.  Note that the conversion emitted
;; here is FLOAT (not UNSIGNED_FLOAT) applied to the unpacked value.
5733(define_expand "vec_unpacku_float_hi_<mode>"
5734  [(match_operand:<sseunpackfltmode> 0 "register_operand")
5735   (match_operand:VI2_AVX512F 1 "register_operand")]
5736  "TARGET_SSE2"
5737{
5738  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5739
5740  emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
5741  emit_insn (gen_rtx_SET (operands[0],
5742			  gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5743  DONE;
5744})
5745
;; Expander "vec_unpacku_float_lo_<mode>" for the VI2_AVX512F modes.
;; Emits gen_vec_unpacku_lo_<mode> from operand 1 into a fresh
;; <sseunpackmode> pseudo, then sets operand 0 to the FLOAT conversion of
;; that pseudo in <sseunpackfltmode>.
5746(define_expand "vec_unpacku_float_lo_<mode>"
5747  [(match_operand:<sseunpackfltmode> 0 "register_operand")
5748   (match_operand:VI2_AVX512F 1 "register_operand")]
5749  "TARGET_SSE2"
5750{
5751  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5752
5753  emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
5754  emit_insn (gen_rtx_SET (operands[0],
5755			  gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5756  DONE;
5757})
5758
;; Expander "vec_unpacks_float_hi_v4si": first copies elements 2,3,2,3 of
;; the V4SI operand 1 into a fresh V4SI pseudo (operand 2), so the high
;; pair ends up in elements 0 and 1; then converts elements 0 and 1 of
;; that pseudo with float:V2DF into operand 0.  Enabled under TARGET_SSE2.
5759(define_expand "vec_unpacks_float_hi_v4si"
5760  [(set (match_dup 2)
5761	(vec_select:V4SI
5762	  (match_operand:V4SI 1 "vector_operand")
5763	  (parallel [(const_int 2) (const_int 3)
5764		     (const_int 2) (const_int 3)])))
5765   (set (match_operand:V2DF 0 "register_operand")
5766	(float:V2DF
5767	  (vec_select:V2SI
5768	  (match_dup 2)
5769	    (parallel [(const_int 0) (const_int 1)]))))]
5770  "TARGET_SSE2"
5771  "operands[2] = gen_reg_rtx (V4SImode);")
5772
;; Expander "vec_unpacks_float_lo_v4si": converts elements 0 and 1 of the
;; V4SI operand 1 with float:V2DF into operand 0.  Enabled under
;; TARGET_SSE2.
5773(define_expand "vec_unpacks_float_lo_v4si"
5774  [(set (match_operand:V2DF 0 "register_operand")
5775	(float:V2DF
5776	  (vec_select:V2SI
5777	    (match_operand:V4SI 1 "vector_operand")
5778	    (parallel [(const_int 0) (const_int 1)]))))]
5779  "TARGET_SSE2")
5780
;; Expander "vec_unpacks_float_hi_v8si": selects elements 4..7 of the V8SI
;; operand 1 into a fresh V4SI pseudo (operand 2), then converts that
;; pseudo with float:V4DF into operand 0.  Enabled under TARGET_AVX.
5781(define_expand "vec_unpacks_float_hi_v8si"
5782  [(set (match_dup 2)
5783	(vec_select:V4SI
5784	  (match_operand:V8SI 1 "vector_operand")
5785	  (parallel [(const_int 4) (const_int 5)
5786		     (const_int 6) (const_int 7)])))
5787   (set (match_operand:V4DF 0 "register_operand")
5788	(float:V4DF
5789	  (match_dup 2)))]
5790  "TARGET_AVX"
5791  "operands[2] = gen_reg_rtx (V4SImode);")
5792
;; Expander "vec_unpacks_float_lo_v8si": converts elements 0..3 of the
;; V8SI operand 1 with float:V4DF into operand 0.  Enabled under
;; TARGET_AVX.
5793(define_expand "vec_unpacks_float_lo_v8si"
5794  [(set (match_operand:V4DF 0 "register_operand")
5795	(float:V4DF
5796	  (vec_select:V4SI
5797	    (match_operand:V8SI 1 "nonimmediate_operand")
5798	    (parallel [(const_int 0) (const_int 1)
5799		       (const_int 2) (const_int 3)]))))]
5800  "TARGET_AVX")
5801
;; Expander "vec_unpacks_float_hi_v16si": selects elements 8..15 of the
;; V16SI operand 1 into a fresh V8SI pseudo (operand 2), then converts
;; that pseudo with float:V8DF into operand 0.  Enabled under
;; TARGET_AVX512F.
5802(define_expand "vec_unpacks_float_hi_v16si"
5803  [(set (match_dup 2)
5804	(vec_select:V8SI
5805	  (match_operand:V16SI 1 "nonimmediate_operand")
5806	  (parallel [(const_int 8) (const_int 9)
5807		     (const_int 10) (const_int 11)
5808		     (const_int 12) (const_int 13)
5809		     (const_int 14) (const_int 15)])))
5810   (set (match_operand:V8DF 0 "register_operand")
5811	(float:V8DF
5812	  (match_dup 2)))]
5813  "TARGET_AVX512F"
5814  "operands[2] = gen_reg_rtx (V8SImode);")
5815
;; Expander "vec_unpacks_float_lo_v16si": converts elements 0..7 of the
;; V16SI operand 1 with float:V8DF into operand 0.  Enabled under
;; TARGET_AVX512F.
5816(define_expand "vec_unpacks_float_lo_v16si"
5817  [(set (match_operand:V8DF 0 "register_operand")
5818	(float:V8DF
5819	  (vec_select:V8SI
5820	    (match_operand:V16SI 1 "nonimmediate_operand")
5821	    (parallel [(const_int 0) (const_int 1)
5822		       (const_int 2) (const_int 3)
5823		       (const_int 4) (const_int 5)
5824		       (const_int 6) (const_int 7)]))))]
5825  "TARGET_AVX512F")
5826
;; Expander "vec_unpacku_float_hi_v4si" (TARGET_SSE2).  Emitted sequence:
;;   op5 = elements 2,3,2,3 of the V4SI operand 1
;;   op6 = float:V2DF of elements 0,1 of op5
;;   op7 = lt:V2DF (op6, op3)	  ; op3 is the zero vector
;;   op8 = and:V2DF (op7, op4)	  ; op4 is built from the constant x
;;   op0 = plus:V2DF (op6, op8)
;; The preparation code builds x as a DFmode constant from
;; real_ldexp (&TWO32r, &dconst1, 32) -- presumably 2**32; confirm against
;; real_ldexp -- and creates the pseudos for operands 5..8.
;; NOTE(review): the intent appears to be "convert as signed, then add
;; 2**32 to lanes that came out negative"; this relies on the lt result
;; acting as a per-lane mask -- confirm against the matching insn patterns.
5827(define_expand "vec_unpacku_float_hi_v4si"
5828  [(set (match_dup 5)
5829	(vec_select:V4SI
5830	  (match_operand:V4SI 1 "vector_operand")
5831	  (parallel [(const_int 2) (const_int 3)
5832		     (const_int 2) (const_int 3)])))
5833   (set (match_dup 6)
5834	(float:V2DF
5835	  (vec_select:V2SI
5836	  (match_dup 5)
5837	    (parallel [(const_int 0) (const_int 1)]))))
5838   (set (match_dup 7)
5839	(lt:V2DF (match_dup 6) (match_dup 3)))
5840   (set (match_dup 8)
5841	(and:V2DF (match_dup 7) (match_dup 4)))
5842   (set (match_operand:V2DF 0 "register_operand")
5843	(plus:V2DF (match_dup 6) (match_dup 8)))]
5844  "TARGET_SSE2"
5845{
5846  REAL_VALUE_TYPE TWO32r;
5847  rtx x;
5848  int i;
5849
5850  real_ldexp (&TWO32r, &dconst1, 32);
5851  x = const_double_from_real_value (TWO32r, DFmode);
5852
5853  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5854  operands[4] = force_reg (V2DFmode,
5855			   ix86_build_const_vector (V2DFmode, 1, x));
5856
5857  operands[5] = gen_reg_rtx (V4SImode);
5858
5859  for (i = 6; i < 9; i++)
5860    operands[i] = gen_reg_rtx (V2DFmode);
5861})
5862
;; Expander "vec_unpacku_float_lo_v4si" (TARGET_SSE2).  Emitted sequence:
;;   op5 = float:V2DF of elements 0,1 of the V4SI operand 1
;;   op6 = lt:V2DF (op5, op3)	  ; op3 is the zero vector
;;   op7 = and:V2DF (op6, op4)	  ; op4 is built from the constant x
;;   op0 = plus:V2DF (op5, op7)
;; The preparation code builds x as a DFmode constant from
;; real_ldexp (&TWO32r, &dconst1, 32) -- presumably 2**32; confirm -- and
;; creates V2DF pseudos for operands 5..7.
5863(define_expand "vec_unpacku_float_lo_v4si"
5864  [(set (match_dup 5)
5865	(float:V2DF
5866	  (vec_select:V2SI
5867	    (match_operand:V4SI 1 "vector_operand")
5868	    (parallel [(const_int 0) (const_int 1)]))))
5869   (set (match_dup 6)
5870	(lt:V2DF (match_dup 5) (match_dup 3)))
5871   (set (match_dup 7)
5872	(and:V2DF (match_dup 6) (match_dup 4)))
5873   (set (match_operand:V2DF 0 "register_operand")
5874	(plus:V2DF (match_dup 5) (match_dup 7)))]
5875  "TARGET_SSE2"
5876{
5877  REAL_VALUE_TYPE TWO32r;
5878  rtx x;
5879  int i;
5880
5881  real_ldexp (&TWO32r, &dconst1, 32);
5882  x = const_double_from_real_value (TWO32r, DFmode);
5883
5884  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5885  operands[4] = force_reg (V2DFmode,
5886			   ix86_build_const_vector (V2DFmode, 1, x));
5887
5888  for (i = 5; i < 8; i++)
5889    operands[i] = gen_reg_rtx (V2DFmode);
5890})
5891
;; Expander "vec_unpacku_float_hi_v8si" (TARGET_AVX), written as C code.
;; tmp[] roles: [0] zero V4DF vector, [1] V4DF vector built from the
;; constant x, [2] converted value, [3] LT comparison result, [4] masked
;; addend, [5] V4SI high half of operand 1.
;; Sequence: extract the high half (gen_vec_extract_hi_v8si), convert it
;; (gen_floatv4siv4df2), compare it LT against zero, AND the comparison
;; with tmp[1], and add the result to the converted value into operand 0.
;; x comes from real_ldexp (&TWO32r, &dconst1, 32) -- presumably 2**32;
;; confirm.
5892(define_expand "vec_unpacku_float_hi_v8si"
5893  [(match_operand:V4DF 0 "register_operand")
5894   (match_operand:V8SI 1 "register_operand")]
5895  "TARGET_AVX"
5896{
5897  REAL_VALUE_TYPE TWO32r;
5898  rtx x, tmp[6];
5899  int i;
5900
5901  real_ldexp (&TWO32r, &dconst1, 32);
5902  x = const_double_from_real_value (TWO32r, DFmode);
5903
5904  tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5905  tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5906  tmp[5] = gen_reg_rtx (V4SImode);
5907
5908  for (i = 2; i < 5; i++)
5909    tmp[i] = gen_reg_rtx (V4DFmode);
5910  emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
5911  emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
5912  emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5913  emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5914  emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
5915  DONE;
5916})
5917
;; Expander "vec_unpacku_float_hi_v16si" (TARGET_AVX512F), written as C
;; code.  tmp[] roles: [0] zero V8DF vector, [1] V8DF vector built from
;; the constant x, [2] converted value / running result, [3] V8SI high
;; half of operand 1.  k is a QImode pseudo receiving the LT comparison
;; of the converted value against zero.
;; Sequence: extract the high half (gen_vec_extract_hi_v16si), convert it
;; (gen_floatv8siv8df2), set k from the comparison, emit
;; gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k), and move tmp[2]
;; to operand 0.
;; NOTE(review): presumably the masked add adds tmp[1] only in lanes
;; selected by k and keeps tmp[2] elsewhere -- confirm against the
;; addv8df3_mask pattern, which is outside this part of the file.
5918(define_expand "vec_unpacku_float_hi_v16si"
5919  [(match_operand:V8DF 0 "register_operand")
5920   (match_operand:V16SI 1 "register_operand")]
5921  "TARGET_AVX512F"
5922{
5923  REAL_VALUE_TYPE TWO32r;
5924  rtx k, x, tmp[4];
5925
5926  real_ldexp (&TWO32r, &dconst1, 32);
5927  x = const_double_from_real_value (TWO32r, DFmode);
5928
5929  tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5930  tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5931  tmp[2] = gen_reg_rtx (V8DFmode);
5932  tmp[3] = gen_reg_rtx (V8SImode);
5933  k = gen_reg_rtx (QImode);
5934
5935  emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
5936  emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
5937  emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
5938  emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5939  emit_move_insn (operands[0], tmp[2]);
5940  DONE;
5941})
5942
;; Expander "vec_unpacku_float_lo_v8si" (TARGET_AVX), written as C code.
;; tmp[] roles: [0] zero V4DF vector, [1] V4DF vector built from the
;; constant x, [2] converted value, [3] LT comparison result, [4] masked
;; addend.
;; Sequence: convert operand 1 with gen_avx_cvtdq2pd256_2, compare the
;; result LT against zero, AND the comparison with tmp[1], and add that
;; to the converted value into operand 0.
;; x comes from real_ldexp (&TWO32r, &dconst1, 32) -- presumably 2**32;
;; confirm.
5943(define_expand "vec_unpacku_float_lo_v8si"
5944  [(match_operand:V4DF 0 "register_operand")
5945   (match_operand:V8SI 1 "nonimmediate_operand")]
5946  "TARGET_AVX"
5947{
5948  REAL_VALUE_TYPE TWO32r;
5949  rtx x, tmp[5];
5950  int i;
5951
5952  real_ldexp (&TWO32r, &dconst1, 32);
5953  x = const_double_from_real_value (TWO32r, DFmode);
5954
5955  tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5956  tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5957
5958  for (i = 2; i < 5; i++)
5959    tmp[i] = gen_reg_rtx (V4DFmode);
5960  emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
5961  emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5962  emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5963  emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
5964  DONE;
5965})
5966
;; Expander "vec_unpacku_float_lo_v16si" (TARGET_AVX512F), written as C
;; code.  tmp[] roles: [0] zero V8DF vector, [1] V8DF vector built from
;; the constant x, [2] converted value / running result.  k is a QImode
;; pseudo receiving the LT comparison of the converted value against zero.
;; Sequence: convert operand 1 with gen_avx512f_cvtdq2pd512_2, set k from
;; the comparison, emit gen_addv8df3_mask (tmp[2], tmp[2], tmp[1],
;; tmp[2], k), and move tmp[2] to operand 0.
;; x comes from real_ldexp (&TWO32r, &dconst1, 32) -- presumably 2**32;
;; confirm.
5967(define_expand "vec_unpacku_float_lo_v16si"
5968  [(match_operand:V8DF 0 "register_operand")
5969   (match_operand:V16SI 1 "nonimmediate_operand")]
5970  "TARGET_AVX512F"
5971{
5972  REAL_VALUE_TYPE TWO32r;
5973  rtx k, x, tmp[3];
5974
5975  real_ldexp (&TWO32r, &dconst1, 32);
5976  x = const_double_from_real_value (TWO32r, DFmode);
5977
5978  tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5979  tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5980  tmp[2] = gen_reg_rtx (V8DFmode);
5981  k = gen_reg_rtx (QImode);
5982
5983  emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
5984  emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
5985  emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5986  emit_move_insn (operands[0], tmp[2]);
5987  DONE;
5988})
5989
;; Expander "vec_pack_trunc_<mode>" for the VF2_512_256 modes (TARGET_AVX).
;; float_truncates operand 1 and operand 2 separately into two fresh
;; <sf2dfmode> pseudos (operands 3 and 4), then vec_concats them, operand
;; 3 first, into operand 0 in <ssePSmode>.  The iterator and both mode
;; attributes are defined outside this part of the file.
5990(define_expand "vec_pack_trunc_<mode>"
5991  [(set (match_dup 3)
5992	(float_truncate:<sf2dfmode>
5993	  (match_operand:VF2_512_256 1 "nonimmediate_operand")))
5994   (set (match_dup 4)
5995	(float_truncate:<sf2dfmode>
5996	  (match_operand:VF2_512_256 2 "nonimmediate_operand")))
5997   (set (match_operand:<ssePSmode> 0 "register_operand")
5998	(vec_concat:<ssePSmode>
5999	  (match_dup 3)
6000	  (match_dup 4)))]
6001  "TARGET_AVX"
6002{
6003  operands[3] = gen_reg_rtx (<sf2dfmode>mode);
6004  operands[4] = gen_reg_rtx (<sf2dfmode>mode);
6005})
6006
;; Expander "vec_pack_trunc_v2df" (TARGET_SSE2): packs two V2DF operands
;; into the V4SF operand 0, choosing between two sequences.
;;  * If TARGET_AVX && !TARGET_PREFER_AVX128 && optimizing for speed:
;;    force operand 1 into a register, concatenate it with operand 2 into
;;    a V4DF pseudo (gen_avx_vec_concatv4df), then convert once with
;;    gen_avx_cvtpd2ps256.
;;  * Otherwise: convert each input with gen_sse2_cvtpd2ps into its own
;;    V4SF pseudo and combine the two with gen_sse_movlhps.
6007(define_expand "vec_pack_trunc_v2df"
6008  [(match_operand:V4SF 0 "register_operand")
6009   (match_operand:V2DF 1 "vector_operand")
6010   (match_operand:V2DF 2 "vector_operand")]
6011  "TARGET_SSE2"
6012{
6013  rtx tmp0, tmp1;
6014
6015  if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6016    {
6017      tmp0 = gen_reg_rtx (V4DFmode);
6018      tmp1 = force_reg (V2DFmode, operands[1]);
6019
6020      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6021      emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
6022    }
6023  else
6024    {
6025      tmp0 = gen_reg_rtx (V4SFmode);
6026      tmp1 = gen_reg_rtx (V4SFmode);
6027
6028      emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
6029      emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
6030      emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
6031    }
6032  DONE;
6033})
6034
;; Expander "vec_pack_sfix_trunc_v8df" (TARGET_AVX512F): converts each
;; V8DF input with gen_fix_truncv8dfv8si2 into its own V8SI pseudo, then
;; concatenates the two (r1 first) into the V16SI operand 0 with
;; gen_avx_vec_concatv16si.
6035(define_expand "vec_pack_sfix_trunc_v8df"
6036  [(match_operand:V16SI 0 "register_operand")
6037   (match_operand:V8DF 1 "nonimmediate_operand")
6038   (match_operand:V8DF 2 "nonimmediate_operand")]
6039  "TARGET_AVX512F"
6040{
6041  rtx r1, r2;
6042
6043  r1 = gen_reg_rtx (V8SImode);
6044  r2 = gen_reg_rtx (V8SImode);
6045
6046  emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
6047  emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
6048  emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6049  DONE;
6050})
6051
;; Expander "vec_pack_sfix_trunc_v4df" (TARGET_AVX): converts each V4DF
;; input with gen_fix_truncv4dfv4si2 into its own V4SI pseudo, then
;; concatenates the two (r1 first) into the V8SI operand 0 with
;; gen_avx_vec_concatv8si.
6052(define_expand "vec_pack_sfix_trunc_v4df"
6053  [(match_operand:V8SI 0 "register_operand")
6054   (match_operand:V4DF 1 "nonimmediate_operand")
6055   (match_operand:V4DF 2 "nonimmediate_operand")]
6056  "TARGET_AVX"
6057{
6058  rtx r1, r2;
6059
6060  r1 = gen_reg_rtx (V4SImode);
6061  r2 = gen_reg_rtx (V4SImode);
6062
6063  emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
6064  emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
6065  emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
6066  DONE;
6067})
6068
;; Expander "vec_pack_sfix_trunc_v2df" (TARGET_SSE2): packs the truncating
;; integer conversions of two V2DF operands into the V4SI operand 0.
;;  * If TARGET_AVX && !TARGET_PREFER_AVX128 && optimizing for speed:
;;    concatenate the inputs into a V4DF pseudo and convert once with
;;    gen_fix_truncv4dfv4si2.
;;  * Otherwise: convert each input with gen_sse2_cvttpd2dq into a V4SI
;;    pseudo, view both as V2DI via gen_lowpart, combine them with
;;    gen_vec_interleave_lowv2di into a V2DI pseudo, and move that pseudo,
;;    viewed as V4SI, to operand 0.
6069(define_expand "vec_pack_sfix_trunc_v2df"
6070  [(match_operand:V4SI 0 "register_operand")
6071   (match_operand:V2DF 1 "vector_operand")
6072   (match_operand:V2DF 2 "vector_operand")]
6073  "TARGET_SSE2"
6074{
6075  rtx tmp0, tmp1, tmp2;
6076
6077  if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6078    {
6079      tmp0 = gen_reg_rtx (V4DFmode);
6080      tmp1 = force_reg (V2DFmode, operands[1]);
6081
6082      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6083      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
6084    }
6085  else
6086    {
6087      tmp0 = gen_reg_rtx (V4SImode);
6088      tmp1 = gen_reg_rtx (V4SImode);
6089      tmp2 = gen_reg_rtx (V2DImode);
6090
6091      emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
6092      emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
6093      emit_insn (gen_vec_interleave_lowv2di (tmp2,
6094					     gen_lowpart (V2DImode, tmp0),
6095					     gen_lowpart (V2DImode, tmp1)));
6096      emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
6097    }
6098  DONE;
6099})
6100
;; Mode attribute mapping a double-precision vector mode to the SI vector
;; mode with twice as many elements (V8DF -> V16SI, V4DF -> V8SI,
;; V2DF -> V4SI).  Used as the destination mode of
;; vec_pack_ufix_trunc_<mode> below.
6101(define_mode_attr ssepackfltmode
6102  [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
6103
;; Expander "vec_pack_ufix_trunc_<mode>" for the VF2 modes (TARGET_SSE2).
;;  * V8DFmode: convert each input with gen_ufix_truncv8dfv8si2 into a
;;    V8SI pseudo and concatenate the two with gen_avx_vec_concatv16si.
;;  * Other modes: tmp[0]/tmp[1] are the inputs after
;;    ix86_expand_adjust_ufix_to_sfix_si, which also stores a second value
;;    through its pointer argument (tmp[2]/tmp[3]).  tmp[4] receives the
;;    signed pack of the adjusted inputs (gen_vec_pack_sfix_trunc_<mode>).
;;    tmp[5] receives ix86_expand_vec_extract_even_odd applied to
;;    tmp[2]/tmp[3] with a final argument of 0; when the result mode is
;;    not V4SI and AVX2 is unavailable this is done on V8SF views and the
;;    result is viewed back as V8SI.  Finally tmp[4] XOR tmp[5] is
;;    computed (targeting operand 0) and moved into operand 0 if
;;    expand_simple_binop returned a different rtx.
;; NOTE(review): presumably tmp[2]/tmp[3] are per-lane XOR corrections
;; from the adjust helper and the final 0 selects the even elements --
;; confirm against both helpers, which are defined outside this file.
6104(define_expand "vec_pack_ufix_trunc_<mode>"
6105  [(match_operand:<ssepackfltmode> 0 "register_operand")
6106   (match_operand:VF2 1 "register_operand")
6107   (match_operand:VF2 2 "register_operand")]
6108  "TARGET_SSE2"
6109{
6110  if (<MODE>mode == V8DFmode)
6111    {
6112      rtx r1, r2;
6113
6114      r1 = gen_reg_rtx (V8SImode);
6115      r2 = gen_reg_rtx (V8SImode);
6116
6117      emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
6118      emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
6119      emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6120    }
6121  else
6122    {
6123      rtx tmp[7];
6124      tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
6125      tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
6126      tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
6127      emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
6128      if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
6129	{
6130	  tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
6131	  ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
6132	}
6133      else
6134	{
6135	  tmp[5] = gen_reg_rtx (V8SFmode);
6136	  ix86_expand_vec_extract_even_odd (tmp[5],
6137					    gen_lowpart (V8SFmode, tmp[2]),
6138					    gen_lowpart (V8SFmode, tmp[3]), 0);
6139	  tmp[5] = gen_lowpart (V8SImode, tmp[5]);
6140	}
6141      tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
6142				    operands[0], 0, OPTAB_DIRECT);
6143      if (tmp[6] != operands[0])
6144	emit_move_insn (operands[0], tmp[6]);
6145    }
6146
6147  DONE;
6148})
6149
;; Expander "avx512f_vec_pack_sfix_v8df" (TARGET_AVX512F): converts each
;; V8DF input with gen_avx512f_cvtpd2dq512 into its own V8SI pseudo, then
;; concatenates the two (r1 first) into the V16SI operand 0 with
;; gen_avx_vec_concatv16si.
6150(define_expand "avx512f_vec_pack_sfix_v8df"
6151  [(match_operand:V16SI 0 "register_operand")
6152   (match_operand:V8DF 1 "nonimmediate_operand")
6153   (match_operand:V8DF 2 "nonimmediate_operand")]
6154  "TARGET_AVX512F"
6155{
6156  rtx r1, r2;
6157
6158  r1 = gen_reg_rtx (V8SImode);
6159  r2 = gen_reg_rtx (V8SImode);
6160
6161  emit_insn (gen_avx512f_cvtpd2dq512 (r1, operands[1]));
6162  emit_insn (gen_avx512f_cvtpd2dq512 (r2, operands[2]));
6163  emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6164  DONE;
6165})
6166
;; Expander "vec_pack_sfix_v4df" (TARGET_AVX): converts each V4DF input
;; with gen_avx_cvtpd2dq256 into its own V4SI pseudo, then concatenates
;; the two (r1 first) into the V8SI operand 0 with gen_avx_vec_concatv8si.
6167(define_expand "vec_pack_sfix_v4df"
6168  [(match_operand:V8SI 0 "register_operand")
6169   (match_operand:V4DF 1 "nonimmediate_operand")
6170   (match_operand:V4DF 2 "nonimmediate_operand")]
6171  "TARGET_AVX"
6172{
6173  rtx r1, r2;
6174
6175  r1 = gen_reg_rtx (V4SImode);
6176  r2 = gen_reg_rtx (V4SImode);
6177
6178  emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
6179  emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
6180  emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
6181  DONE;
6182})
6183
;; Expander "vec_pack_sfix_v2df" (TARGET_SSE2): same structure as
;; vec_pack_sfix_trunc_v2df but using the cvtpd2dq generators instead of
;; the truncating ones.
;;  * If TARGET_AVX && !TARGET_PREFER_AVX128 && optimizing for speed:
;;    concatenate the inputs into a V4DF pseudo and convert once with
;;    gen_avx_cvtpd2dq256.
;;  * Otherwise: convert each input with gen_sse2_cvtpd2dq into a V4SI
;;    pseudo, combine their V2DI views with gen_vec_interleave_lowv2di,
;;    and move the result, viewed as V4SI, to operand 0.
6184(define_expand "vec_pack_sfix_v2df"
6185  [(match_operand:V4SI 0 "register_operand")
6186   (match_operand:V2DF 1 "vector_operand")
6187   (match_operand:V2DF 2 "vector_operand")]
6188  "TARGET_SSE2"
6189{
6190  rtx tmp0, tmp1, tmp2;
6191
6192  if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6193    {
6194      tmp0 = gen_reg_rtx (V4DFmode);
6195      tmp1 = force_reg (V2DFmode, operands[1]);
6196
6197      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6198      emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
6199    }
6200  else
6201    {
6202      tmp0 = gen_reg_rtx (V4SImode);
6203      tmp1 = gen_reg_rtx (V4SImode);
6204      tmp2 = gen_reg_rtx (V2DImode);
6205
6206      emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
6207      emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
6208      emit_insn (gen_vec_interleave_lowv2di (tmp2,
6209					     gen_lowpart (V2DImode, tmp0),
6210					     gen_lowpart (V2DImode, tmp1)));
6211      emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
6212    }
6213  DONE;
6214})
6215
6216;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6217;;
6218;; Parallel single-precision floating point element swizzling
6219;;
6220;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6221
;; Expander "sse_movhlps_exp" (TARGET_SSE): wrapper around the
;; sse_movhlps insn.  It first passes the operands through
;; ix86_fixup_binary_operands, emits gen_sse_movhlps into the destination
;; that helper returns, and copies that destination to operand 0 when the
;; two differ.  The RTL template selects elements 6,7,2,3 of the
;; concatenation of operands 1 and 2.
6222(define_expand "sse_movhlps_exp"
6223  [(set (match_operand:V4SF 0 "nonimmediate_operand")
6224	(vec_select:V4SF
6225	  (vec_concat:V8SF
6226	    (match_operand:V4SF 1 "nonimmediate_operand")
6227	    (match_operand:V4SF 2 "nonimmediate_operand"))
6228	  (parallel [(const_int 6)
6229		     (const_int 7)
6230		     (const_int 2)
6231		     (const_int 3)])))]
6232  "TARGET_SSE"
6233{
6234  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6235
6236  emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
6237
6238  /* Fix up the destination if needed.  */
6239  if (dst != operands[0])
6240    emit_move_insn (operands[0], dst);
6241
6242  DONE;
6243})
6244
;; Insn "sse_movhlps": result is elements 6,7,2,3 of the concatenation
;; (operand 1, operand 2), i.e. elements 2,3 of operand 2 followed by
;; elements 2,3 of operand 1.  The condition rejects the case where
;; operands 1 and 2 are both memory.  Five alternatives:
;;   0: movhlps	 (noavx)	1: vmovhlps  (avx)
;;   2: movlps	 from %H2 (noavx)	3: vmovlps from %H2 (avx)
;;   4: %vmovhps storing operand 2 to a memory destination
6245(define_insn "sse_movhlps"
6246  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,v,x,v,m")
6247	(vec_select:V4SF
6248	  (vec_concat:V8SF
6249	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6250	    (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
6251	  (parallel [(const_int 6)
6252		     (const_int 7)
6253		     (const_int 2)
6254		     (const_int 3)])))]
6255  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6256  "@
6257   movhlps\t{%2, %0|%0, %2}
6258   vmovhlps\t{%2, %1, %0|%0, %1, %2}
6259   movlps\t{%H2, %0|%0, %H2}
6260   vmovlps\t{%H2, %1, %0|%0, %1, %H2}
6261   %vmovhps\t{%2, %0|%q0, %2}"
6262  [(set_attr "isa" "noavx,avx,noavx,avx,*")
6263   (set_attr "type" "ssemov")
6264   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6265   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6266
;; Expander "sse_movlhps_exp" (TARGET_SSE): wrapper around the
;; sse_movlhps insn.  It first passes the operands through
;; ix86_fixup_binary_operands, emits gen_sse_movlhps into the destination
;; that helper returns, and copies that destination to operand 0 when the
;; two differ.  The RTL template selects elements 0,1,4,5 of the
;; concatenation of operands 1 and 2.
6267(define_expand "sse_movlhps_exp"
6268  [(set (match_operand:V4SF 0 "nonimmediate_operand")
6269	(vec_select:V4SF
6270	  (vec_concat:V8SF
6271	    (match_operand:V4SF 1 "nonimmediate_operand")
6272	    (match_operand:V4SF 2 "nonimmediate_operand"))
6273	  (parallel [(const_int 0)
6274		     (const_int 1)
6275		     (const_int 4)
6276		     (const_int 5)])))]
6277  "TARGET_SSE"
6278{
6279  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6280
6281  emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
6282
6283  /* Fix up the destination if needed.  */
6284  if (dst != operands[0])
6285    emit_move_insn (operands[0], dst);
6286
6287  DONE;
6288})
6289
;; Insn "sse_movlhps": result is elements 0,1,4,5 of the concatenation
;; (operand 1, operand 2), i.e. elements 0,1 of operand 1 followed by
;; elements 0,1 of operand 2.  The condition additionally requires
;; ix86_binary_operator_ok.  Five alternatives:
;;   0: movlhps (noavx)		1: vmovlhps (avx)
;;   2: movhps  (noavx)		3: vmovhps  (avx)
;;   4: %vmovlps storing operand 2 to %H0 of a memory destination
6290(define_insn "sse_movlhps"
6291  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,v,x,v,o")
6292	(vec_select:V4SF
6293	  (vec_concat:V8SF
6294	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6295	    (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,v,v"))
6296	  (parallel [(const_int 0)
6297		     (const_int 1)
6298		     (const_int 4)
6299		     (const_int 5)])))]
6300  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
6301  "@
6302   movlhps\t{%2, %0|%0, %2}
6303   vmovlhps\t{%2, %1, %0|%0, %1, %2}
6304   movhps\t{%2, %0|%0, %q2}
6305   vmovhps\t{%2, %1, %0|%0, %1, %q2}
6306   %vmovlps\t{%2, %H0|%H0, %2}"
6307  [(set_attr "isa" "noavx,avx,noavx,avx,*")
6308   (set_attr "type" "ssemov")
6309   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6310   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6311
;; Insn "<mask_codefor>avx512f_unpckhps512<mask_name>" (TARGET_AVX512F):
;; emits vunpckhps on V16SF.  Within each group of four elements it
;; interleaves elements 2 and 3 of operand 1 with the same elements of
;; operand 2 (indices 16..31 of the concatenation refer to operand 2).
;; The <mask_...> substitutions are defined outside this part of the file.
6312(define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
6313  [(set (match_operand:V16SF 0 "register_operand" "=v")
6314	(vec_select:V16SF
6315	  (vec_concat:V32SF
6316	    (match_operand:V16SF 1 "register_operand" "v")
6317	    (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6318	  (parallel [(const_int 2) (const_int 18)
6319		     (const_int 3) (const_int 19)
6320		     (const_int 6) (const_int 22)
6321		     (const_int 7) (const_int 23)
6322		     (const_int 10) (const_int 26)
6323		     (const_int 11) (const_int 27)
6324		     (const_int 14) (const_int 30)
6325		     (const_int 15) (const_int 31)])))]
6326  "TARGET_AVX512F"
6327  "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6328  [(set_attr "type" "sselog")
6329   (set_attr "prefix" "evex")
6330   (set_attr "mode" "V16SF")])
6331
6332;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Insn "avx_unpckhps256<mask_name>" (TARGET_AVX plus the
;; <mask_avx512vl_condition>): emits vunpckhps on V8SF.  Within each group
;; of four elements it interleaves elements 2 and 3 of operand 1 with the
;; same elements of operand 2 (indices 8..15 refer to operand 2).
6333(define_insn "avx_unpckhps256<mask_name>"
6334  [(set (match_operand:V8SF 0 "register_operand" "=v")
6335	(vec_select:V8SF
6336	  (vec_concat:V16SF
6337	    (match_operand:V8SF 1 "register_operand" "v")
6338	    (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6339	  (parallel [(const_int 2) (const_int 10)
6340		     (const_int 3) (const_int 11)
6341		     (const_int 6) (const_int 14)
6342		     (const_int 7) (const_int 15)])))]
6343  "TARGET_AVX && <mask_avx512vl_condition>"
6344  "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6345  [(set_attr "type" "sselog")
6346   (set_attr "prefix" "vex")
6347   (set_attr "mode" "V8SF")])
6348
;; Expander "vec_interleave_highv8sf" (TARGET_AVX), three steps:
;;   op3 = selection 0,8,1,9,4,12,5,13 of (op1, op2) -- the same selection
;;	   as avx_unpcklps256
;;   op4 = selection 2,10,3,11,6,14,7,15 of (op1, op2) -- the same
;;	   selection as avx_unpckhps256
;;   op0 = elements 4..7 of op3 followed by elements 4..7 of op4
;;	   (indices 12..15 of the concatenation (op3, op4)).
;; Operands 3 and 4 are fresh V8SF pseudos.
6349(define_expand "vec_interleave_highv8sf"
6350  [(set (match_dup 3)
6351	(vec_select:V8SF
6352	  (vec_concat:V16SF
6353	    (match_operand:V8SF 1 "register_operand")
6354	    (match_operand:V8SF 2 "nonimmediate_operand"))
6355	  (parallel [(const_int 0) (const_int 8)
6356		     (const_int 1) (const_int 9)
6357		     (const_int 4) (const_int 12)
6358		     (const_int 5) (const_int 13)])))
6359   (set (match_dup 4)
6360	(vec_select:V8SF
6361	  (vec_concat:V16SF
6362	    (match_dup 1)
6363	    (match_dup 2))
6364	  (parallel [(const_int 2) (const_int 10)
6365		     (const_int 3) (const_int 11)
6366		     (const_int 6) (const_int 14)
6367		     (const_int 7) (const_int 15)])))
6368   (set (match_operand:V8SF 0 "register_operand")
6369	(vec_select:V8SF
6370	  (vec_concat:V16SF
6371	    (match_dup 3)
6372	    (match_dup 4))
6373	  (parallel [(const_int 4) (const_int 5)
6374		     (const_int 6) (const_int 7)
6375		     (const_int 12) (const_int 13)
6376		     (const_int 14) (const_int 15)])))]
6377 "TARGET_AVX"
6378{
6379  operands[3] = gen_reg_rtx (V8SFmode);
6380  operands[4] = gen_reg_rtx (V8SFmode);
6381})
6382
;; Insn "vec_interleave_highv4sf<mask_name>" (TARGET_SSE plus the
;; <mask_avx512vl_condition>): interleaves elements 2 and 3 of operand 1
;; with elements 2 and 3 of operand 2 (selection 2,6,3,7).  Alternative 0
;; emits unpckhps (noavx, destination tied to operand 1); alternative 1
;; emits vunpckhps (avx).
6383(define_insn "vec_interleave_highv4sf<mask_name>"
6384  [(set (match_operand:V4SF 0 "register_operand" "=x,v")
6385	(vec_select:V4SF
6386	  (vec_concat:V8SF
6387	    (match_operand:V4SF 1 "register_operand" "0,v")
6388	    (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
6389	  (parallel [(const_int 2) (const_int 6)
6390		     (const_int 3) (const_int 7)])))]
6391  "TARGET_SSE && <mask_avx512vl_condition>"
6392  "@
6393   unpckhps\t{%2, %0|%0, %2}
6394   vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6395  [(set_attr "isa" "noavx,avx")
6396   (set_attr "type" "sselog")
6397   (set_attr "prefix" "orig,vex")
6398   (set_attr "mode" "V4SF")])
6399
;; Insn "<mask_codefor>avx512f_unpcklps512<mask_name>" (TARGET_AVX512F):
;; emits vunpcklps on V16SF.  Within each group of four elements it
;; interleaves elements 0 and 1 of operand 1 with the same elements of
;; operand 2 (indices 16..31 of the concatenation refer to operand 2).
6400(define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
6401  [(set (match_operand:V16SF 0 "register_operand" "=v")
6402	(vec_select:V16SF
6403	  (vec_concat:V32SF
6404	    (match_operand:V16SF 1 "register_operand" "v")
6405	    (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6406	  (parallel [(const_int 0) (const_int 16)
6407		     (const_int 1) (const_int 17)
6408		     (const_int 4) (const_int 20)
6409		     (const_int 5) (const_int 21)
6410		     (const_int 8) (const_int 24)
6411		     (const_int 9) (const_int 25)
6412		     (const_int 12) (const_int 28)
6413		     (const_int 13) (const_int 29)])))]
6414  "TARGET_AVX512F"
6415  "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6416  [(set_attr "type" "sselog")
6417   (set_attr "prefix" "evex")
6418   (set_attr "mode" "V16SF")])
6419
6420;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Insn "avx_unpcklps256<mask_name>" (TARGET_AVX plus the
;; <mask_avx512vl_condition>): emits vunpcklps on V8SF.  Within each group
;; of four elements it interleaves elements 0 and 1 of operand 1 with the
;; same elements of operand 2 (indices 8..15 refer to operand 2).
6421(define_insn "avx_unpcklps256<mask_name>"
6422  [(set (match_operand:V8SF 0 "register_operand" "=v")
6423	(vec_select:V8SF
6424	  (vec_concat:V16SF
6425	    (match_operand:V8SF 1 "register_operand" "v")
6426	    (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6427	  (parallel [(const_int 0) (const_int 8)
6428		     (const_int 1) (const_int 9)
6429		     (const_int 4) (const_int 12)
6430		     (const_int 5) (const_int 13)])))]
6431  "TARGET_AVX && <mask_avx512vl_condition>"
6432  "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6433  [(set_attr "type" "sselog")
6434   (set_attr "prefix" "vex")
6435   (set_attr "mode" "V8SF")])
6436
;; Insn "unpcklps128_mask" (TARGET_AVX512VL): explicitly masked V4SF
;; vunpcklps.  The interleave of elements 0 and 1 of operands 1 and 2
;; (selection 0,4,1,5) is vec_merged with operand 3 under the QImode mask
;; register operand 4; operand 3 is either tied to the destination or
;; matches the "C" constraint.
6437(define_insn "unpcklps128_mask"
6438  [(set (match_operand:V4SF 0 "register_operand" "=v")
6439	(vec_merge:V4SF
6440	  (vec_select:V4SF
6441	    (vec_concat:V8SF
6442	      (match_operand:V4SF 1 "register_operand" "v")
6443	      (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6444	    (parallel [(const_int 0) (const_int 4)
6445		      (const_int 1) (const_int 5)]))
6446	  (match_operand:V4SF 3 "vector_move_operand" "0C")
6447	  (match_operand:QI 4 "register_operand" "Yk")))]
6448  "TARGET_AVX512VL"
6449  "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
6450  [(set_attr "type" "sselog")
6451   (set_attr "prefix" "evex")
6452   (set_attr "mode" "V4SF")])
6453
;; Expander "vec_interleave_lowv8sf" (TARGET_AVX), three steps:
;;   op3 = selection 0,8,1,9,4,12,5,13 of (op1, op2) -- the same selection
;;	   as avx_unpcklps256
;;   op4 = selection 2,10,3,11,6,14,7,15 of (op1, op2) -- the same
;;	   selection as avx_unpckhps256
;;   op0 = elements 0..3 of op3 followed by elements 0..3 of op4
;;	   (indices 8..11 of the concatenation (op3, op4)).
;; Operands 3 and 4 are fresh V8SF pseudos.
6454(define_expand "vec_interleave_lowv8sf"
6455  [(set (match_dup 3)
6456	(vec_select:V8SF
6457	  (vec_concat:V16SF
6458	    (match_operand:V8SF 1 "register_operand")
6459	    (match_operand:V8SF 2 "nonimmediate_operand"))
6460	  (parallel [(const_int 0) (const_int 8)
6461		     (const_int 1) (const_int 9)
6462		     (const_int 4) (const_int 12)
6463		     (const_int 5) (const_int 13)])))
6464   (set (match_dup 4)
6465	(vec_select:V8SF
6466	  (vec_concat:V16SF
6467	    (match_dup 1)
6468	    (match_dup 2))
6469	  (parallel [(const_int 2) (const_int 10)
6470		     (const_int 3) (const_int 11)
6471		     (const_int 6) (const_int 14)
6472		     (const_int 7) (const_int 15)])))
6473   (set (match_operand:V8SF 0 "register_operand")
6474	(vec_select:V8SF
6475	  (vec_concat:V16SF
6476	    (match_dup 3)
6477	    (match_dup 4))
6478	  (parallel [(const_int 0) (const_int 1)
6479		     (const_int 2) (const_int 3)
6480		     (const_int 8) (const_int 9)
6481		     (const_int 10) (const_int 11)])))]
6482 "TARGET_AVX"
6483{
6484  operands[3] = gen_reg_rtx (V8SFmode);
6485  operands[4] = gen_reg_rtx (V8SFmode);
6486})
6487
;; Insn "vec_interleave_lowv4sf" (TARGET_SSE): interleaves elements 0 and
;; 1 of operand 1 with elements 0 and 1 of operand 2 (selection 0,4,1,5).
;; Alternative 0 emits unpcklps (noavx, destination tied to operand 1);
;; alternative 1 emits vunpcklps (avx).
6488(define_insn "vec_interleave_lowv4sf"
6489  [(set (match_operand:V4SF 0 "register_operand" "=x,v")
6490	(vec_select:V4SF
6491	  (vec_concat:V8SF
6492	    (match_operand:V4SF 1 "register_operand" "0,v")
6493	    (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
6494	  (parallel [(const_int 0) (const_int 4)
6495		     (const_int 1) (const_int 5)])))]
6496  "TARGET_SSE"
6497  "@
6498   unpcklps\t{%2, %0|%0, %2}
6499   vunpcklps\t{%2, %1, %0|%0, %1, %2}"
6500  [(set_attr "isa" "noavx,avx")
6501   (set_attr "type" "sselog")
6502   (set_attr "prefix" "orig,maybe_evex")
6503   (set_attr "mode" "V4SF")])
6504
6505;; These are modeled with the same vec_concat as the others so that we
6506;; capture users of shufps that can use the new instructions
;; Insn "avx_movshdup256<mask_name>" (TARGET_AVX plus the
;; <mask_avx512vl_condition>): emits vmovshdup on V8SF.  Operand 1 is
;; concatenated with itself and elements 1,1,3,3,5,5,7,7 are selected,
;; i.e. each odd-indexed element is duplicated into the pair it belongs to.
6507(define_insn "avx_movshdup256<mask_name>"
6508  [(set (match_operand:V8SF 0 "register_operand" "=v")
6509	(vec_select:V8SF
6510	  (vec_concat:V16SF
6511	    (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6512	    (match_dup 1))
6513	  (parallel [(const_int 1) (const_int 1)
6514		     (const_int 3) (const_int 3)
6515		     (const_int 5) (const_int 5)
6516		     (const_int 7) (const_int 7)])))]
6517  "TARGET_AVX && <mask_avx512vl_condition>"
6518  "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6519  [(set_attr "type" "sse")
6520   (set_attr "prefix" "vex")
6521   (set_attr "mode" "V8SF")])
6522
;; Insn "sse3_movshdup<mask_name>" (TARGET_SSE3 plus the
;; <mask_avx512vl_condition>): emits %vmovshdup on V4SF.  Operand 1 is
;; concatenated with itself and elements 1,1,7,7 are selected; because
;; both halves of the concatenation are operand 1, index 7 is element 3
;; of operand 1, so the result is elements 1,1,3,3.
6523(define_insn "sse3_movshdup<mask_name>"
6524  [(set (match_operand:V4SF 0 "register_operand" "=v")
6525	(vec_select:V4SF
6526	  (vec_concat:V8SF
6527	    (match_operand:V4SF 1 "vector_operand" "vBm")
6528	    (match_dup 1))
6529	  (parallel [(const_int 1)
6530		     (const_int 1)
6531		     (const_int 7)
6532		     (const_int 7)])))]
6533  "TARGET_SSE3 && <mask_avx512vl_condition>"
6534  "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6535  [(set_attr "type" "sse")
6536   (set_attr "prefix_rep" "1")
6537   (set_attr "prefix" "maybe_vex")
6538   (set_attr "mode" "V4SF")])
6539
;; Insn "<mask_codefor>avx512f_movshdup512<mask_name>" (TARGET_AVX512F):
;; emits vmovshdup on V16SF.  Operand 1 is concatenated with itself and
;; each odd-indexed element 1,3,...,15 is selected twice in a row.
6540(define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
6541  [(set (match_operand:V16SF 0 "register_operand" "=v")
6542	(vec_select:V16SF
6543	  (vec_concat:V32SF
6544	    (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6545	    (match_dup 1))
6546	  (parallel [(const_int 1) (const_int 1)
6547		     (const_int 3) (const_int 3)
6548		     (const_int 5) (const_int 5)
6549		     (const_int 7) (const_int 7)
6550		     (const_int 9) (const_int 9)
6551		     (const_int 11) (const_int 11)
6552		     (const_int 13) (const_int 13)
6553		     (const_int 15) (const_int 15)])))]
6554  "TARGET_AVX512F"
6555  "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6556  [(set_attr "type" "sse")
6557   (set_attr "prefix" "evex")
6558   (set_attr "mode" "V16SF")])
6559
;; Insn "avx_movsldup256<mask_name>" (TARGET_AVX plus the
;; <mask_avx512vl_condition>): emits vmovsldup on V8SF.  Operand 1 is
;; concatenated with itself and elements 0,0,2,2,4,4,6,6 are selected,
;; i.e. each even-indexed element is duplicated into the pair it belongs
;; to.
6560(define_insn "avx_movsldup256<mask_name>"
6561  [(set (match_operand:V8SF 0 "register_operand" "=v")
6562	(vec_select:V8SF
6563	  (vec_concat:V16SF
6564	    (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6565	    (match_dup 1))
6566	  (parallel [(const_int 0) (const_int 0)
6567		     (const_int 2) (const_int 2)
6568		     (const_int 4) (const_int 4)
6569		     (const_int 6) (const_int 6)])))]
6570  "TARGET_AVX && <mask_avx512vl_condition>"
6571  "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6572  [(set_attr "type" "sse")
6573   (set_attr "prefix" "vex")
6574   (set_attr "mode" "V8SF")])
6575
;; Insn "sse3_movsldup<mask_name>" (TARGET_SSE3 plus the
;; <mask_avx512vl_condition>): emits %vmovsldup on V4SF.  Operand 1 is
;; concatenated with itself and elements 0,0,6,6 are selected; because
;; both halves of the concatenation are operand 1, index 6 is element 2
;; of operand 1, so the result is elements 0,0,2,2.
6576(define_insn "sse3_movsldup<mask_name>"
6577  [(set (match_operand:V4SF 0 "register_operand" "=v")
6578	(vec_select:V4SF
6579	  (vec_concat:V8SF
6580	    (match_operand:V4SF 1 "vector_operand" "vBm")
6581	    (match_dup 1))
6582	  (parallel [(const_int 0)
6583		     (const_int 0)
6584		     (const_int 6)
6585		     (const_int 6)])))]
6586  "TARGET_SSE3 && <mask_avx512vl_condition>"
6587  "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6588  [(set_attr "type" "sse")
6589   (set_attr "prefix_rep" "1")
6590   (set_attr "prefix" "maybe_vex")
6591   (set_attr "mode" "V4SF")])
6592
;; Insn "<mask_codefor>avx512f_movsldup512<mask_name>" (TARGET_AVX512F):
;; emits vmovsldup on V16SF.  Operand 1 is concatenated with itself and
;; each even-indexed element 0,2,...,14 is selected twice in a row.
6593(define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
6594  [(set (match_operand:V16SF 0 "register_operand" "=v")
6595	(vec_select:V16SF
6596	  (vec_concat:V32SF
6597	    (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6598	    (match_dup 1))
6599	  (parallel [(const_int 0) (const_int 0)
6600		     (const_int 2) (const_int 2)
6601		     (const_int 4) (const_int 4)
6602		     (const_int 6) (const_int 6)
6603		     (const_int 8) (const_int 8)
6604		     (const_int 10) (const_int 10)
6605		     (const_int 12) (const_int 12)
6606		     (const_int 14) (const_int 14)])))]
6607  "TARGET_AVX512F"
6608  "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6609  [(set_attr "type" "sse")
6610   (set_attr "prefix" "evex")
6611   (set_attr "mode" "V16SF")])
6612
;; Expander "avx_shufps256<mask_expand4_name>" (TARGET_AVX): decodes the
;; 8-bit immediate in operand 3 into the eight explicit element selectors
;; taken by avx_shufps256_1.  Each 2-bit field of the immediate is used
;; twice: bits 0-1 and 2-3 give two selectors unbiased and again with +4;
;; bits 4-5 and 6-7 give two selectors biased by +8 and again by +12.
;; <mask_expand4_name> and <mask_expand4_args> are defined outside this
;; part of the file.
6613(define_expand "avx_shufps256<mask_expand4_name>"
6614  [(match_operand:V8SF 0 "register_operand")
6615   (match_operand:V8SF 1 "register_operand")
6616   (match_operand:V8SF 2 "nonimmediate_operand")
6617   (match_operand:SI 3 "const_int_operand")]
6618  "TARGET_AVX"
6619{
6620  int mask = INTVAL (operands[3]);
6621  emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
6622						     operands[1],
6623						     operands[2],
6624						     GEN_INT ((mask >> 0) & 3),
6625						     GEN_INT ((mask >> 2) & 3),
6626						     GEN_INT (((mask >> 4) & 3) + 8),
6627						     GEN_INT (((mask >> 6) & 3) + 8),
6628						     GEN_INT (((mask >> 0) & 3) + 4),
6629						     GEN_INT (((mask >> 2) & 3) + 4),
6630						     GEN_INT (((mask >> 4) & 3) + 12),
6631						     GEN_INT (((mask >> 6) & 3) + 12)
6632						     <mask_expand4_args>));
6633  DONE;
6634})
6635
6636;; Each 2-bit field of the mask selects 2 elements (one per 128-bit lane).
;; 256-bit VSHUFPS with the shuffle spelled out as eight selector operands
;; (3..10) into the V16SF concatenation of operands 1 and 2.
;; The insn condition only accepts selections where operands 7..10 equal
;; operands 3..6 plus 4, so both halves of the result use the same choice.
;; The output code rebuilds the 8-bit immediate from operands 3..6 (two bits
;; each, operands 5 and 6 rebased by -8) and stores it back in operands[3].
6637(define_insn "avx_shufps256_1<mask_name>"
6638  [(set (match_operand:V8SF 0 "register_operand" "=v")
6639	(vec_select:V8SF
6640	  (vec_concat:V16SF
6641	    (match_operand:V8SF 1 "register_operand" "v")
6642	    (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6643	  (parallel [(match_operand 3  "const_0_to_3_operand"  )
6644		     (match_operand 4  "const_0_to_3_operand"  )
6645		     (match_operand 5  "const_8_to_11_operand" )
6646		     (match_operand 6  "const_8_to_11_operand" )
6647		     (match_operand 7  "const_4_to_7_operand"  )
6648		     (match_operand 8  "const_4_to_7_operand"  )
6649		     (match_operand 9  "const_12_to_15_operand")
6650		     (match_operand 10 "const_12_to_15_operand")])))]
6651  "TARGET_AVX
6652   && <mask_avx512vl_condition>
6653   && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6654       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6655       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6656       && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
6657{
6658  int mask;
6659  mask = INTVAL (operands[3]);
6660  mask |= INTVAL (operands[4]) << 2;
6661  mask |= (INTVAL (operands[5]) - 8) << 4;
6662  mask |= (INTVAL (operands[6]) - 8) << 6;
6663  operands[3] = GEN_INT (mask);
6664
6665  return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
6666}
6667  [(set_attr "type" "sseshuf")
6668   (set_attr "length_immediate" "1")
6669   (set_attr "prefix" "<mask_prefix>")
6670   (set_attr "mode" "V8SF")])
6671
;; Expander for 128-bit SHUFPS taking the raw 8-bit immediate in operand 3.
;; The four 2-bit fields become explicit selector indices for
;; sse_shufps_v4sf: fields 0,1 stay in 0..3, fields 2,3 are offset by 4
;; (giving 4..7).
6672(define_expand "sse_shufps<mask_expand4_name>"
6673  [(match_operand:V4SF 0 "register_operand")
6674   (match_operand:V4SF 1 "register_operand")
6675   (match_operand:V4SF 2 "vector_operand")
6676   (match_operand:SI 3 "const_int_operand")]
6677  "TARGET_SSE"
6678{
6679  int mask = INTVAL (operands[3]);
6680  emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
6681						     operands[1],
6682						     operands[2],
6683						     GEN_INT ((mask >> 0) & 3),
6684						     GEN_INT ((mask >> 2) & 3),
6685						     GEN_INT (((mask >> 4) & 3) + 4),
6686						     GEN_INT (((mask >> 6) & 3) + 4)
6687						     <mask_expand4_args>));
6688  DONE;
6689})
6690
;; Masked 128-bit VSHUFPS (requires TARGET_AVX512VL).
;; The shuffle result (selectors 3..6 into the V8SF concatenation of
;; operands 1 and 2) is vec_merge'd with operand 7 under mask register
;; operand 8.  Operand 7 is either tied to the destination or a constant
;; accepted by "C" (constraints "0C").
;; The output code packs selectors 3..6 into the 8-bit immediate (operands
;; 5 and 6 rebased by -4) and overwrites operands[3] with it.
6691(define_insn "sse_shufps_v4sf_mask"
6692  [(set (match_operand:V4SF 0 "register_operand" "=v")
6693    (vec_merge:V4SF
6694	  (vec_select:V4SF
6695	    (vec_concat:V8SF
6696	      (match_operand:V4SF 1 "register_operand" "v")
6697	      (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6698	    (parallel [(match_operand 3 "const_0_to_3_operand")
6699	               (match_operand 4 "const_0_to_3_operand")
6700	               (match_operand 5 "const_4_to_7_operand")
6701	               (match_operand 6 "const_4_to_7_operand")]))
6702      (match_operand:V4SF 7 "vector_move_operand" "0C")
6703      (match_operand:QI 8 "register_operand" "Yk")))]
6704  "TARGET_AVX512VL"
6705{
6706  int mask = 0;
6707  mask |= INTVAL (operands[3]) << 0;
6708  mask |= INTVAL (operands[4]) << 2;
6709  mask |= (INTVAL (operands[5]) - 4) << 4;
6710  mask |= (INTVAL (operands[6]) - 4) << 6;
6711  operands[3] = GEN_INT (mask);
6712
6713  return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
6714}
6715  [(set_attr "type" "sseshuf")
6716   (set_attr "length_immediate" "1")
6717   (set_attr "prefix" "evex")
6718   (set_attr "mode" "V4SF")])
6719
;; Unmasked 128-bit SHUFPS for every mode in VI4F_128.
;; Alternative 0 (isa noavx): two-operand shufps, operand 1 tied to the
;; destination.  Alternative 1 (isa avx): three-operand vshufps.
;; The output code packs selectors 3..6 into the 8-bit immediate (operands
;; 5 and 6 rebased by -4) and overwrites operands[3] before choosing the
;; template by which_alternative.
6720(define_insn "sse_shufps_<mode>"
6721  [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
6722	(vec_select:VI4F_128
6723	  (vec_concat:<ssedoublevecmode>
6724	    (match_operand:VI4F_128 1 "register_operand" "0,v")
6725	    (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
6726	  (parallel [(match_operand 3 "const_0_to_3_operand")
6727		     (match_operand 4 "const_0_to_3_operand")
6728		     (match_operand 5 "const_4_to_7_operand")
6729		     (match_operand 6 "const_4_to_7_operand")])))]
6730  "TARGET_SSE"
6731{
6732  int mask = 0;
6733  mask |= INTVAL (operands[3]) << 0;
6734  mask |= INTVAL (operands[4]) << 2;
6735  mask |= (INTVAL (operands[5]) - 4) << 4;
6736  mask |= (INTVAL (operands[6]) - 4) << 6;
6737  operands[3] = GEN_INT (mask);
6738
6739  switch (which_alternative)
6740    {
6741    case 0:
6742      return "shufps\t{%3, %2, %0|%0, %2, %3}";
6743    case 1:
6744      return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6745    default:
6746      gcc_unreachable ();
6747    }
6748}
6749  [(set_attr "isa" "noavx,avx")
6750   (set_attr "type" "sseshuf")
6751   (set_attr "length_immediate" "1")
6752   (set_attr "prefix" "orig,maybe_evex")
6753   (set_attr "mode" "V4SF")])
6754
;; Extract the high half (elements 2 and 3) of a V4SF into a V2SF.
;; Alternatives: 0 = register to memory (movhps), 1 = register to register
;; (movhlps), 2 = offsettable memory to register (movlps on %H1).
;; The condition rejects the memory-to-memory combination.
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand; the operand-modifier definition is outside this chunk.
6755(define_insn "sse_storehps"
6756  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
6757	(vec_select:V2SF
6758	  (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
6759	  (parallel [(const_int 2) (const_int 3)])))]
6760  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6761  "@
6762   %vmovhps\t{%1, %0|%q0, %1}
6763   %vmovhlps\t{%1, %d0|%d0, %1}
6764   %vmovlps\t{%H1, %d0|%d0, %H1}"
6765  [(set_attr "type" "ssemov")
6766   (set_attr "prefix" "maybe_vex")
6767   (set_attr "mode" "V2SF,V4SF,V2SF")])
6768
;; Expander wrapping sse_loadhps: result = { operand 1 elements 0,1 ;
;; operand 2 }.  The operands are first passed through
;; ix86_fixup_binary_operands; the insn is emitted into the destination that
;; call returns, and if that is not operands[0] the value is copied back
;; with a move.
6769(define_expand "sse_loadhps_exp"
6770  [(set (match_operand:V4SF 0 "nonimmediate_operand")
6771	(vec_concat:V4SF
6772	  (vec_select:V2SF
6773	    (match_operand:V4SF 1 "nonimmediate_operand")
6774	    (parallel [(const_int 0) (const_int 1)]))
6775	  (match_operand:V2SF 2 "nonimmediate_operand")))]
6776  "TARGET_SSE"
6777{
6778  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6779
6780  emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
6781
6782  /* Fix up the destination if needed.  */
6783  if (dst != operands[0])
6784    emit_move_insn (operands[0], dst);
6785
6786  DONE;
6787})
6788
;; Replace the high half of a V4SF: the result keeps elements 0,1 of
;; operand 1 and takes its upper two elements from V2SF operand 2.
;; Alternatives: 0/1 = operand 2 in memory (movhps / vmovhps),
;; 2/3 = operand 2 in a register (movlhps / vmovlhps),
;; 4 = destination in offsettable memory, tied to operand 1 (movlps to %H0).
6789(define_insn "sse_loadhps"
6790  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,v,x,v,o")
6791	(vec_concat:V4SF
6792	  (vec_select:V2SF
6793	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6794	    (parallel [(const_int 0) (const_int 1)]))
6795	  (match_operand:V2SF 2 "nonimmediate_operand"   " m,m,x,v,v")))]
6796  "TARGET_SSE"
6797  "@
6798   movhps\t{%2, %0|%0, %q2}
6799   vmovhps\t{%2, %1, %0|%0, %1, %q2}
6800   movlhps\t{%2, %0|%0, %2}
6801   vmovlhps\t{%2, %1, %0|%0, %1, %2}
6802   %vmovlps\t{%2, %H0|%H0, %2}"
6803  [(set_attr "isa" "noavx,avx,noavx,avx,*")
6804   (set_attr "type" "ssemov")
6805   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6806   (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
6807
;; Extract the low half (elements 0 and 1) of a V4SF into a V2SF.
;; Alternatives: 0 = register to memory (movlps), 1 = register to register
;; (movaps), 2 = memory to register (movlps).
;; The condition rejects the memory-to-memory combination.
6808(define_insn "sse_storelps"
6809  [(set (match_operand:V2SF 0 "nonimmediate_operand"   "=m,v,v")
6810	(vec_select:V2SF
6811	  (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
6812	  (parallel [(const_int 0) (const_int 1)])))]
6813  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6814  "@
6815   %vmovlps\t{%1, %0|%q0, %1}
6816   %vmovaps\t{%1, %0|%0, %1}
6817   %vmovlps\t{%1, %d0|%d0, %q1}"
6818  [(set_attr "type" "ssemov")
6819   (set_attr "prefix" "maybe_vex")
6820   (set_attr "mode" "V2SF,V4SF,V2SF")])
6821
;; Expander wrapping sse_loadlps: result = { operand 2 ; operand 1 elements
;; 2,3 }.  The operands are first passed through
;; ix86_fixup_binary_operands; the insn is emitted into the destination that
;; call returns, and if that is not operands[0] the value is copied back
;; with a move.
6822(define_expand "sse_loadlps_exp"
6823  [(set (match_operand:V4SF 0 "nonimmediate_operand")
6824	(vec_concat:V4SF
6825	  (match_operand:V2SF 2 "nonimmediate_operand")
6826	  (vec_select:V2SF
6827	    (match_operand:V4SF 1 "nonimmediate_operand")
6828	    (parallel [(const_int 2) (const_int 3)]))))]
6829  "TARGET_SSE"
6830{
6831  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6832
6833  emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
6834
6835  /* Fix up the destination if needed.  */
6836  if (dst != operands[0])
6837    emit_move_insn (operands[0], dst);
6838
6839  DONE;
6840})
6841
;; Replace the low half of a V4SF: the result takes its lower two elements
;; from V2SF operand 2 and keeps elements 2,3 of operand 1.
;; Alternatives: 0/1 = both inputs in registers, done with (v)shufps and the
;; fixed immediate 0xe4 (hence length_immediate 1 for these two only);
;; 2/3 = operand 2 in memory (movlps / vmovlps);
;; 4 = destination in memory, tied to operand 1 (movlps store).
6842(define_insn "sse_loadlps"
6843  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,v,x,v,m")
6844	(vec_concat:V4SF
6845	  (match_operand:V2SF 2 "nonimmediate_operand"   " 0,v,m,m,v")
6846	  (vec_select:V2SF
6847	    (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
6848	    (parallel [(const_int 2) (const_int 3)]))))]
6849  "TARGET_SSE"
6850  "@
6851   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
6852   vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
6853   movlps\t{%2, %0|%0, %q2}
6854   vmovlps\t{%2, %1, %0|%0, %1, %q2}
6855   %vmovlps\t{%2, %0|%q0, %2}"
6856  [(set_attr "isa" "noavx,avx,noavx,avx,*")
6857   (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
6858   (set (attr "length_immediate")
6859     (if_then_else (eq_attr "alternative" "0,1")
6860		   (const_string "1")
6861		   (const_string "*")))
6862   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6863   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6864
;; MOVSS register form: vec_merge with mask 1, so element 0 of the result
;; comes from operand 2 and the remaining elements from operand 1.
;; Alternative 0 (noavx) ties operand 1 to the destination; alternative 1
;; (avx) uses the three-operand vmovss.
6865(define_insn "sse_movss"
6866  [(set (match_operand:V4SF 0 "register_operand"   "=x,v")
6867	(vec_merge:V4SF
6868	  (match_operand:V4SF 2 "register_operand" " x,v")
6869	  (match_operand:V4SF 1 "register_operand" " 0,v")
6870	  (const_int 1)))]
6871  "TARGET_SSE"
6872  "@
6873   movss\t{%2, %0|%0, %2}
6874   vmovss\t{%2, %1, %0|%0, %1, %2}"
6875  [(set_attr "isa" "noavx,avx")
6876   (set_attr "type" "ssemov")
6877   (set_attr "prefix" "orig,maybe_evex")
6878   (set_attr "mode" "SF")])
6879
;; Broadcast element 0 of a V4SF register to every element of the
;; destination, for each mode in VF1_128_256.  Emitted as vbroadcastss;
;; requires TARGET_AVX2.
6880(define_insn "avx2_vec_dup<mode>"
6881  [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
6882	(vec_duplicate:VF1_128_256
6883	  (vec_select:SF
6884	    (match_operand:V4SF 1 "register_operand" "v")
6885	    (parallel [(const_int 0)]))))]
6886  "TARGET_AVX2"
6887  "vbroadcastss\t{%1, %0|%0, %1}"
6888  [(set_attr "type" "sselog1")
6889    (set_attr "prefix" "maybe_evex")
6890    (set_attr "mode" "<MODE>")])
6891
;; Broadcast element 0 of a V8SF register to all eight elements of a V8SF
;; destination.  Emitted as vbroadcastss with the source printed via %x1.
;; NOTE(review): %x presumably prints the xmm view of the register; the
;; modifier is defined outside this chunk.
6892(define_insn "avx2_vec_dupv8sf_1"
6893  [(set (match_operand:V8SF 0 "register_operand" "=v")
6894	(vec_duplicate:V8SF
6895	  (vec_select:SF
6896	    (match_operand:V8SF 1 "register_operand" "v")
6897	    (parallel [(const_int 0)]))))]
6898  "TARGET_AVX2"
6899  "vbroadcastss\t{%x1, %0|%0, %x1}"
6900  [(set_attr "type" "sselog1")
6901    (set_attr "prefix" "maybe_evex")
6902    (set_attr "mode" "V8SF")])
6903
;; Broadcast element 0 of a 512-bit FP register (modes in VF_512) to every
;; element of a destination of the same mode.  The mnemonic suffix comes
;; from <bcstscalarsuff>; the source is printed via %x1.
;; Requires TARGET_AVX512F.
6904(define_insn "avx512f_vec_dup<mode>_1"
6905  [(set (match_operand:VF_512 0 "register_operand" "=v")
6906	(vec_duplicate:VF_512
6907	  (vec_select:<ssescalarmode>
6908	    (match_operand:VF_512 1 "register_operand" "v")
6909	    (parallel [(const_int 0)]))))]
6910  "TARGET_AVX512F"
6911  "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
6912  [(set_attr "type" "sselog1")
6913    (set_attr "prefix" "evex")
6914    (set_attr "mode" "<MODE>")])
6915
6916;; Although insertps takes register source, we prefer
6917;; unpcklps with register source since it is shorter.
;; Build a V2SF from two SF values (operand 1 = element 0, operand 2 =
;; element 1) when SSE4.1 is available.  At most one input may be memory.
;; Alternatives, in order:
;;   0,1  unpcklps   (noavx, operand 1 tied to destination)
;;   2    vunpcklps  (avx)
;;   3,4  insertps   (noavx, operand 2 in memory)
;;   5    vinsertps  (avx, operand 2 in memory)
;;   6    movss load of operand 1 with operand 2 matching "C"
;;   7    punpckldq  (MMX registers)
;;   8    movd load into an MMX register with operand 2 matching "C"
;; The attribute lists below are indexed by these alternative numbers.
6918(define_insn "*vec_concatv2sf_sse4_1"
6919  [(set (match_operand:V2SF 0 "register_operand"
6920	  "=Yr,*x, v,Yr,*x,v,v,*y ,*y")
6921	(vec_concat:V2SF
6922	  (match_operand:SF 1 "nonimmediate_operand"
6923	  "  0, 0,Yv, 0,0, v,m, 0 , m")
6924	  (match_operand:SF 2 "vector_move_operand"
6925	  " Yr,*x,Yv, m,m, m,C,*ym, C")))]
6926  "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6927  "@
6928   unpcklps\t{%2, %0|%0, %2}
6929   unpcklps\t{%2, %0|%0, %2}
6930   vunpcklps\t{%2, %1, %0|%0, %1, %2}
6931   insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6932   insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6933   vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
6934   %vmovss\t{%1, %0|%0, %1}
6935   punpckldq\t{%2, %0|%0, %2}
6936   movd\t{%1, %0|%0, %1}"
6937  [(set (attr "isa")
6938     (cond [(eq_attr "alternative" "0,1,3,4")
6939	      (const_string "noavx")
6940	    (eq_attr "alternative" "2,5")
6941	      (const_string "avx")
6942	   ]
6943	   (const_string "*")))
6944   (set (attr "type")
6945     (cond [(eq_attr "alternative" "6")
6946	      (const_string "ssemov")
6947	    (eq_attr "alternative" "7")
6948	      (const_string "mmxcvt")
6949	    (eq_attr "alternative" "8")
6950	      (const_string "mmxmov")
6951	   ]
6952	   (const_string "sselog")))
6953   (set (attr "prefix_data16")
6954     (if_then_else (eq_attr "alternative" "3,4")
6955		   (const_string "1")
6956		   (const_string "*")))
6957   (set (attr "prefix_extra")
6958     (if_then_else (eq_attr "alternative" "3,4,5")
6959		   (const_string "1")
6960		   (const_string "*")))
6961   (set (attr "length_immediate")
6962     (if_then_else (eq_attr "alternative" "3,4,5")
6963		   (const_string "1")
6964		   (const_string "*")))
6965   (set (attr "prefix")
6966     (cond [(eq_attr "alternative" "2,5")
6967	      (const_string "maybe_evex")
6968	    (eq_attr "alternative" "6")
6969	      (const_string "maybe_vex")
6970	   ]
6971	   (const_string "orig")))
6972   (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
6973
6974;; ??? In theory we can match memory for the MMX alternative, but allowing
6975;; vector_operand for operand 2 and *not* allowing memory for the SSE
6976;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF values using only plain SSE or MMX.
;; Alternatives: 0 = unpcklps (operand 1 tied to destination),
;; 1 = movss load of operand 1 with operand 2 matching "C",
;; 2 = punpckldq on MMX registers, 3 = movd load into an MMX register with
;; operand 2 matching "C".
6977(define_insn "*vec_concatv2sf_sse"
6978  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
6979	(vec_concat:V2SF
6980	  (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
6981	  (match_operand:SF 2 "reg_or_0_operand"     " x,C,*y, C")))]
6982  "TARGET_SSE"
6983  "@
6984   unpcklps\t{%2, %0|%0, %2}
6985   movss\t{%1, %0|%0, %1}
6986   punpckldq\t{%2, %0|%0, %2}
6987   movd\t{%1, %0|%0, %1}"
6988  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6989   (set_attr "mode" "V4SF,SF,DI,DI")])
6990
;; Build a V4SF from two V2SF halves (operand 1 = low half, operand 2 =
;; high half).  Alternatives 0/1 take operand 2 from a register
;; (movlhps / vmovlhps); alternatives 2/3 take it from memory
;; (movhps / vmovhps).  The noavx forms tie operand 1 to the destination.
6991(define_insn "*vec_concatv4sf"
6992  [(set (match_operand:V4SF 0 "register_operand"       "=x,v,x,v")
6993	(vec_concat:V4SF
6994	  (match_operand:V2SF 1 "register_operand"     " 0,v,0,v")
6995	  (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
6996  "TARGET_SSE"
6997  "@
6998   movlhps\t{%2, %0|%0, %2}
6999   vmovlhps\t{%2, %1, %0|%0, %1, %2}
7000   movhps\t{%2, %0|%0, %q2}
7001   vmovhps\t{%2, %1, %0|%0, %1, %q2}"
7002  [(set_attr "isa" "noavx,avx,noavx,avx")
7003   (set_attr "type" "ssemov")
7004   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
7005   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
7006
7007;; Avoid combining registers from different units in a single alternative,
7008;; see comment above inline_secondary_memory_needed function in i386.c
;; Set element 0 of a 128-bit 4-element vector (modes in VI4F_128) from
;; scalar operand 2; vec_merge mask 1 means the other elements come from
;; operand 1, which is either a constant matching "C" or a register/tied
;; operand.  Alternatives, in order:
;;   0,1   insertps $0xe     (operand 1 is "C")
;;   2     vinsertps $0xe    (operand 1 is "C")
;;   3     (v)mov<suffix> from memory, operand 1 is "C"
;;   4     (v)movd from a general register, operand 1 is "C"
;;   5,6   movss             (5: from memory with "C"; 6: merge into tied reg)
;;   7     vmovss            (three-operand merge)
;;   8,9   pinsrd $0         (operand 1 tied)
;;   10    vpinsrd $0
;;   11-13 memory destination tied to operand 1; the template is "#", so
;;         these are expected to be split rather than output directly.
;; The attribute lists below are indexed by these alternative numbers.
7009(define_insn "vec_set<mode>_0"
7010  [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
7011	  "=Yr,*x,v,v,Yi,x,x,v,Yr ,*x ,x  ,m ,m   ,m")
7012	(vec_merge:VI4F_128
7013	  (vec_duplicate:VI4F_128
7014	    (match_operand:<ssescalarmode> 2 "general_operand"
7015	  " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
7016	  (match_operand:VI4F_128 1 "vector_move_operand"
7017	  " C , C,C,C,C ,C,0,v,0  ,0  ,x  ,0 ,0   ,0")
7018	  (const_int 1)))]
7019  "TARGET_SSE"
7020  "@
7021   insertps\t{$0xe, %2, %0|%0, %2, 0xe}
7022   insertps\t{$0xe, %2, %0|%0, %2, 0xe}
7023   vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
7024   %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
7025   %vmovd\t{%2, %0|%0, %2}
7026   movss\t{%2, %0|%0, %2}
7027   movss\t{%2, %0|%0, %2}
7028   vmovss\t{%2, %1, %0|%0, %1, %2}
7029   pinsrd\t{$0, %2, %0|%0, %2, 0}
7030   pinsrd\t{$0, %2, %0|%0, %2, 0}
7031   vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
7032   #
7033   #
7034   #"
7035  [(set (attr "isa")
7036     (cond [(eq_attr "alternative" "0,1,8,9")
7037	      (const_string "sse4_noavx")
7038	    (eq_attr "alternative" "2,7,10")
7039	      (const_string "avx")
7040	    (eq_attr "alternative" "3,4")
7041	      (const_string "sse2")
7042	    (eq_attr "alternative" "5,6")
7043	      (const_string "noavx")
7044	   ]
7045	   (const_string "*")))
7046   (set (attr "type")
7047     (cond [(eq_attr "alternative" "0,1,2,8,9,10")
7048	      (const_string "sselog")
7049	    (eq_attr "alternative" "12")
7050	      (const_string "imov")
7051	    (eq_attr "alternative" "13")
7052	      (const_string "fmov")
7053	   ]
7054	   (const_string "ssemov")))
7055   (set (attr "prefix_extra")
7056     (if_then_else (eq_attr "alternative" "8,9,10")
7057		   (const_string "1")
7058		   (const_string "*")))
7059   (set (attr "length_immediate")
7060     (if_then_else (eq_attr "alternative" "8,9,10")
7061		   (const_string "1")
7062		   (const_string "*")))
7063   (set (attr "prefix")
7064     (cond [(eq_attr "alternative" "0,1,5,6,8,9")
7065	      (const_string "orig")
7066	    (eq_attr "alternative" "2")
7067	      (const_string "maybe_evex")
7068	    (eq_attr "alternative" "3,4")
7069	      (const_string "maybe_vex")
7070	    (eq_attr "alternative" "7,10")
7071	      (const_string "vex")
7072	   ]
7073	   (const_string "*")))
7074   (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")])
7075
7076;; A subset is vec_setv4sf.
;; Insert SF operand 2 into one element of V4SF operand 1 using (v)insertps.
;; Operand 3 is the vec_merge mask; the insn condition only matches when it
;; is a single bit whose index (exact_log2) is below the number of V4SF
;; elements.  The output code turns that element index into the instruction
;; immediate by shifting it left by 4 and overwrites operands[3] with it.
;; Alternatives 0,1 are the noavx two-operand form; 2 is the avx form.
7077(define_insn "*vec_setv4sf_sse4_1"
7078  [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
7079	(vec_merge:V4SF
7080	  (vec_duplicate:V4SF
7081	    (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm"))
7082	  (match_operand:V4SF 1 "register_operand" "0,0,v")
7083	  (match_operand:SI 3 "const_int_operand")))]
7084  "TARGET_SSE4_1
7085   && ((unsigned) exact_log2 (INTVAL (operands[3]))
7086       < GET_MODE_NUNITS (V4SFmode))"
7087{
7088  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
7089  switch (which_alternative)
7090    {
7091    case 0:
7092    case 1:
7093      return "insertps\t{%3, %2, %0|%0, %2, %3}";
7094    case 2:
7095      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7096    default:
7097      gcc_unreachable ();
7098    }
7099}
7100  [(set_attr "isa" "noavx,noavx,avx")
7101   (set_attr "type" "sselog")
7102   (set_attr "prefix_data16" "1,1,*")
7103   (set_attr "prefix_extra" "1")
7104   (set_attr "length_immediate" "1")
7105   (set_attr "prefix" "orig,orig,maybe_evex")
7106   (set_attr "mode" "V4SF")])
7107
7108;; All of vinsertps, vmovss and vmovd also clear the higher bits.
;; Set element 0 of a 256/512-bit 4-byte-element vector (modes in
;; VI4F_256_512) from scalar operand 2 with every other element zero
;; (operand 1 must satisfy const0_operand).  The instructions are written
;; on the %x0 view of the destination.
;; Alternatives: 0 = vinsertps $0xe from a vector register,
;; 1 = vmov<suffix> from memory, 2 = vmovd from a general register.
7109(define_insn "vec_set<mode>_0"
7110  [(set (match_operand:VI4F_256_512 0 "register_operand" "=v,v,Yi")
7111	(vec_merge:VI4F_256_512
7112	  (vec_duplicate:VI4F_256_512
7113	    (match_operand:<ssescalarmode> 2 "general_operand" "v,m,r"))
7114	  (match_operand:VI4F_256_512 1 "const0_operand" "C,C,C")
7115	  (const_int 1)))]
7116  "TARGET_AVX"
7117  "@
7118   vinsertps\t{$0xe, %2, %2, %x0|%x0, %2, %2, 0xe}
7119   vmov<ssescalarmodesuffix>\t{%x2, %x0|%x0, %2}
7120   vmovd\t{%2, %x0|%x0, %2}"
7121  [(set (attr "type")
7122     (if_then_else (eq_attr "alternative" "0")
7123		   (const_string "sselog")
7124		   (const_string "ssemov")))
7125   (set_attr "prefix" "maybe_evex")
7126   (set_attr "mode" "SF,<ssescalarmode>,SI")])
7127
;; INSERTPS with the raw 8-bit immediate (operand 3), modelled as
;; UNSPEC_INSERTPS.  Operand 1 is the vector being modified, operand 2 the
;; source vector (register or memory).
;; When operand 2 is memory, the top two bits of the immediate (count_s) are
;; folded into the address instead: the memory reference is narrowed to
;; SFmode at byte offset count_s * 4 and those bits are cleared from the
;; immediate (mask 0x3f).
;; Alternatives 0,1 are the noavx two-operand form; 2 is the avx form.
7128(define_insn "sse4_1_insertps"
7129  [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
7130	(unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
7131		      (match_operand:V4SF 1 "register_operand" "0,0,v")
7132		      (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
7133		     UNSPEC_INSERTPS))]
7134  "TARGET_SSE4_1"
7135{
7136  if (MEM_P (operands[2]))
7137    {
7138      unsigned count_s = INTVAL (operands[3]) >> 6;
7139      if (count_s)
7140	operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
7141      operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
7142    }
7143  switch (which_alternative)
7144    {
7145    case 0:
7146    case 1:
7147      return "insertps\t{%3, %2, %0|%0, %2, %3}";
7148    case 2:
7149      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7150    default:
7151      gcc_unreachable ();
7152    }
7153}
7154  [(set_attr "isa" "noavx,noavx,avx")
7155   (set_attr "type" "sselog")
7156   (set_attr "prefix_data16" "1,1,*")
7157   (set_attr "prefix_extra" "1")
7158   (set_attr "length_immediate" "1")
7159   (set_attr "prefix" "orig,orig,maybe_evex")
7160   (set_attr "mode" "V4SF")])
7161
;; After reload, setting element 0 of a VI4F_128 vector that lives in memory
;; (the other elements come from that same memory, via match_dup 0) from a
;; non-memory scalar is rewritten as a plain scalar store: the destination
;; is re-addressed in the scalar mode at offset 0.
7162(define_split
7163  [(set (match_operand:VI4F_128 0 "memory_operand")
7164	(vec_merge:VI4F_128
7165	  (vec_duplicate:VI4F_128
7166	    (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
7167	  (match_dup 0)
7168	  (const_int 1)))]
7169  "TARGET_SSE && reload_completed"
7170  [(set (match_dup 0) (match_dup 1))]
7171  "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
7172
;; Expander for setting one element of a vector, for every mode in iterator
;; V: operand 0 is the vector register, operand 1 the scalar value and
;; operand 2 the constant element index.  All work is delegated to
;; ix86_expand_vector_set, called with false as its first argument.
7173(define_expand "vec_set<mode>"
7174  [(match_operand:V 0 "register_operand")
7175   (match_operand:<ssescalarmode> 1 "register_operand")
7176   (match_operand 2 "const_int_operand")]
7177  "TARGET_SSE"
7178{
7179  ix86_expand_vector_set (false, operands[0], operands[1],
7180			  INTVAL (operands[2]));
7181  DONE;
7182})
7183
;; Extract element 0 of a V4SF.  The template is "#", so nothing is output
;; directly; after reload the insn is split into an ordinary SF move whose
;; source is the SFmode low part of operand 1.
;; The condition rejects the memory-to-memory combination.
7184(define_insn_and_split "*vec_extractv4sf_0"
7185  [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
7186	(vec_select:SF
7187	  (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
7188	  (parallel [(const_int 0)])))]
7189  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7190  "#"
7191  "&& reload_completed"
7192  [(set (match_dup 0) (match_dup 1))]
7193  "operands[1] = gen_lowpart (SFmode, operands[1]);")
7194
;; Extract element operand 2 (0..3) of a V4SF register into an SF.
;; Alternatives 0-2 output (v)extractps directly.
;; Alternatives 3,4 use template "#" and are split after reload when the
;; destination is an SSE register: the destination is viewed as V4SF and
;;   index 1 or 3 -> shufps of operand 1 with itself, using the index for
;;                   both low selectors and index + 4 for both high ones;
;;   index 2      -> vec_interleave_highv4sf of operand 1 with itself.
;; Index 0 is not handled here (see the existing comment in the split).
7195(define_insn_and_split "*sse4_1_extractps"
7196  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,rm,Yv,Yv")
7197	(vec_select:SF
7198	  (match_operand:V4SF 1 "register_operand" "Yr,*x,v,0,v")
7199	  (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n,n")])))]
7200  "TARGET_SSE4_1"
7201  "@
7202   extractps\t{%2, %1, %0|%0, %1, %2}
7203   extractps\t{%2, %1, %0|%0, %1, %2}
7204   vextractps\t{%2, %1, %0|%0, %1, %2}
7205   #
7206   #"
7207  "&& reload_completed && SSE_REG_P (operands[0])"
7208  [(const_int 0)]
7209{
7210  rtx dest = lowpart_subreg (V4SFmode, operands[0], SFmode);
7211  switch (INTVAL (operands[2]))
7212    {
7213    case 1:
7214    case 3:
7215      emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
7216				      operands[2], operands[2],
7217				      GEN_INT (INTVAL (operands[2]) + 4),
7218				      GEN_INT (INTVAL (operands[2]) + 4)));
7219      break;
7220    case 2:
7221      emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
7222      break;
7223    default:
7224      /* 0 should be handled by the *vec_extractv4sf_0 pattern above.  */
7225      gcc_unreachable ();
7226    }
7227  DONE;
7228}
7229  [(set_attr "isa" "noavx,noavx,avx,noavx,avx")
7230   (set_attr "type" "sselog,sselog,sselog,*,*")
7231   (set_attr "prefix_data16" "1,1,1,*,*")
7232   (set_attr "prefix_extra" "1,1,1,*,*")
7233   (set_attr "length_immediate" "1,1,1,*,*")
7234   (set_attr "prefix" "orig,orig,maybe_evex,*,*")
7235   (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
7236
;; Extract element operand 2 (0..3) of a V4SF held in offsettable memory.
;; The template is "#"; after reload the insn becomes a plain SF load from
;; the same memory at byte offset index * 4.
7237(define_insn_and_split "*vec_extractv4sf_mem"
7238  [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
7239	(vec_select:SF
7240	  (match_operand:V4SF 1 "memory_operand" "o,o,o")
7241	  (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
7242  "TARGET_SSE"
7243  "#"
7244  "&& reload_completed"
7245  [(set (match_dup 0) (match_dup 1))]
7246{
7247  operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
7248})
7249
;; Name pieces for the quarter-width (128-bit) extract expander that
;; follows: <extract_type> is the ISA name prefix for each 512-bit mode.
7250(define_mode_attr extract_type
7251  [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
7252
;; <extract_suf> is the element-size x count suffix of the mnemonic.
7253(define_mode_attr extract_suf
7254  [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
7255
;; 512-bit modes covered by that expander; the 64-bit-element modes are
;; only enabled with TARGET_AVX512DQ.
7256(define_mode_iterator AVX512_VEC
7257  [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
7258
;; Expander for a masked extract of one 128-bit quarter from a 512-bit
;; vector.  Operand 2 (0..3) picks the quarter; operands 3 and 4 are passed
;; through to the underlying *_1_mask pattern (presumably the merge source
;; and the mask register -- the subst is outside this chunk).
;; If the destination is memory and is not the same rtx as operand 3, the
;; result is computed into a fresh register and then moved to operands[0].
;; For V16SI/V16SF the 32x4 pattern is emitted with four consecutive element
;; indices starting at mask * 4; otherwise the 64x2 pattern is emitted with
;; two indices starting at mask * 2.
7259(define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
7260  [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
7261   (match_operand:AVX512_VEC 1 "register_operand")
7262   (match_operand:SI 2 "const_0_to_3_operand")
7263   (match_operand:<ssequartermode> 3 "nonimmediate_operand")
7264   (match_operand:QI 4 "register_operand")]
7265  "TARGET_AVX512F"
7266{
7267  int mask;
7268  mask = INTVAL (operands[2]);
7269  rtx dest = operands[0];
7270
7271  if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
7272    dest = gen_reg_rtx (<ssequartermode>mode);
7273
7274  if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
7275    emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
7276        operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
7277	GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
7278	operands[4]));
7279  else
7280    emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
7281        operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
7282	operands[4]));
7283  if (dest != operands[0])
7284    emit_move_insn (operands[0], dest);
7285  DONE;
7286})
7287
;; Masked store form of VEXTRACT{F,I}64x2: the destination is memory and the
;; merge source (operand 4) must be that same memory (constraint "0" plus
;; the rtx_equal_p check).  Operand 5 is the mask register.
;; The condition requires operands 2,3 to be an even-aligned consecutive
;; pair of element indices; the instruction immediate is operand 2 >> 1,
;; written back into operands[2] by the output code.
7288(define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
7289  [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
7290	(vec_merge:<ssequartermode>
7291	  (vec_select:<ssequartermode>
7292	    (match_operand:V8FI 1 "register_operand" "v")
7293	    (parallel [(match_operand 2  "const_0_to_7_operand")
7294	      (match_operand 3  "const_0_to_7_operand")]))
7295	  (match_operand:<ssequartermode> 4 "memory_operand" "0")
7296	  (match_operand:QI 5 "register_operand" "Yk")))]
7297  "TARGET_AVX512DQ
7298   && INTVAL (operands[2]) % 2 == 0
7299   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
7300   && rtx_equal_p (operands[4], operands[0])"
7301{
7302  operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
7303  return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
7304}
7305  [(set_attr "type" "sselog")
7306   (set_attr "prefix_extra" "1")
7307   (set_attr "length_immediate" "1")
7308   (set_attr "memory" "store")
7309   (set_attr "prefix" "evex")
7310   (set_attr "mode" "<sseinsnmode>")])
7311
;; Masked store form of VEXTRACT{F,I}32x4: the destination is memory and the
;; merge source (operand 6) must be that same memory (constraint "0" plus
;; the rtx_equal_p check).  Operand 7 is the mask register.
;; The condition requires operands 2..5 to be four consecutive element
;; indices starting at a multiple of 4; the instruction immediate is
;; operand 2 >> 2, written back into operands[2] by the output code.
7312(define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
7313  [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
7314	(vec_merge:<ssequartermode>
7315	  (vec_select:<ssequartermode>
7316	    (match_operand:V16FI 1 "register_operand" "v")
7317	    (parallel [(match_operand 2  "const_0_to_15_operand")
7318	      (match_operand 3  "const_0_to_15_operand")
7319	      (match_operand 4  "const_0_to_15_operand")
7320	      (match_operand 5  "const_0_to_15_operand")]))
7321	  (match_operand:<ssequartermode> 6 "memory_operand" "0")
7322	  (match_operand:QI 7 "register_operand" "Yk")))]
7323  "TARGET_AVX512F
7324   && INTVAL (operands[2]) % 4 == 0
7325   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
7326   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
7327   && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
7328   && rtx_equal_p (operands[6], operands[0])"
7329{
7330  operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
7331  return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
7332}
7333  [(set_attr "type" "sselog")
7334   (set_attr "prefix_extra" "1")
7335   (set_attr "length_immediate" "1")
7336   (set_attr "memory" "store")
7337   (set_attr "prefix" "evex")
7338   (set_attr "mode" "<sseinsnmode>")])
7339
;; VEXTRACT{F,I}64x2: extract two consecutive 64-bit elements from a V8FI
;; register.  The condition requires operands 2,3 to be an even-aligned
;; consecutive pair; the instruction immediate is operand 2 >> 1, written
;; back into operands[2] by the output code.
;; The destination predicate/constraint and <mask_operand4> are supplied
;; from outside this chunk via <store_mask_*> / <mask_name>.
7340(define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
7341  [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7342	(vec_select:<ssequartermode>
7343	  (match_operand:V8FI 1 "register_operand" "v")
7344	  (parallel [(match_operand 2  "const_0_to_7_operand")
7345            (match_operand 3  "const_0_to_7_operand")])))]
7346  "TARGET_AVX512DQ
7347   && INTVAL (operands[2]) % 2 == 0
7348   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
7349{
7350  operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
7351  return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
7352}
7353  [(set_attr "type" "sselog1")
7354   (set_attr "prefix_extra" "1")
7355   (set_attr "length_immediate" "1")
7356   (set_attr "prefix" "evex")
7357   (set_attr "mode" "<sseinsnmode>")])
7358
;; After reload, extracting the lowest quarter (elements 0,1) of a V8FI
;; register becomes a plain move.  The split applies when AVX512VL is
;; available, or the destination is a register, or the source is not an
;; extended SSE register (EXT_REX_SSE_REG_P).
;; Without AVX512VL, with a register destination and an extended-register
;; source, the destination is widened to <MODE>mode so the move is done in
;; the full mode; in every other case the source is narrowed to its
;; quarter-mode low part.
7359(define_split
7360  [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
7361	(vec_select:<ssequartermode>
7362	  (match_operand:V8FI 1 "register_operand")
7363	  (parallel [(const_int 0) (const_int 1)])))]
7364  "TARGET_AVX512DQ
7365   && reload_completed
7366   && (TARGET_AVX512VL
7367       || REG_P (operands[0])
7368       || !EXT_REX_SSE_REG_P (operands[1]))"
7369  [(set (match_dup 0) (match_dup 1))]
7370{
7371  if (!TARGET_AVX512VL
7372      && REG_P (operands[0])
7373      && EXT_REX_SSE_REG_P (operands[1]))
7374    operands[0]
7375      = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
7376  else
7377    operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
7378})
7379
;; VEXTRACT{F,I}32x4: extract four consecutive 32-bit elements from a V16FI
;; register.  The condition requires operands 2..5 to be four consecutive
;; indices starting at a multiple of 4; the instruction immediate is
;; operand 2 >> 2, written back into operands[2] by the output code.
;; The destination predicate/constraint and <mask_operand6> are supplied
;; from outside this chunk via <store_mask_*> / <mask_name>.
7380(define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
7381  [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7382	(vec_select:<ssequartermode>
7383	  (match_operand:V16FI 1 "register_operand" "v")
7384	  (parallel [(match_operand 2  "const_0_to_15_operand")
7385            (match_operand 3  "const_0_to_15_operand")
7386            (match_operand 4  "const_0_to_15_operand")
7387            (match_operand 5  "const_0_to_15_operand")])))]
7388  "TARGET_AVX512F
7389   && INTVAL (operands[2]) % 4 == 0
7390   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
7391   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
7392   && INTVAL (operands[4]) == INTVAL (operands[5]) - 1"
7393{
7394  operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
7395  return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
7396}
7397  [(set_attr "type" "sselog1")
7398   (set_attr "prefix_extra" "1")
7399   (set_attr "length_immediate" "1")
7400   (set_attr "prefix" "evex")
7401   (set_attr "mode" "<sseinsnmode>")])
7402
;; After reload, extracting the lowest quarter (elements 0..3) of a V16FI
;; register becomes a plain move.  The split applies when AVX512VL is
;; available, or the destination is a register, or the source is not an
;; extended SSE register (EXT_REX_SSE_REG_P).
;; Without AVX512VL, with a register destination and an extended-register
;; source, the destination is widened to <MODE>mode so the move is done in
;; the full mode; in every other case the source is narrowed to its
;; quarter-mode low part.
7403(define_split
7404  [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
7405	(vec_select:<ssequartermode>
7406	  (match_operand:V16FI 1 "register_operand")
7407	  (parallel [(const_int 0) (const_int 1)
7408		     (const_int 2) (const_int 3)])))]
7409  "TARGET_AVX512F
7410   && reload_completed
7411   && (TARGET_AVX512VL
7412       || REG_P (operands[0])
7413       || !EXT_REX_SSE_REG_P (operands[1]))"
7414  [(set (match_dup 0) (match_dup 1))]
7415{
7416  if (!TARGET_AVX512VL
7417      && REG_P (operands[0])
7418      && EXT_REX_SSE_REG_P (operands[1]))
7419    operands[0]
7420      = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
7421  else
7422    operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
7423})
7424
;; Name pieces for the half-width (256-bit) extract expander that follows:
;; <extract_type_2> is the ISA name prefix for each 512-bit mode.
7425(define_mode_attr extract_type_2
7426  [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
7427
;; <extract_suf_2> is the element-size x count suffix of the mnemonic.
7428(define_mode_attr extract_suf_2
7429  [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
7430
;; 512-bit modes covered by that expander; the 32-bit-element modes are
;; only enabled with TARGET_AVX512DQ.
7431(define_mode_iterator AVX512_VEC_2
7432  [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
7433
;; Expander for a masked extract of one 256-bit half from a 512-bit vector.
;; Operand 2 (0 or 1) selects gen_vec_extract_lo_<mode>_mask or
;; gen_vec_extract_hi_<mode>_mask; operands 3 and 4 are passed through to it
;; (presumably the merge source and the mask register -- confirm against the
;; *_mask patterns, which are outside this chunk).
;; If the destination is memory and is not the same rtx as operand 3, the
;; result is computed into a fresh register and then moved to operands[0].
7434(define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
7435  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7436   (match_operand:AVX512_VEC_2 1 "register_operand")
7437   (match_operand:SI 2 "const_0_to_1_operand")
7438   (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
7439   (match_operand:QI 4 "register_operand")]
7440  "TARGET_AVX512F"
7441{
7442  rtx (*insn)(rtx, rtx, rtx, rtx);
7443  rtx dest = operands[0];
7444
7445  if (MEM_P (dest) && !rtx_equal_p (dest, operands[3]))
7446    dest = gen_reg_rtx (<ssehalfvecmode>mode);
7447
7448  switch (INTVAL (operands[2]))
7449    {
7450    case 0:
7451      insn = gen_vec_extract_lo_<mode>_mask;
7452      break;
7453    case 1:
7454      insn = gen_vec_extract_hi_<mode>_mask;
7455      break;
7456    default:
7457      gcc_unreachable ();
7458    }
7459
7460  emit_insn (insn (dest, operands[1], operands[3], operands[4]));
7461  if (dest != operands[0])
7462    emit_move_insn (operands[0], dest);
7463  DONE;
7464})
7465
;; After reload, extracting the low half (elements 0..3) of a V8FI operand
;; becomes a plain move from its half-mode low part.  Memory-to-memory is
;; excluded, and the split only applies with AVX512VL or when the
;; destination is a register and the source is not an extended SSE register
;; (EXT_REX_SSE_REG_P).
7466(define_split
7467  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7468	(vec_select:<ssehalfvecmode>
7469	  (match_operand:V8FI 1 "nonimmediate_operand")
7470	  (parallel [(const_int 0) (const_int 1)
7471            (const_int 2) (const_int 3)])))]
7472  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7473   && reload_completed
7474   && (TARGET_AVX512VL
7475       || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
7476  [(set (match_dup 0) (match_dup 1))]
7477  "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7478
;; Masked extract of elements 0-3 of a V8FI register into memory.
;; Operand 2 (the merge source) must be the same memory as operand 0;
;; operand 3 is the QImode mask register.
(define_insn "vec_extract_lo_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
            (match_operand:V8FI 1 "register_operand" "v")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))
          (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
          (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512F && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
  [(set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")])
7496
;; Extract elements 0-3 of a V8FI value (optionally masked through
;; <mask_name>).  When no instruction is emitted the template is "#",
;; i.e. the insn is left to be split.
(define_insn "vec_extract_lo_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>"
          "=v,<store_mask_constraint>,v")
        (vec_select:<ssehalfvecmode>
          (match_operand:V8FI 1 "<store_mask_predicate>"
            "v,v,<store_mask_constraint>")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "TARGET_AVX512F
   && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
{
  /* A real vextract is emitted for the masked form, or when AVX512VL
     is unavailable and the source is not in memory.  */
  const bool emit_vextract
    = <mask_applied> || (!TARGET_AVX512VL && !MEM_P (operands[1]));

  if (!emit_vextract)
    return "#";
  return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
}
  [(set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "evex")
   (set_attr "memory" "none,store,load")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")])
7517
;; Masked extract of elements 4-7 of a V8FI register into memory.
;; Operand 2 (the merge source) must be the same memory as operand 0;
;; operand 3 is the QImode mask register.
(define_insn "vec_extract_hi_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
	(vec_merge:<ssehalfvecmode>
	  (vec_select:<ssehalfvecmode>
	    (match_operand:V8FI 1 "register_operand" "v")
	    (parallel [(const_int 4) (const_int 5)
	      (const_int 6) (const_int 7)]))
	  (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
	  (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512F
   && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
  ;; Classified as "sselog1" like the lo counterpart and every other
  ;; single-source vextract pattern in this group (was "sselog").
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7536
;; Extract elements 4-7 of a V8FI register (optionally masked through
;; <mask_name>).
(define_insn "vec_extract_hi_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>"
          "=<store_mask_constraint>")
        (vec_select:<ssehalfvecmode>
          (match_operand:V8FI 1 "register_operand" "v")
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "TARGET_AVX512F"
  "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
  [(set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")])
7550
;; Masked extract of elements 8-15 of a V16FI register into memory.
;; Operand 2 (the merge source) must be the same memory as operand 0;
;; operand 3 is the QImode mask register.
(define_insn "vec_extract_hi_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
            (match_operand:V16FI 1 "register_operand" "v")
            (parallel [(const_int 8)  (const_int 9)
                       (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13)
                       (const_int 14) (const_int 15)]))
          (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
          (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512DQ && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
  [(set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")])
7570
;; Extract elements 8-15 of a V16FI register (optionally masked through
;; <mask_name>).  Alternative 0 is tagged isa "avx512dq" and uses the
;; 32x8 form; alternative 1 is tagged "noavx512dq" and uses
;; vextracti64x4 with no mask operand.
(define_insn "vec_extract_hi_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>"
          "=<store_mask_constraint>,vm")
        (vec_select:<ssehalfvecmode>
          (match_operand:V16FI 1 "register_operand" "v,v")
          (parallel [(const_int 8)  (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)])))]
  "TARGET_AVX512F && <mask_avx512dq_condition>"
  "@
   vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
   vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "isa" "avx512dq,noavx512dq")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")])
7589
;; Masked extraction of one half of a VI48F_256 vector.
;;   operand 0 - destination (register or memory)
;;   operand 1 - source vector register
;;   operand 2 - half selector, constant 0 or 1
;;   operand 3 - merge source (vector_move_operand)
;;   operand 4 - QImode mask register
(define_expand "avx512vl_vextractf128<mode>"
  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
   (match_operand:VI48F_256 1 "register_operand")
   (match_operand:SI 2 "const_0_to_1_operand")
   (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512DQ && TARGET_AVX512VL"
{
  rtx (*gen_extract) (rtx, rtx, rtx, rtx);
  rtx target = operands[0];

  if (MEM_P (target))
    {
      bool need_temp;

      if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
	/* For V8S[IF]mode there are maskm insns with =m and 0
	   constraints.  */
	need_temp = !rtx_equal_p (target, operands[3]);
      else
	/* For V4D[IF]mode, hi insns don't allow memory, and
	   lo insns have =m and 0C constraints.  */
	need_temp = (operands[2] != const0_rtx
		     || (!rtx_equal_p (target, operands[3])
			 && GET_CODE (operands[3]) != CONST_VECTOR));

      if (need_temp)
	target = gen_reg_rtx (<ssehalfvecmode>mode);
    }

  const HOST_WIDE_INT half = INTVAL (operands[2]);
  if (half == 0)
    gen_extract = gen_vec_extract_lo_<mode>_mask;
  else if (half == 1)
    gen_extract = gen_vec_extract_hi_<mode>_mask;
  else
    gcc_unreachable ();

  emit_insn (gen_extract (target, operands[1], operands[3], operands[4]));

  /* Copy the result out if a temporary was substituted above.  */
  if (target != operands[0])
    emit_move_insn (operands[0], target);
  DONE;
})
7629
;; Unmasked extraction of one half of a V_256 vector: operand 2
;; (constant 0 or 1) picks the lo or hi extract pattern.
(define_expand "avx_vextractf128<mode>"
  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
   (match_operand:V_256 1 "register_operand")
   (match_operand:SI 2 "const_0_to_1_operand")]
  "TARGET_AVX"
{
  rtx (*gen_extract) (rtx, rtx);
  const HOST_WIDE_INT half = INTVAL (operands[2]);

  if (half == 0)
    gen_extract = gen_vec_extract_lo_<mode>;
  else if (half == 1)
    gen_extract = gen_vec_extract_hi_<mode>;
  else
    gcc_unreachable ();

  emit_insn (gen_extract (operands[0], operands[1]));
  DONE;
})
7653
;; Extract elements 0-7 of a V16FI value (optionally masked through
;; <mask_name>).  When no instruction is emitted the template is "#",
;; i.e. the insn is left to be split.
(define_insn "vec_extract_lo_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,v,m")
        (vec_select:<ssehalfvecmode>
          (match_operand:V16FI 1 "<store_mask_predicate>"
            "v,<store_mask_constraint>,v")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "TARGET_AVX512F
   && <mask_mode512bit_condition>
   && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
{
  /* A real vextract is emitted for the masked form, or when AVX512VL
     is unavailable, the destination is not a register and the source
     is an extended-REX SSE register.  */
  const bool emit_vextract
    = (<mask_applied>
       || (!TARGET_AVX512VL
	   && !REG_P (operands[0])
	   && EXT_REX_SSE_REG_P (operands[1])));

  if (!emit_vextract)
    return "#";
  return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
}
  [(set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "evex")
   (set_attr "memory" "none,load,store")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")])
7681
;; After reload, rewrite an unmasked extract of elements 0-7 of a V16FI
;; value as a plain move.  Either the destination is widened to the
;; source mode or the source is narrowed to the destination mode.
(define_split
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
        (vec_select:<ssehalfvecmode>
          (match_operand:V16FI 1 "nonimmediate_operand")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "TARGET_AVX512F
   && (!MEM_P (operands[0]) || !MEM_P (operands[1]))
   && reload_completed
   && (TARGET_AVX512VL
       || REG_P (operands[0])
       || !EXT_REX_SSE_REG_P (operands[1]))"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Without AVX512VL, a register destination fed from an extended-REX
     source is moved in the full source mode.  */
  const bool widen_dest
    = (!TARGET_AVX512VL
       && REG_P (operands[0])
       && EXT_REX_SSE_REG_P (operands[1]));

  if (widen_dest)
    operands[0]
      = lowpart_subreg (<MODE>mode, operands[0], <ssehalfvecmode>mode);
  else
    operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
})
7705
;; Extract elements 0-1 of a VI8F_256 value (optionally masked through
;; <mask_name>).  The unmasked form emits "#" and is left to be split.
(define_insn "vec_extract_lo_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,v,m")
	(vec_select:<ssehalfvecmode>
	  (match_operand:VI8F_256 1 "<store_mask_predicate>"
				    "v,<store_mask_constraint>,v")
	  (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_AVX
   && <mask_avx512vl_condition> && <mask_avx512dq_condition>
   && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
{
  /* Print the mask through <mask_operand2>, as every other
     <mask_name> pattern in this group does, instead of a hand-written
     %{%3%}.  The explicit _mask patterns spell the masked destination
     as %0%{%3%}%N2; the bare form dropped the %N2 part, so the merge
     operand was never reflected in the emitted instruction.
     NOTE(review): <mask_operand2> is defined in the subst machinery
     outside this chunk - confirm it expands to %{%3%}%N2.  */
  if (<mask_applied>)
    return "vextract<shuffletype>64x2\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
  else
    return "#";
}
   [(set_attr "type" "sselog1")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "memory" "none,load,store")
    (set_attr "prefix" "evex")
    (set_attr "mode" "XI")])
7727
;; After reload, rewrite an unmasked extract of elements 0-1 of a
;; VI8F_256 value as a plain move of the low part of operand 1.
(define_split
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI8F_256 1 "nonimmediate_operand")
          (parallel [(const_int 0)
                     (const_int 1)])))]
  "TARGET_AVX
   && (!MEM_P (operands[0]) || !MEM_P (operands[1]))
   && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
})
7737
;; Extract elements 2-3 of a VI8F_256 register (optionally masked
;; through <mask_name>).  The mnemonic depends on the enabled ISA.
(define_insn "vec_extract_hi_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>"
          "=v,<store_mask_constraint>")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI8F_256 1 "register_operand" "v,v")
          (parallel [(const_int 2)
                     (const_int 3)])))]
  "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
{
  /* No AVX512VL: use the <i128> form.  */
  if (!TARGET_AVX512VL)
    return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";

  /* AVX512VL with AVX512DQ: 64x2 form, carrying the mask operand.  */
  if (TARGET_AVX512DQ)
    return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";

  /* AVX512VL without AVX512DQ: 32x4 form, no mask operand.  */
  return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
}
  [(set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")])
7760
;; After reload, rewrite an unmasked extract of elements 0-3 of a
;; VI4F_256 value as a plain move of the low part of operand 1.
(define_split
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI4F_256 1 "nonimmediate_operand")
          (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))]
  "TARGET_AVX
   && (!MEM_P (operands[0]) || !MEM_P (operands[1]))
   && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
})
7771
;; Extract elements 0-3 of a VI4F_256 value (optionally masked through
;; <mask_name>).  The unmasked form emits "#" and is left to be split.
(define_insn "vec_extract_lo_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>"
          "=<store_mask_constraint>,v")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI4F_256 1 "<store_mask_predicate>"
            "v,<store_mask_constraint>")
          (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))]
  "TARGET_AVX
   && <mask_avx512vl_condition> && <mask_avx512dq_condition>
   && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
{
  if (!<mask_applied>)
    return "#";
  return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
}
  [(set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")])
7794
;; Masked extract of elements 0-3 of a VI4F_256 register into memory.
;; Operand 2 (the merge source) must be the same memory as operand 0;
;; operand 3 is the QImode mask register.
(define_insn "vec_extract_lo_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
            (match_operand:VI4F_256 1 "register_operand" "v")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))
          (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
          (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512VL
   && TARGET_AVX512F
   && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
  [(set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")])
7812
;; Masked extract of elements 4-7 of a VI4F_256 register into memory.
;; Operand 2 (the merge source) must be the same memory as operand 0;
;; operand 3 is the mask register.
(define_insn "vec_extract_hi_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
	(vec_merge:<ssehalfvecmode>
	  (vec_select:<ssehalfvecmode>
	    (match_operand:VI4F_256 1 "register_operand" "v")
	    (parallel [(const_int 4) (const_int 5)
		      (const_int 6) (const_int 7)]))
	  (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
	  ;; The mask is a "Yk" mask register, so it is declared QImode
	  ;; exactly like vec_extract_lo_<mode>_maskm; it was previously
	  ;; declared with the vector mode <ssehalfvecmode>.
	  (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512F && TARGET_AVX512VL
   && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
  [(set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7829
;; Masked extract of elements 4-7 of a VI4F_256 register into a
;; register.  Operand 2 is the merge source (tied to the destination
;; or constraint "C"); operand 3 is the mask register.
(define_insn "vec_extract_hi_<mode>_mask"
  [(set (match_operand:<ssehalfvecmode> 0 "register_operand" "=v")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
            (match_operand:VI4F_256 1 "register_operand" "v")
            (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]))
          (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "0C")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
  "TARGET_AVX512VL"
  "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
  [(set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")
   (set_attr "type" "sselog1")])
7845
;; Unmasked extract of elements 4-7 of a VI4F_256 register.
;; Alternative 0 uses the <i128> form; alternative 1 is tagged isa
;; "avx512vl" and uses the 32x4 form.
(define_insn "vec_extract_hi_<mode>"
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI4F_256 1 "register_operand" "x, v")
          (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)])))]
  "TARGET_AVX"
  "@
    vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
    vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "type" "sselog1")
   (set_attr "isa" "*, avx512vl")
   (set_attr "prefix" "vex, evex")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "length_immediate" "1")])
7861
;; Extract elements 0-15 of a V32HI value.  Emitted as "#" and split
;; into a plain move after reload, except when AVX512VL is unavailable,
;; the destination is not a register and the source is an extended-REX
;; SSE register; that case emits vextracti64x4.
(define_insn_and_split "vec_extract_lo_v32hi"
  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,v,m")
        (vec_select:V16HI
          (match_operand:V32HI 1 "nonimmediate_operand" "v,m,v")
          (parallel [(const_int 0)  (const_int 1)  (const_int 2)  (const_int 3)
                     (const_int 4)  (const_int 5)  (const_int 6)  (const_int 7)
                     (const_int 8)  (const_int 9)  (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))]
  "TARGET_AVX512F && (!MEM_P (operands[0]) || !MEM_P (operands[1]))"
{
  const bool will_split
    = (TARGET_AVX512VL
       || REG_P (operands[0])
       || !EXT_REX_SSE_REG_P (operands[1]));

  if (will_split)
    return "#";
  return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
}
  "&& reload_completed
   && (TARGET_AVX512VL
       || REG_P (operands[0])
       || !EXT_REX_SSE_REG_P (operands[1]))"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Without AVX512VL, a register destination fed from an extended-REX
     source is moved in the full V32HImode.  */
  const bool widen_dest
    = (!TARGET_AVX512VL
       && REG_P (operands[0])
       && EXT_REX_SSE_REG_P (operands[1]));

  if (widen_dest)
    operands[0] = lowpart_subreg (V32HImode, operands[0], V16HImode);
  else
    operands[1] = gen_lowpart (V16HImode, operands[1]);
}
  [(set_attr "mode" "XI")
   (set_attr "prefix" "evex")
   (set_attr "memory" "none,load,store")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")])
7902
;; Extract elements 16-31 of a V32HI register with vextracti64x4.
(define_insn "vec_extract_hi_v32hi"
  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
        (vec_select:V16HI
          (match_operand:V32HI 1 "register_operand" "v")
          (parallel [(const_int 16) (const_int 17) (const_int 18) (const_int 19)
                     (const_int 20) (const_int 21) (const_int 22) (const_int 23)
                     (const_int 24) (const_int 25) (const_int 26) (const_int 27)
                     (const_int 28) (const_int 29) (const_int 30) (const_int 31)])))]
  "TARGET_AVX512F"
  "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "mode" "XI")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")])
7922
;; Extract elements 0-7 of a V16HI value.  Always emitted as "#" and
;; split after reload into a plain move of the low part of operand 1.
(define_insn_and_split "vec_extract_lo_v16hi"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
        (vec_select:V8HI
          (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
          (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5) (const_int 6) (const_int 7)])))]
  "TARGET_AVX && (!MEM_P (operands[0]) || !MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = gen_lowpart (V8HImode, operands[1]);
})
7936
;; Extract elements 8-15 of a V16HI register.  Alternative 0 uses the
;; vextract%~128 form; alternatives 1 and 2 use vextracti32x4, the
;; latter printing operand 1 through the %g modifier.
;; NOTE(review): alternative 1 prints the plain %1 operand with
;; vextracti32x4 but is gated on isa "avx512dq" - confirm whether
;; "avx512vl" was intended.
(define_insn "vec_extract_hi_v16hi"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm,vm,vm")
        (vec_select:V8HI
          (match_operand:V16HI 1 "register_operand" "x,v,v")
          (parallel [(const_int 8)  (const_int 9)  (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))]
  "TARGET_AVX"
  "@
   vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
   vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
   vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
  [(set_attr "isa" "*,avx512dq,avx512f")
   (set_attr "prefix" "vex,evex,evex")
   (set_attr "mode" "OI")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")])
7956
;; Extract elements 0-31 of a V64QI value.  Emitted as "#" and split
;; into a plain move after reload, except when AVX512VL is unavailable,
;; the destination is not a register and the source is an extended-REX
;; SSE register; that case emits vextracti64x4.
(define_insn_and_split "vec_extract_lo_v64qi"
  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,v,m")
        (vec_select:V32QI
          (match_operand:V64QI 1 "nonimmediate_operand" "v,m,v")
          (parallel [(const_int 0)  (const_int 1)  (const_int 2)  (const_int 3)
                     (const_int 4)  (const_int 5)  (const_int 6)  (const_int 7)
                     (const_int 8)  (const_int 9)  (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13) (const_int 14) (const_int 15)
                     (const_int 16) (const_int 17) (const_int 18) (const_int 19)
                     (const_int 20) (const_int 21) (const_int 22) (const_int 23)
                     (const_int 24) (const_int 25) (const_int 26) (const_int 27)
                     (const_int 28) (const_int 29) (const_int 30) (const_int 31)])))]
  "TARGET_AVX512F && (!MEM_P (operands[0]) || !MEM_P (operands[1]))"
{
  const bool will_split
    = (TARGET_AVX512VL
       || REG_P (operands[0])
       || !EXT_REX_SSE_REG_P (operands[1]));

  if (will_split)
    return "#";
  return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
}
  "&& reload_completed
   && (TARGET_AVX512VL
       || REG_P (operands[0])
       || !EXT_REX_SSE_REG_P (operands[1]))"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Without AVX512VL, a register destination fed from an extended-REX
     source is moved in the full V64QImode.  */
  const bool widen_dest
    = (!TARGET_AVX512VL
       && REG_P (operands[0])
       && EXT_REX_SSE_REG_P (operands[1]));

  if (widen_dest)
    operands[0] = lowpart_subreg (V64QImode, operands[0], V32QImode);
  else
    operands[1] = gen_lowpart (V32QImode, operands[1]);
}
  [(set_attr "mode" "XI")
   (set_attr "prefix" "evex")
   (set_attr "memory" "none,load,store")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")])
8005
;; Extract elements 32-63 of a V64QI register with vextracti64x4.
(define_insn "vec_extract_hi_v64qi"
  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=vm")
        (vec_select:V32QI
          (match_operand:V64QI 1 "register_operand" "v")
          (parallel [(const_int 32) (const_int 33) (const_int 34) (const_int 35)
                     (const_int 36) (const_int 37) (const_int 38) (const_int 39)
                     (const_int 40) (const_int 41) (const_int 42) (const_int 43)
                     (const_int 44) (const_int 45) (const_int 46) (const_int 47)
                     (const_int 48) (const_int 49) (const_int 50) (const_int 51)
                     (const_int 52) (const_int 53) (const_int 54) (const_int 55)
                     (const_int 56) (const_int 57) (const_int 58) (const_int 59)
                     (const_int 60) (const_int 61) (const_int 62) (const_int 63)])))]
  "TARGET_AVX512F"
  "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "mode" "XI")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")])
8033
;; Extract elements 0-15 of a V32QI value.  Always emitted as "#" and
;; split after reload into a plain move of the low part of operand 1.
(define_insn_and_split "vec_extract_lo_v32qi"
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
        (vec_select:V16QI
          (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
          (parallel [(const_int 0)  (const_int 1)  (const_int 2)  (const_int 3)
                     (const_int 4)  (const_int 5)  (const_int 6)  (const_int 7)
                     (const_int 8)  (const_int 9)  (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))]
  "TARGET_AVX && (!MEM_P (operands[0]) || !MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = gen_lowpart (V16QImode, operands[1]);
})
8051
;; Extract elements 16-31 of a V32QI register.  Alternative 0 uses the
;; vextract%~128 form; alternatives 1 and 2 use vextracti32x4, the
;; latter printing operand 1 through the %g modifier.
;; NOTE(review): alternative 1 prints the plain %1 operand with
;; vextracti32x4 but is gated on isa "avx512dq" - confirm whether
;; "avx512vl" was intended.
(define_insn "vec_extract_hi_v32qi"
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=xm,vm,vm")
        (vec_select:V16QI
          (match_operand:V32QI 1 "register_operand" "x,v,v")
          (parallel [(const_int 16) (const_int 17) (const_int 18) (const_int 19)
                     (const_int 20) (const_int 21) (const_int 22) (const_int 23)
                     (const_int 24) (const_int 25) (const_int 26) (const_int 27)
                     (const_int 28) (const_int 29) (const_int 30) (const_int 31)])))]
  "TARGET_AVX"
  "@
   vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
   vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
   vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
  [(set_attr "isa" "*,avx512dq,avx512f")
   (set_attr "prefix" "vex,evex,evex")
   (set_attr "mode" "OI")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "type" "sselog1")])
8075
;; Modes handled by vec_extract patterns.  Entries carrying a condition
;; string are enabled only when that condition holds; the 128-bit
;; modes other than V2TI are unconditional.
(define_mode_iterator VEC_EXTRACT_MODE
  [(V64QI "TARGET_AVX512BW")
   (V32QI "TARGET_AVX")
   V16QI
   (V32HI "TARGET_AVX512BW")
   (V16HI "TARGET_AVX")
   V8HI
   (V16SI "TARGET_AVX512F")
   (V8SI "TARGET_AVX")
   V4SI
   (V8DI "TARGET_AVX512F")
   (V4DI "TARGET_AVX")
   V2DI
   (V16SF "TARGET_AVX512F")
   (V8SF "TARGET_AVX")
   V4SF
   (V8DF "TARGET_AVX512F")
   (V4DF "TARGET_AVX")
   V2DF
   (V4TI "TARGET_AVX512F")
   (V2TI "TARGET_AVX")])
8085
;; Extract the scalar element whose constant index is operand 2 from
;; vector operand 1 into register operand 0.  All the work is done by
;; ix86_expand_vector_extract.
(define_expand "vec_extract<mode><ssescalarmodelower>"
  [(match_operand:<ssescalarmode> 0 "register_operand")
   (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
   (match_operand 2 "const_int_operand")]
  "TARGET_SSE"
{
  const HOST_WIDE_INT elt = INTVAL (operands[2]);

  ix86_expand_vector_extract (false, operands[0], operands[1], elt);
  DONE;
})
8096
;; Extract one half of a V_512 vector: operand 2 equal to 0 selects the
;; lo pattern, any other accepted value (i.e. 1) the hi pattern.
(define_expand "vec_extract<mode><ssehalfvecmodelower>"
  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
   (match_operand:V_512 1 "register_operand")
   (match_operand 2 "const_0_to_1_operand")]
  "TARGET_AVX512F"
{
  rtx extract
    = (INTVAL (operands[2])
       ? gen_vec_extract_hi_<mode> (operands[0], operands[1])
       : gen_vec_extract_lo_<mode> (operands[0], operands[1]));

  emit_insn (extract);
  DONE;
})
8109
8110;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8111;;
8112;; Parallel double-precision floating point element swizzling
8113;;
8114;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8115
;; vunpckhpd on V8DF: selects the odd-numbered elements of operand 1
;; interleaved with the odd-numbered elements of operand 2.
(define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
  [(set (match_operand:V8DF 0 "register_operand" "=v")
        (vec_select:V8DF
          (vec_concat:V16DF
            (match_operand:V8DF 1 "register_operand" "v")
            (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
          (parallel [(const_int 1) (const_int 9)  (const_int 3) (const_int 11)
                     (const_int 5) (const_int 13) (const_int 7) (const_int 15)])))]
  "TARGET_AVX512F"
  "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "mode" "V8DF")
   (set_attr "prefix" "evex")
   (set_attr "type" "sselog")])
8131
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhpd on V4DF: elements 1 and 3 of operand 1 interleaved with
;; elements 1 and 3 of operand 2.
(define_insn "avx_unpckhpd256<mask_name>"
  [(set (match_operand:V4DF 0 "register_operand" "=v")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "v")
            (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
          (parallel [(const_int 1) (const_int 5) (const_int 3) (const_int 7)])))]
  "TARGET_AVX && <mask_avx512vl_condition>"
  "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "mode" "V4DF")
   (set_attr "prefix" "vex")
   (set_attr "type" "sselog")])
8146
;; Interleave the high elements of two V4DF vectors in three steps:
;; operand 3 gets the even-element interleave, operand 4 the
;; odd-element interleave, and the result picks elements 2-3 of each.
;; Operands 3 and 4 are fresh pseudos created in the preparation code.
(define_expand "vec_interleave_highv4df"
  [(set (match_dup 3)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand")
            (match_operand:V4DF 2 "nonimmediate_operand"))
          (parallel [(const_int 0) (const_int 4) (const_int 2) (const_int 6)])))
   (set (match_dup 4)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 1)
            (match_dup 2))
          (parallel [(const_int 1) (const_int 5) (const_int 3) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 3)
            (match_dup 4))
          (parallel [(const_int 2) (const_int 3) (const_int 6) (const_int 7)])))]
  "TARGET_AVX"
{
  /* Allocate the two intermediate results.  */
  for (int tmp = 3; tmp <= 4; tmp++)
    operands[tmp] = gen_reg_rtx (V4DFmode);
})
8174
8175
;; Masked vunpckhpd on V2DF: element 1 of operand 1 followed by element
;; 1 of operand 2, merged with operand 3 under QImode mask operand 4.
(define_insn "avx512vl_unpckhpd128_mask"
  [(set (match_operand:V2DF 0 "register_operand" "=v")
        (vec_merge:V2DF
          (vec_select:V2DF
            (vec_concat:V4DF
              (match_operand:V2DF 1 "register_operand" "v")
              (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
            (parallel [(const_int 1)
                       (const_int 3)]))
          (match_operand:V2DF 3 "vector_move_operand" "0C")
          (match_operand:QI 4 "register_operand" "Yk")))]
  "TARGET_AVX512VL"
  "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
  [(set_attr "mode" "V2DF")
   (set_attr "prefix" "evex")
   (set_attr "type" "sselog")])
8191
;; Interleave the high elements of two V2DF vectors.  If the operand
;; combination is rejected by ix86_vec_interleave_v2df_operator_ok,
;; operand 2 is forced into a register first.
(define_expand "vec_interleave_highv2df"
  [(set (match_operand:V2DF 0 "register_operand")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand")
            (match_operand:V2DF 2 "nonimmediate_operand"))
          (parallel [(const_int 1) (const_int 3)])))]
  "TARGET_SSE2"
{
  const bool operands_ok = ix86_vec_interleave_v2df_operator_ok (operands, 1);

  if (!operands_ok)
    operands[2] = force_reg (V2DFmode, operands[2]);
})
8205
;; High-element interleave of two V2DF values.  Six alternatives, one
;; output template each, in this order: unpckhpd, vunpckhpd, movddup,
;; movlpd, vmovlpd, movhpd.
(define_insn "*vec_interleave_highv2df"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,v,v,x,v,m")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0"))
          (parallel [(const_int 1) (const_int 3)])))]
  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
  "@
   unpckhpd\t{%2, %0|%0, %2}
   vunpckhpd\t{%2, %1, %0|%0, %1, %2}
   %vmovddup\t{%H1, %0|%0, %H1}
   movlpd\t{%H1, %0|%0, %H1}
   vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
   %vmovhpd\t{%1, %0|%q0, %1}"
  [(set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")
   (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
   ;; prefix_data16 is "1" for alternatives 3 and 5, "*" otherwise.
   (set (attr "prefix_data16")
     (if_then_else (eq_attr "alternative" "3,5")
                   (const_string "1")
                   (const_string "*")))])
8230
;; Duplicate the even-numbered elements of V8DF operand 1, expressed as
;; the even-element interleave of operand 1 with itself.
(define_expand "avx512f_movddup512<mask_name>"
  [(set (match_operand:V8DF 0 "register_operand")
        (vec_select:V8DF
          (vec_concat:V16DF
            (match_operand:V8DF 1 "nonimmediate_operand")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 8)  (const_int 2) (const_int 10)
                     (const_int 4) (const_int 12) (const_int 6) (const_int 14)])))]
  "TARGET_AVX512F")
8242
;; Interleave the even-numbered elements of V8DF operands 1 and 2.
(define_expand "avx512f_unpcklpd512<mask_name>"
  [(set (match_operand:V8DF 0 "register_operand")
        (vec_select:V8DF
          (vec_concat:V16DF
            (match_operand:V8DF 1 "register_operand")
            (match_operand:V8DF 2 "nonimmediate_operand"))
          (parallel [(const_int 0) (const_int 8)  (const_int 2) (const_int 10)
                     (const_int 4) (const_int 12) (const_int 6) (const_int 14)])))]
  "TARGET_AVX512F")
8254
;; Even-element interleave of two V8DF values.  Alternative 0 has
;; operand 2 tied to operand 1 and emits vmovddup; alternative 1 emits
;; vunpcklpd.
(define_insn "*avx512f_unpcklpd512<mask_name>"
  [(set (match_operand:V8DF 0 "register_operand" "=v,v")
        (vec_select:V8DF
          (vec_concat:V16DF
            (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
            (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
          (parallel [(const_int 0) (const_int 8)  (const_int 2) (const_int 10)
                     (const_int 4) (const_int 12) (const_int 6) (const_int 14)])))]
  "TARGET_AVX512F"
  "@
   vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
   vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "mode" "V8DF")
   (set_attr "prefix" "evex")
   (set_attr "type" "sselog")])
8272
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Duplicate elements 0 and 2 of V4DF operand 1, expressed as the
;; even-element interleave of operand 1 with itself.
(define_expand "avx_movddup256<mask_name>"
  [(set (match_operand:V4DF 0 "register_operand")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "nonimmediate_operand")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 4) (const_int 2) (const_int 6)])))]
  "TARGET_AVX && <mask_avx512vl_condition>")
8283
;; 256-bit UNPCKLPD expander: { op1[0], op2[0], op1[2], op2[2] }.
(define_expand "avx_unpcklpd256<mask_name>"
  [(set (match_operand:V4DF 0 "register_operand")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand")
            (match_operand:V4DF 2 "nonimmediate_operand"))
          (parallel
            [(const_int 0) (const_int 4) (const_int 2) (const_int 6)])))]
  "TARGET_AVX && <mask_avx512vl_condition>")
8293
;; Matcher for the 256-bit low-double interleave.
;;   alternative 0: register op1, register-or-memory op2 -> vunpcklpd.
;;   alternative 1: memory op1 with op2 tied to it ("1")  -> vmovddup.
(define_insn "*avx_unpcklpd256<mask_name>"
  [(set (match_operand:V4DF 0 "register_operand"         "=v,v")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
            (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
          (parallel
            [(const_int 0) (const_int 4) (const_int 2) (const_int 6)])))]
  "TARGET_AVX && <mask_avx512vl_condition>"
  "@
   vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
   vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
8309
;; Full-width low interleave of two V4DF vectors, built from three steps
;; because the 256-bit unpack patterns work on element pairs {0,2} / {1,3}:
;;   tmp3 = { op1[0], op2[0], op1[2], op2[2] }
;;   tmp4 = { op1[1], op2[1], op1[3], op2[3] }
;;   op0  = { tmp3[0], tmp3[1], tmp4[0], tmp4[1] }
;;        = { op1[0], op2[0], op1[1], op2[1] }
(define_expand "vec_interleave_lowv4df"
  [;; Step 1: even-indexed pairs.
   (set (match_dup 3)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand")
            (match_operand:V4DF 2 "nonimmediate_operand"))
          (parallel
            [(const_int 0) (const_int 4) (const_int 2) (const_int 6)])))
   ;; Step 2: odd-indexed pairs.
   (set (match_dup 4)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 1)
            (match_dup 2))
          (parallel
            [(const_int 1) (const_int 5) (const_int 3) (const_int 7)])))
   ;; Step 3: low halves of both temporaries.
   (set (match_operand:V4DF 0 "register_operand")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 3)
            (match_dup 4))
          (parallel
            [(const_int 0) (const_int 1) (const_int 4) (const_int 5)])))]
 "TARGET_AVX"
{
  /* Allocate the two V4DF temporaries, operand 3 first, then operand 4.  */
  for (int tmp_idx = 3; tmp_idx != 5; tmp_idx++)
    operands[tmp_idx] = gen_reg_rtx (V4DFmode);
})
8337
;; Masked 128-bit vunpcklpd.  The interleave { op1[0], op2[0] } is merged
;; with operand 3 (tied to the destination or constant, per "0C") under the
;; QImode mask register in operand 4.
(define_insn "avx512vl_unpcklpd128_mask"
  [(set (match_operand:V2DF 0 "register_operand" "=v")
        (vec_merge:V2DF
          (vec_select:V2DF
            (vec_concat:V4DF
              (match_operand:V2DF 1 "register_operand" "v")
              (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
            (parallel [(const_int 0) (const_int 2)]))
          (match_operand:V2DF 3 "vector_move_operand" "0C")
          (match_operand:QI 4 "register_operand" "Yk")))]
  "TARGET_AVX512VL"
  "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V2DF")])
8353
;; Low interleave of two V2DF vectors: { op1[0], op2[0] }.  Both inputs may
;; be memory at expand time; when the operand combination is rejected by
;; ix86_vec_interleave_v2df_operator_ok, operand 1 is forced into a register.
(define_expand "vec_interleave_lowv2df"
  [(set (match_operand:V2DF 0 "register_operand")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand")
            (match_operand:V2DF 2 "nonimmediate_operand"))
          (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_SSE2"
{
  bool operands_ok = ix86_vec_interleave_v2df_operator_ok (operands, 0);

  if (!operands_ok)
    operands[1] = force_reg (V2DFmode, operands[1]);
})
8367
;; Matcher for the V2DF low interleave { op1[0], op2[0] }.  Alternatives:
;;   0  unpcklpd   (non-AVX, reg/reg, destructive)
;;   1  vunpcklpd  (AVX, three-operand)
;;   2  movddup    (SSE3, memory op1 interleaved with itself)
;;   3  movhpd     (non-AVX, op2 from memory into the high half)
;;   4  vmovhpd    (AVX form of the above)
;;   5  movlpd     (destination in memory: store op2 to the high half, %H0)
(define_insn "*vec_interleave_lowv2df"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,v,v,x,v,o")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v"))
          (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   %vmovddup\t{%1, %0|%0, %q1}
   movhpd\t{%2, %0|%0, %q2}
   vmovhpd\t{%2, %1, %0|%0, %1, %q2}
   %vmovlpd\t{%2, %H0|%H0, %2}"
  [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
   (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
   (set (attr "prefix_data16")
        (if_then_else (eq_attr "alternative" "3,5")
          (const_string "1")
          (const_string "*")))
   (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
8392
;; After reload, a V2DF "duplicate low element" whose destination is memory
;; is split into two DFmode stores of the register's low part, at byte
;; offsets 0 and 8 of the destination.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "register_operand")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_SSE3 && reload_completed"
  [(const_int 0)]
{
  rtx low = gen_lowpart (DFmode, operands[1]);

  /* Store the same scalar into both halves of the destination.  */
  for (int byte_off = 0; byte_off != 16; byte_off += 8)
    emit_move_insn (adjust_address (operands[0], DFmode, byte_off), low);
  DONE;
})
8410
;; A V2DF select of the same element (index N and N + 2, N in {0, 1}) from
;; a memory operand concatenated with itself becomes a vec_duplicate of the
;; DFmode element at byte offset N * 8.
(define_split
  [(set (match_operand:V2DF 0 "register_operand")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "memory_operand")
            (match_dup 1))
          (parallel [(match_operand:SI 2 "const_0_to_1_operand")
                     (match_operand:SI 3 "const_int_operand")])))]
  "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
  [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
{
  /* Narrow the memory reference to the selected double.  */
  HOST_WIDE_INT elt_offset = INTVAL (operands[2]) * 8;

  operands[1] = adjust_address (operands[1], DFmode, elt_offset);
})
8424
;; Scalar vscalef: element 0 comes from UNSPEC_SCALEF (op1, op2); the
;; remaining elements are copied from operand 1 (vec_merge with mask 1).
(define_insn "avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "register_operand" "v")
             (match_operand:VF_128 2 "<round_scalar_nimm_predicate>"
                                     "<round_scalar_constraint>")]
            UNSPEC_SCALEF)
          (match_dup 1)
          (const_int 1)))]
  "TARGET_AVX512F"
  "vscalef<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_scalar_mask_op3>}"
  [(set_attr "prefix" "evex")
   (set_attr "mode"  "<ssescalarmode>")])
8438
;; Packed vscalef: whole-vector UNSPEC_SCALEF of operands 1 and 2.
(define_insn "<avx512>_scalef<mode><mask_name><round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (unspec:VF_AVX512VL
          [(match_operand:VF_AVX512VL 1 "register_operand" "v")
           (match_operand:VF_AVX512VL 2 "nonimmediate_operand"
                                        "<round_constraint>")]
          UNSPEC_SCALEF))]
  "TARGET_AVX512F"
  "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
  [(set_attr "prefix" "evex")
   (set_attr "mode"  "<MODE>")])
8449
;; Zero-masking vpternlog entry point: forwards to the _maskz_1 generator,
;; inserting a zero vector of the operation's mode ahead of the mask
;; register (operand 5).
(define_expand "<avx512>_vternlog<mode>_maskz"
  [(match_operand:VI48_AVX512VL 0 "register_operand")
   (match_operand:VI48_AVX512VL 1 "register_operand")
   (match_operand:VI48_AVX512VL 2 "register_operand")
   (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
   (match_operand:SI 4 "const_0_to_255_operand")
   (match_operand:<avx512fmaskmode> 5 "register_operand")]
  "TARGET_AVX512F"
{
  rtx zero_vec = CONST0_RTX (<MODE>mode);

  emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (operands[0], operands[1],
                                                  operands[2], operands[3],
                                                  operands[4], zero_vec,
                                                  operands[5]));
  DONE;
})
8464
;; vpternlog: UNSPEC_VTERNLOG of three vector inputs and an 8-bit immediate
;; (operand 4).  Operand 1 is tied to the destination, so it is not printed
;; in the assembler template.
(define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (unspec:VI48_AVX512VL
          [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
           (match_operand:VI48_AVX512VL 2 "register_operand" "v")
           (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
           (match_operand:SI 4 "const_0_to_255_operand")]
          UNSPEC_VTERNLOG))]
  "TARGET_AVX512F"
  "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
8478
;; Merge-masking vpternlog: the UNSPEC_VTERNLOG result is merged with
;; operand 1 (tied to the destination) under the mask register, operand 5.
(define_insn "<avx512>_vternlog<mode>_mask"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI48_AVX512VL
          (unspec:VI48_AVX512VL
            [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
             (match_operand:VI48_AVX512VL 2 "register_operand" "v")
             (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
             (match_operand:SI 4 "const_0_to_255_operand")]
            UNSPEC_VTERNLOG)
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
8495
;; Packed vgetexp: UNSPEC_GETEXP applied to operand 1.
(define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (unspec:VF_AVX512VL
          [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>"
                                        "<round_saeonly_constraint>")]
          UNSPEC_GETEXP))]
  "TARGET_AVX512F"
  "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
8504
;; Scalar vgetexp: element 0 comes from UNSPEC_GETEXP (op1, op2); the
;; remaining elements are copied from operand 1 (vec_merge with mask 1).
(define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_scalar_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "register_operand" "v")
             (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>"
                                     "<round_saeonly_scalar_constraint>")]
            UNSPEC_GETEXP)
          (match_dup 1)
          (const_int 1)))]
  "TARGET_AVX512F"
  "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<ssescalarmode>")])
8518
;; valign: UNSPEC_ALIGN of two vector inputs and an 8-bit immediate
;; (operand 3).
(define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (unspec:VI48_AVX512VL
          [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
           (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
           (match_operand:SI 3 "const_0_to_255_operand")]
          UNSPEC_ALIGN))]
  "TARGET_AVX512F"
  "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
8529
;; Masked 512-bit vshufps from an 8-bit immediate.  The immediate holds four
;; 2-bit fields; the same fields are reused for each group of four result
;; elements.  Fields 0 and 1 index the operand-1 half of the concatenation
;; (base 4 * group), fields 2 and 3 index the operand-2 half
;; (base 16 + 4 * group).
(define_expand "avx512f_shufps512_mask"
  [(match_operand:V16SF 0 "register_operand")
   (match_operand:V16SF 1 "register_operand")
   (match_operand:V16SF 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_255_operand")
   (match_operand:V16SF 4 "register_operand")
   (match_operand:HI 5 "register_operand")]
  "TARGET_AVX512F"
{
  int mask = INTVAL (operands[3]);
  rtx sel[16];

  for (int group = 0; group != 4; group++)
    {
      int base = 4 * group;

      sel[base + 0] = GEN_INT (((mask >> 0) & 3) + base);
      sel[base + 1] = GEN_INT (((mask >> 2) & 3) + base);
      sel[base + 2] = GEN_INT (((mask >> 4) & 3) + 16 + base);
      sel[base + 3] = GEN_INT (((mask >> 6) & 3) + 16 + base);
    }

  emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1],
                                           operands[2],
                                           sel[0], sel[1], sel[2], sel[3],
                                           sel[4], sel[5], sel[6], sel[7],
                                           sel[8], sel[9], sel[10], sel[11],
                                           sel[12], sel[13], sel[14], sel[15],
                                           operands[4], operands[5]));
  DONE;
})
8560
8561
;; Zero-masking vfixupimm entry point: forwards to the _maskz_1 generator,
;; inserting a zero vector of the operation's mode ahead of the mask
;; register (operand 5).
(define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
  [(match_operand:VF_AVX512VL 0 "register_operand")
   (match_operand:VF_AVX512VL 1 "register_operand")
   (match_operand:VF_AVX512VL 2 "register_operand")
   (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
   (match_operand:SI 4 "const_0_to_255_operand")
   (match_operand:<avx512fmaskmode> 5 "register_operand")]
  "TARGET_AVX512F"
{
  rtx zero_vec = CONST0_RTX (<MODE>mode);

  emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
        operands[0], operands[1], operands[2], operands[3],
        operands[4], zero_vec, operands[5]
        <round_saeonly_expand_operand6>));
  DONE;
})
8577
;; Packed vfixupimm: UNSPEC_FIXUPIMM of two float vectors, an integer
;; vector (operand 3) and an 8-bit immediate (operand 4).  Operand 1 is tied
;; to the destination.
(define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (unspec:VF_AVX512VL
          [(match_operand:VF_AVX512VL 1 "register_operand" "0")
           (match_operand:VF_AVX512VL 2 "register_operand" "v")
           (match_operand:<sseintvecmode> 3 "nonimmediate_operand"
                                            "<round_saeonly_constraint>")
           (match_operand:SI 4 "const_0_to_255_operand")]
          UNSPEC_FIXUPIMM))]
  "TARGET_AVX512F"
  "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
8590
;; Merge-masking packed vfixupimm: the UNSPEC_FIXUPIMM result is merged
;; with operand 1 (tied to the destination) under mask register operand 5.
(define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VF_AVX512VL
          (unspec:VF_AVX512VL
            [(match_operand:VF_AVX512VL 1 "register_operand" "0")
             (match_operand:VF_AVX512VL 2 "register_operand" "v")
             (match_operand:<sseintvecmode> 3 "nonimmediate_operand"
                                              "<round_saeonly_constraint>")
             (match_operand:SI 4 "const_0_to_255_operand")]
            UNSPEC_FIXUPIMM)
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
8606
;; Zero-masking scalar vfixupimm entry point: forwards to the _maskz_1
;; generator, inserting a zero vector of the operation's mode ahead of the
;; mask register (operand 5).
(define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
  [(match_operand:VF_128 0 "register_operand")
   (match_operand:VF_128 1 "register_operand")
   (match_operand:VF_128 2 "register_operand")
   (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
   (match_operand:SI 4 "const_0_to_255_operand")
   (match_operand:<avx512fmaskmode> 5 "register_operand")]
  "TARGET_AVX512F"
{
  rtx zero_vec = CONST0_RTX (<MODE>mode);

  emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
        operands[0], operands[1], operands[2], operands[3],
        operands[4], zero_vec, operands[5]
        <round_saeonly_expand_operand6>));
  DONE;
})
8622
;; Scalar vfixupimm: element 0 comes from UNSPEC_FIXUPIMM; the remaining
;; elements are copied from operand 1, which is tied to the destination.
(define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "register_operand" "0")
             (match_operand:VF_128 2 "register_operand" "v")
             (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>"
                                              "<round_saeonly_constraint>")
             (match_operand:SI 4 "const_0_to_255_operand")]
            UNSPEC_FIXUPIMM)
          (match_dup 1)
          (const_int 1)))]
  "TARGET_AVX512F"
  "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %<iptr>3<round_saeonly_sd_mask_op5>, %4}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<ssescalarmode>")])
8638
;; Merge-masking scalar vfixupimm.  The inner vec_merge keeps only element 0
;; of the UNSPEC_FIXUPIMM result (the rest from operand 1); the outer
;; vec_merge then merges with operand 1 under mask register operand 5.
(define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (vec_merge:VF_128
            (unspec:VF_128
              [(match_operand:VF_128 1 "register_operand" "0")
               (match_operand:VF_128 2 "register_operand" "v")
               (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>"
                                                "<round_saeonly_constraint>")
               (match_operand:SI 4 "const_0_to_255_operand")]
              UNSPEC_FIXUPIMM)
            (match_dup 1)
            (const_int 1))
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %<iptr>3<round_saeonly_op6>, %4}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<ssescalarmode>")])
8657
;; Packed vrndscale: UNSPEC_ROUND of operand 1 with an 8-bit immediate
;; (operand 2).
(define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (unspec:VF_AVX512VL
          [(match_operand:VF_AVX512VL 1 "nonimmediate_operand"
                                        "<round_saeonly_constraint>")
           (match_operand:SI 2 "const_0_to_255_operand")]
          UNSPEC_ROUND))]
  "TARGET_AVX512F"
  "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
  [(set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
8669
;; Scalar vrndscale: element 0 comes from UNSPEC_ROUND (op1, op2, imm8);
;; the remaining elements are copied from operand 1 (vec_merge with mask 1).
(define_insn "avx512f_rndscale<mode><round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "register_operand" "v")
             (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>"
                                     "<round_saeonly_constraint>")
             (match_operand:SI 3 "const_0_to_255_operand")]
            UNSPEC_ROUND)
          (match_dup 1)
          (const_int 1)))]
  "TARGET_AVX512F"
  "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
  [(set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
8685
;; One bit in mask selects 2 elements.
;;
;; 512-bit vshufps given as an explicit 16-element selection.  The insn
;; condition requires the selectors of groups 1..3 (operands 7..18) to equal
;; those of group 0 (operands 3..6) plus 4, 8 and 12 respectively, so a
;; single 8-bit immediate can be rebuilt from operands 3..6 alone.
(define_insn "avx512f_shufps512_1<mask_name>"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
        (vec_select:V16SF
          (vec_concat:V32SF
            (match_operand:V16SF 1 "register_operand" "v")
            (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
          (parallel
            [;; Group 0.
             (match_operand 3  "const_0_to_3_operand")
             (match_operand 4  "const_0_to_3_operand")
             (match_operand 5  "const_16_to_19_operand")
             (match_operand 6  "const_16_to_19_operand")
             ;; Group 1.
             (match_operand 7  "const_4_to_7_operand")
             (match_operand 8  "const_4_to_7_operand")
             (match_operand 9  "const_20_to_23_operand")
             (match_operand 10  "const_20_to_23_operand")
             ;; Group 2.
             (match_operand 11  "const_8_to_11_operand")
             (match_operand 12  "const_8_to_11_operand")
             (match_operand 13  "const_24_to_27_operand")
             (match_operand 14  "const_24_to_27_operand")
             ;; Group 3.
             (match_operand 15  "const_12_to_15_operand")
             (match_operand 16  "const_12_to_15_operand")
             (match_operand 17  "const_28_to_31_operand")
             (match_operand 18  "const_28_to_31_operand")])))]
  "TARGET_AVX512F
   && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
       && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
       && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
       && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
       && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
       && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
       && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
       && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
       && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
       && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
{
  /* Pack the four group-0 selectors into two-bit immediate fields; the
     operand-2 selectors are rebased from 16..19 down to 0..3 first.  */
  int imm = (INTVAL (operands[3])
             | (INTVAL (operands[4]) << 2)
             | ((INTVAL (operands[5]) - 16) << 4)
             | ((INTVAL (operands[6]) - 16) << 6));

  /* Operand 3 is reused to print the immediate.  */
  operands[3] = GEN_INT (imm);

  return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])
8736
;; Masked 512-bit vshufpd from an 8-bit immediate.  Bit N of the immediate
;; picks between the two doubles of a pair for result element N:
;; even N reads from operand 1 (base N), odd N reads from operand 2
;; (base 8 + N - 1 in the concatenated vector).
(define_expand "avx512f_shufpd512_mask"
  [(match_operand:V8DF 0 "register_operand")
   (match_operand:V8DF 1 "register_operand")
   (match_operand:V8DF 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_255_operand")
   (match_operand:V8DF 4 "register_operand")
   (match_operand:QI 5 "register_operand")]
  "TARGET_AVX512F"
{
  int mask = INTVAL (operands[3]);
  rtx sel[8];

  for (int bit = 0; bit != 8; bit++)
    {
      int base = (bit & 1) ? bit + 7 : bit;

      sel[bit] = GEN_INT (base + ((mask >> bit) & 1));
    }

  emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1],
                                           operands[2],
                                           sel[0], sel[1], sel[2], sel[3],
                                           sel[4], sel[5], sel[6], sel[7],
                                           operands[4], operands[5]));
  DONE;
})
8759
;; 512-bit vshufpd given as an explicit 8-element selection.  Each selector
;; predicate admits exactly two adjacent indices, so every selector
;; contributes one bit to the rebuilt immediate.
(define_insn "avx512f_shufpd512_1<mask_name>"
  [(set (match_operand:V8DF 0 "register_operand" "=v")
        (vec_select:V8DF
          (vec_concat:V16DF
            (match_operand:V8DF 1 "register_operand" "v")
            (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
          (parallel
            [(match_operand 3 "const_0_to_1_operand")
             (match_operand 4 "const_8_to_9_operand")
             (match_operand 5 "const_2_to_3_operand")
             (match_operand 6 "const_10_to_11_operand")
             (match_operand 7 "const_4_to_5_operand")
             (match_operand 8 "const_12_to_13_operand")
             (match_operand 9 "const_6_to_7_operand")
             (match_operand 10 "const_14_to_15_operand")])))]
  "TARGET_AVX512F"
{
  /* Subtract each selector's base index and place the remaining bit at
     the selector's position in the immediate.  */
  int imm = (INTVAL (operands[3])
             | ((INTVAL (operands[4]) - 8) << 1)
             | ((INTVAL (operands[5]) - 2) << 2)
             | ((INTVAL (operands[6]) - 10) << 3)
             | ((INTVAL (operands[7]) - 4) << 4)
             | ((INTVAL (operands[8]) - 12) << 5)
             | ((INTVAL (operands[9]) - 6) << 6)
             | ((INTVAL (operands[10]) - 14) << 7));

  /* Operand 3 is reused to print the immediate.  */
  operands[3] = GEN_INT (imm);

  return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V8DF")])
8793
;; 256-bit vshufpd from an integer immediate: bits 0..3 are decoded into the
;; four explicit selectors expected by avx_shufpd256_1.
(define_expand "avx_shufpd256<mask_expand4_name>"
  [(match_operand:V4DF 0 "register_operand")
   (match_operand:V4DF 1 "register_operand")
   (match_operand:V4DF 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_AVX"
{
  int mask = INTVAL (operands[3]);
  rtx sel0 = GEN_INT (mask & 1);
  rtx sel1 = GEN_INT (mask & 2 ? 5 : 4);
  rtx sel2 = GEN_INT (mask & 4 ? 3 : 2);
  rtx sel3 = GEN_INT (mask & 8 ? 7 : 6);

  emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
                                                     operands[1],
                                                     operands[2],
                                                     sel0, sel1, sel2, sel3
                                                     <mask_expand4_args>));
  DONE;
})
8812
;; 256-bit vshufpd given as an explicit 4-element selection; each selector
;; contributes one bit to the rebuilt immediate.
(define_insn "avx_shufpd256_1<mask_name>"
  [(set (match_operand:V4DF 0 "register_operand" "=v")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "v")
            (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
          (parallel
            [(match_operand 3 "const_0_to_1_operand")
             (match_operand 4 "const_4_to_5_operand")
             (match_operand 5 "const_2_to_3_operand")
             (match_operand 6 "const_6_to_7_operand")])))]
  "TARGET_AVX && <mask_avx512vl_condition>"
{
  /* Subtract each selector's base index and place the remaining bit at
     the selector's position in the immediate.  */
  int imm = (INTVAL (operands[3])
             | ((INTVAL (operands[4]) - 4) << 1)
             | ((INTVAL (operands[5]) - 2) << 2)
             | ((INTVAL (operands[6]) - 6) << 3));

  /* Operand 3 is reused to print the immediate.  */
  operands[3] = GEN_INT (imm);

  return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
}
  [(set_attr "type" "sseshuf")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
8838
;; 128-bit shufpd from an integer immediate: bits 0 and 1 are decoded into
;; the two explicit selectors expected by sse2_shufpd_v2df.
(define_expand "sse2_shufpd<mask_expand4_name>"
  [(match_operand:V2DF 0 "register_operand")
   (match_operand:V2DF 1 "register_operand")
   (match_operand:V2DF 2 "vector_operand")
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[3]);
  rtx sel_lo = GEN_INT (mask & 1);
  rtx sel_hi = GEN_INT (mask & 2 ? 3 : 2);

  emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
                                                      operands[2], sel_lo,
                                                      sel_hi
                                                      <mask_expand4_args>));
  DONE;
})
8853
;; Masked 128-bit vshufpd.  The two-element selection is merged with
;; operand 5 (tied to the destination or constant, per "0C") under the
;; QImode mask register in operand 6.
(define_insn "sse2_shufpd_v2df_mask"
  [(set (match_operand:V2DF 0 "register_operand" "=v")
        (vec_merge:V2DF
          (vec_select:V2DF
            (vec_concat:V4DF
              (match_operand:V2DF 1 "register_operand" "v")
              (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
            (parallel [(match_operand 3 "const_0_to_1_operand")
                       (match_operand 4 "const_2_to_3_operand")]))
          (match_operand:V2DF 5 "vector_move_operand" "0C")
          (match_operand:QI 6 "register_operand" "Yk")))]
  "TARGET_AVX512VL"
{
  /* Bit 0 is the operand-1 selector, bit 1 the rebased operand-2 one.  */
  int imm = INTVAL (operands[3]) | ((INTVAL (operands[4]) - 2) << 1);

  /* Operand 3 is reused to print the immediate.  */
  operands[3] = GEN_INT (imm);

  return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
}
  [(set_attr "type" "sseshuf")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V2DF")])
8878
;; punpcklqdq and punpckhqdq are shorter than shufpd.
;;
;; 256-bit vpunpckhqdq: { op1[1], op2[1], op1[3], op2[3] }.
(define_insn "avx2_interleave_highv4di<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
        (vec_select:V4DI
          (vec_concat:V8DI
            (match_operand:V4DI 1 "register_operand" "v")
            (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
          (parallel
            [(const_int 1) (const_int 5) (const_int 3) (const_int 7)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8895
;; 512-bit vpunpckhqdq: interleave the odd-indexed quadwords of operand 1
;; and operand 2, giving { op1[1], op2[1], op1[3], op2[3], ... }.
(define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
        (vec_select:V8DI
          (vec_concat:V16DI
            (match_operand:V8DI 1 "register_operand" "v")
            (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
          (parallel
            [(const_int 1) (const_int 9) (const_int 3) (const_int 11)
             (const_int 5) (const_int 13) (const_int 7) (const_int 15)])))]
  "TARGET_AVX512F"
  "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
8911
;; 128-bit punpckhqdq: { op1[1], op2[1] }.
;;   alternative 0: legacy SSE2 two-operand form (destination tied to op1).
;;   alternative 1: AVX three-operand form, optionally masked.
(define_insn "vec_interleave_highv2di<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=x,v")
        (vec_select:V2DI
          (vec_concat:V4DI
            (match_operand:V2DI 1 "register_operand" "0,v")
            (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
          (parallel [(const_int 1) (const_int 3)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "@
   punpckhqdq\t{%2, %0|%0, %2}
   vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,<mask_prefix>")
   (set_attr "mode" "TI")])
8929
;; 256-bit vpunpcklqdq: { op1[0], op2[0], op1[2], op2[2] }.
(define_insn "avx2_interleave_lowv4di<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
        (vec_select:V4DI
          (vec_concat:V8DI
            (match_operand:V4DI 1 "register_operand" "v")
            (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
          (parallel
            [(const_int 0) (const_int 4) (const_int 2) (const_int 6)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8945
;; 512-bit vpunpcklqdq: interleave the even-indexed quadwords of operand 1
;; and operand 2, giving { op1[0], op2[0], op1[2], op2[2], ... }.
(define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
        (vec_select:V8DI
          (vec_concat:V16DI
            (match_operand:V8DI 1 "register_operand" "v")
            (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
          (parallel
            [(const_int 0) (const_int 8) (const_int 2) (const_int 10)
             (const_int 4) (const_int 12) (const_int 6) (const_int 14)])))]
  "TARGET_AVX512F"
  "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
8961
;; 128-bit punpcklqdq: { op1[0], op2[0] }.
;;   alternative 0: legacy SSE2 two-operand form (destination tied to op1).
;;   alternative 1: AVX three-operand form, optionally masked.
;;
;; The "prefix" attribute of alternative 1 uses <mask_prefix> rather than a
;; hard-coded "vex": the masked variant of this pattern prints a mask
;; operand (<mask_operand3>), which only exists in the EVEX encoding.  This
;; matches vec_interleave_highv2di<mask_name>.
(define_insn "vec_interleave_lowv2di<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=x,v")
	(vec_select:V2DI
	  (vec_concat:V4DI
	    (match_operand:V2DI 1 "register_operand" "0,v")
	    (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
	  (parallel [(const_int 0)
		     (const_int 2)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "@
   punpcklqdq\t{%2, %0|%0, %2}
   vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,<mask_prefix>")
   (set_attr "mode" "TI")])
8979
;; 128-bit shufpd on a two-element vector given as an explicit selection.
;;   alternative 0: legacy SSE2 two-operand form (destination tied to op1).
;;   alternative 1: AVX three-operand form.
(define_insn "sse2_shufpd_<mode>"
  [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
        (vec_select:VI8F_128
          (vec_concat:<ssedoublevecmode>
            (match_operand:VI8F_128 1 "register_operand" "0,v")
            (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
          (parallel [(match_operand 3 "const_0_to_1_operand")
                     (match_operand 4 "const_2_to_3_operand")])))]
  "TARGET_SSE2"
{
  /* Bit 0 is the operand-1 selector, bit 1 the rebased operand-2 one.  */
  int imm = INTVAL (operands[3]) | ((INTVAL (operands[4]) - 2) << 1);

  /* Operand 3 is reused to print the immediate.  */
  operands[3] = GEN_INT (imm);

  if (which_alternative == 0)
    return "shufpd\t{%3, %2, %0|%0, %2, %3}";
  if (which_alternative == 1)
    return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseshuf")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "V2DF")])
9010
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
;;
;; Extract the high double (element 1) of a V2DF.  Alternatives:
;;   0  movhpd store to memory
;;   1  unpckhpd in place (non-AVX, source tied to destination)
;;   2  vunpckhpd (AVX)
;;   3-5  source in offsettable memory; emitted as "#" and split later
(define_insn "sse2_storehpd"
  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,Yv,x,*f,r")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" " v,0, v,o,o,o")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   %vmovhpd\t{%1, %0|%0, %1}
   unpckhpd\t%0, %0
   vunpckhpd\t{%d1, %0|%0, %d1}
   #
   #
   #"
  [(set_attr "isa" "*,noavx,avx,*,*,*")
   (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
   ;; Alternative 0 carries a data16 prefix only when not AVX-encoded.
   (set (attr "prefix_data16")
        (if_then_else
          (and (eq_attr "alternative" "0")
               (not (match_test "TARGET_AVX")))
          (const_string "1")
          (const_string "*")))
   (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
   (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
9036
;; After reload, extracting element 1 of a V2DF in memory into a register
;; becomes a plain DFmode load from byte offset 8.
(define_split
  [(set (match_operand:DF 0 "register_operand")
        (vec_select:DF
          (match_operand:V2DF 1 "memory_operand")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = adjust_address (operands[1], DFmode, 8);
})
9045
;; Element 1 extraction from V2DF for targets with SSE but without SSE2.
;; Alternative 0 stores with movhps, alternative 1 copies register to
;; register with movhlps, alternative 2 loads with movlps from the %H1 form
;; of an offsettable memory source (presumably the upper 8 bytes; confirm
;; against the operand-modifier handling in i386.c).
9046(define_insn "*vec_extractv2df_1_sse"
9047  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
9048	(vec_select:DF
9049	  (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
9050	  (parallel [(const_int 1)])))]
9051  "!TARGET_SSE2 && TARGET_SSE
9052   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9053  "@
9054   movhps\t{%1, %0|%q0, %1}
9055   movhlps\t{%1, %0|%0, %1}
9056   movlps\t{%H1, %0|%0, %H1}"
9057  [(set_attr "type" "ssemov")
9058   (set_attr "mode" "V2SF,V4SF,V2SF")])
9059
9060;; Avoid combining registers from different units in a single alternative,
9061;; see comment above inline_secondary_memory_needed function in i386.c
;; sse2_storelpd: set DF operand 0 to element 0 of V2DF operand 1.
;; Requires TARGET_SSE2 and rejects the memory-to-memory case.
;; Only alternative 0 (memory destination) has a real template, (v)movlpd;
;; the other four alternatives use the split-only placeholder template.
;; prefix_data16 is 1 for alternative 0; note there is no TARGET_AVX test
;; here, unlike the prefix_data16 expression in sse2_storehpd.
9062(define_insn "sse2_storelpd"
9063  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x,*f,r")
9064	(vec_select:DF
9065	  (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
9066	  (parallel [(const_int 0)])))]
9067  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9068  "@
9069   %vmovlpd\t{%1, %0|%0, %1}
9070   #
9071   #
9072   #
9073   #"
9074  [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
9075   (set (attr "prefix_data16")
9076     (if_then_else (eq_attr "alternative" "0")
9077		   (const_string "1")
9078		   (const_string "*")))
9079   (set_attr "prefix" "maybe_vex")
9080   (set_attr "mode" "V1DF,DF,DF,DF,DF")])
9081
;; After reload (TARGET_SSE2): extracting element 0 of a V2DF register or
;; memory operand into a DF register becomes a plain DF move whose source
;; is gen_lowpart (DFmode, operand 1).
9082(define_split
9083  [(set (match_operand:DF 0 "register_operand")
9084	(vec_select:DF
9085	  (match_operand:V2DF 1 "nonimmediate_operand")
9086	  (parallel [(const_int 0)])))]
9087  "TARGET_SSE2 && reload_completed"
9088  [(set (match_dup 0) (match_dup 1))]
9089  "operands[1] = gen_lowpart (DFmode, operands[1]);")
9090
;; Element 0 extraction from V2DF for targets with SSE but without SSE2.
;; Alternative 0 stores with movlps, alternative 1 copies the whole register
;; with movaps, alternative 2 loads from memory with movlps.
9091(define_insn "*vec_extractv2df_0_sse"
9092  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
9093	(vec_select:DF
9094	  (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
9095	  (parallel [(const_int 0)])))]
9096  "!TARGET_SSE2 && TARGET_SSE
9097   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9098  "@
9099   movlps\t{%1, %0|%0, %1}
9100   movaps\t{%1, %0|%0, %1}
9101   movlps\t{%1, %0|%0, %q1}"
9102  [(set_attr "type" "ssemov")
9103   (set_attr "mode" "V2SF,V4SF,V2SF")])
9104
;; Expander for the load-high-DF operation: the V2DF result takes element 0
;; from operand 1 and element 1 from DF operand 2.
;; The preparation code passes the operands through
;; ix86_fixup_binary_operands, emits sse2_loadhpd into the destination it
;; returns, and copies that destination to operands[0] when they differ.
9105(define_expand "sse2_loadhpd_exp"
9106  [(set (match_operand:V2DF 0 "nonimmediate_operand")
9107	(vec_concat:V2DF
9108	  (vec_select:DF
9109	    (match_operand:V2DF 1 "nonimmediate_operand")
9110	    (parallel [(const_int 0)]))
9111	  (match_operand:DF 2 "nonimmediate_operand")))]
9112  "TARGET_SSE2"
9113{
9114  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
9115
9116  emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
9117
9118  /* Fix up the destination if needed.  */
9119  if (dst != operands[0])
9120    emit_move_insn (operands[0], dst);
9121
9122  DONE;
9123})
9124
9125;; Avoid combining registers from different units in a single alternative,
9126;; see comment above inline_secondary_memory_needed function in i386.c
;; sse2_loadhpd: V2DF result = { element 0 of operand 1, DF operand 2 }.
;; Requires TARGET_SSE2 and rejects operands 1 and 2 both being memory.
;; Alternatives 0-1 load the new high half from memory with movhpd/vmovhpd;
;; alternatives 2-3 take it from a register with unpcklpd/vunpcklpd.
;; Alternatives 4-6 have an offsettable memory destination tied to operand 1
;; and use the split-only placeholder template.
;; prefix_data16 is 1 only for alternative 0 (the non-AVX movhpd form).
9127(define_insn "sse2_loadhpd"
9128  [(set (match_operand:V2DF 0 "nonimmediate_operand"
9129	  "=x,v,x,v ,o,o ,o")
9130	(vec_concat:V2DF
9131	  (vec_select:DF
9132	    (match_operand:V2DF 1 "nonimmediate_operand"
9133	  " 0,v,0,v ,0,0 ,0")
9134	    (parallel [(const_int 0)]))
9135	  (match_operand:DF 2 "nonimmediate_operand"
9136	  " m,m,x,Yv,x,*f,r")))]
9137  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9138  "@
9139   movhpd\t{%2, %0|%0, %2}
9140   vmovhpd\t{%2, %1, %0|%0, %1, %2}
9141   unpcklpd\t{%2, %0|%0, %2}
9142   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9143   #
9144   #
9145   #"
9146  [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
9147   (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
9148   (set (attr "prefix_data16")
9149     (if_then_else (eq_attr "alternative" "0")
9150		   (const_string "1")
9151		   (const_string "*")))
9152   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
9153   (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
9154
;; After reload (TARGET_SSE2): when the V2DF memory destination is also the
;; source of element 0 (match_dup 0), replacing element 1 with DF register
;; operand 1 becomes a DF store.  The preparation statement re-addresses
;; operand 0 as DFmode at byte offset 8.
9155(define_split
9156  [(set (match_operand:V2DF 0 "memory_operand")
9157	(vec_concat:V2DF
9158	  (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
9159	  (match_operand:DF 1 "register_operand")))]
9160  "TARGET_SSE2 && reload_completed"
9161  [(set (match_dup 0) (match_dup 1))]
9162  "operands[0] = adjust_address (operands[0], DFmode, 8);")
9163
;; Expander for the load-low-DF operation: the V2DF result takes element 0
;; from DF operand 2 and element 1 from operand 1.
;; The preparation code passes the operands through
;; ix86_fixup_binary_operands, emits sse2_loadlpd into the destination it
;; returns, and copies that destination to operands[0] when they differ.
9164(define_expand "sse2_loadlpd_exp"
9165  [(set (match_operand:V2DF 0 "nonimmediate_operand")
9166	(vec_concat:V2DF
9167	  (match_operand:DF 2 "nonimmediate_operand")
9168	  (vec_select:DF
9169	    (match_operand:V2DF 1 "nonimmediate_operand")
9170	    (parallel [(const_int 1)]))))]
9171  "TARGET_SSE2"
9172{
9173  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
9174
9175  emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
9176
9177  /* Fix up the destination if needed.  */
9178  if (dst != operands[0])
9179    emit_move_insn (operands[0], dst);
9180
9181  DONE;
9182})
9183
9184;; Avoid combining registers from different units in a single alternative,
9185;; see comment above inline_secondary_memory_needed function in i386.c
;; sse2_loadlpd: V2DF result = { DF operand 2, element 1 of operand 1 }.
;; Requires TARGET_SSE2 and rejects operands 1 and 2 both being memory.
;; Eleven alternatives, in order:
;;   0     operand 1 matches constraint C (presumably the zero vector;
;;         confirm in constraints.md), emitted as (v)movq
;;   1-2   new low half from memory: movlpd / vmovlpd
;;   3-4   new low half from a register: movsd / vmovsd
;;   5     destination tied to operand 2: shufpd with immediate 2
;;   6-7   operand 1 in offsettable memory: movhpd / vmovhpd on %H1
;;   8-10  memory destination tied to operand 1: split-only placeholder
;; The type, prefix_data16, length_immediate and prefix attributes are
;; selected per alternative by the cond / if_then_else expressions below.
9186(define_insn "sse2_loadlpd"
9187  [(set (match_operand:V2DF 0 "nonimmediate_operand"
9188	  "=v,x,v,x,v,x,x,v,m,m ,m")
9189	(vec_concat:V2DF
9190	  (match_operand:DF 2 "nonimmediate_operand"
9191	  "vm,m,m,x,v,0,0,v,x,*f,r")
9192	  (vec_select:DF
9193	    (match_operand:V2DF 1 "vector_move_operand"
9194	  " C,0,v,0,v,x,o,o,0,0 ,0")
9195	    (parallel [(const_int 1)]))))]
9196  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9197  "@
9198   %vmovq\t{%2, %0|%0, %2}
9199   movlpd\t{%2, %0|%0, %2}
9200   vmovlpd\t{%2, %1, %0|%0, %1, %2}
9201   movsd\t{%2, %0|%0, %2}
9202   vmovsd\t{%2, %1, %0|%0, %1, %2}
9203   shufpd\t{$2, %1, %0|%0, %1, 2}
9204   movhpd\t{%H1, %0|%0, %H1}
9205   vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
9206   #
9207   #
9208   #"
9209  [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
9210   (set (attr "type")
9211     (cond [(eq_attr "alternative" "5")
9212	      (const_string "sselog")
9213	    (eq_attr "alternative" "9")
9214	      (const_string "fmov")
9215	    (eq_attr "alternative" "10")
9216	      (const_string "imov")
9217	   ]
9218	   (const_string "ssemov")))
9219   (set (attr "prefix_data16")
9220     (if_then_else (eq_attr "alternative" "1,6")
9221		   (const_string "1")
9222		   (const_string "*")))
9223   (set (attr "length_immediate")
9224     (if_then_else (eq_attr "alternative" "5")
9225		   (const_string "1")
9226		   (const_string "*")))
9227   (set (attr "prefix")
9228     (cond [(eq_attr "alternative" "0")
9229	      (const_string "maybe_vex")
9230	    (eq_attr "alternative" "1,3,5,6")
9231	      (const_string "orig")
9232	    (eq_attr "alternative" "2,4,7")
9233	      (const_string "maybe_evex")
9234	   ]
9235	   (const_string "*")))
9236   (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
9237
;; After reload (TARGET_SSE2): when the V2DF memory destination is also the
;; source of element 1 (match_dup 0), replacing element 0 with DF register
;; operand 1 becomes a DF store.  The preparation statement re-addresses
;; operand 0 as DFmode at byte offset 0.
9238(define_split
9239  [(set (match_operand:V2DF 0 "memory_operand")
9240	(vec_concat:V2DF
9241	  (match_operand:DF 1 "register_operand")
9242	  (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
9243  "TARGET_SSE2 && reload_completed"
9244  [(set (match_dup 0) (match_dup 1))]
9245  "operands[0] = adjust_address (operands[0], DFmode, 0);")
9246
;; sse2_movsd: vec_merge of V2DF operand 2 and V2DF operand 1 with mask 1,
;; i.e. element 0 comes from operand 2 and element 1 from operand 1.
;; Nine alternatives, in order: movsd / vmovsd (register source),
;; movlpd / vmovlpd (memory source), (v)movlpd store to memory,
;; shufpd with immediate 2, movhps / vmovhps loading %H1, and (v)movhps
;; storing operand 1 to the %H0 form of an offsettable memory destination.
;; prefix_data16 is 1 for alternatives 2 and 4 only when TARGET_AVX is off;
;; length_immediate is 1 for the shufpd alternative (5).
9247(define_insn "sse2_movsd"
9248  [(set (match_operand:V2DF 0 "nonimmediate_operand"   "=x,v,x,v,m,x,x,v,o")
9249	(vec_merge:V2DF
9250	  (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
9251	  (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
9252	  (const_int 1)))]
9253  "TARGET_SSE2"
9254  "@
9255   movsd\t{%2, %0|%0, %2}
9256   vmovsd\t{%2, %1, %0|%0, %1, %2}
9257   movlpd\t{%2, %0|%0, %q2}
9258   vmovlpd\t{%2, %1, %0|%0, %1, %q2}
9259   %vmovlpd\t{%2, %0|%q0, %2}
9260   shufpd\t{$2, %1, %0|%0, %1, 2}
9261   movhps\t{%H1, %0|%0, %H1}
9262   vmovhps\t{%H1, %2, %0|%0, %2, %H1}
9263   %vmovhps\t{%1, %H0|%H0, %1}"
9264  [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
9265   (set (attr "type")
9266     (if_then_else
9267       (eq_attr "alternative" "5")
9268       (const_string "sselog")
9269       (const_string "ssemov")))
9270   (set (attr "prefix_data16")
9271     (if_then_else
9272       (and (eq_attr "alternative" "2,4")
9273	    (not (match_test "TARGET_AVX")))
9274       (const_string "1")
9275       (const_string "*")))
9276   (set (attr "length_immediate")
9277     (if_then_else (eq_attr "alternative" "5")
9278		   (const_string "1")
9279		   (const_string "*")))
9280   (set (attr "prefix")
9281     (cond [(eq_attr "alternative" "1,3,7")
9282	      (const_string "maybe_evex")
9283	    (eq_attr "alternative" "4,8")
9284	      (const_string "maybe_vex")
9285	   ]
9286	   (const_string "orig")))
9287   (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
9288
;; vec_dupv2df: duplicate DF operand 1 into both elements of V2DF operand 0.
;; Alternative 0 (noavx) uses unpcklpd on the tied register, alternative 1
;; (sse3) uses (v)movddup, alternative 2 (avx512vl) uses vmovddup.
;; <mask_name>, <mask_avx512vl_condition> and <mask_operand2> are expanded
;; elsewhere in the file (presumably by a masking define_subst; confirm).
9289(define_insn "vec_dupv2df<mask_name>"
9290  [(set (match_operand:V2DF 0 "register_operand"     "=x,x,v")
9291	(vec_duplicate:V2DF
9292	  (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
9293  "TARGET_SSE2 && <mask_avx512vl_condition>"
9294  "@
9295   unpcklpd\t%0, %0
9296   %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
9297   vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
9298  [(set_attr "isa" "noavx,sse3,avx512vl")
9299   (set_attr "type" "sselog1")
9300   (set_attr "prefix" "orig,maybe_vex,evex")
9301   (set_attr "mode" "V2DF,DF,DF")])
9302
;; vec_concatv2df: V2DF result = { DF operand 1, operand 2 }.
;; Condition: TARGET_SSE, and the operands are not both memory unless
;; TARGET_SSE3 holds and they are rtx_equal_p (the movddup alternatives 3
;; and 4, where operand 2 is tied to operand 1).
;; Ten alternatives, in order: unpcklpd, vunpcklpd (avx), vunpcklpd
;; (avx512vl), (v)movddup (sse3), vmovddup (avx512vl), movhpd, vmovhpd,
;; (v)movq with operand 2 matching constraint C, movlhps, movhps.
;; The isa cond leaves alternatives 8 and 9 with the default noavx; these
;; are the only two alternatives not tagged as needing SSE2 or later.
9303(define_insn "vec_concatv2df"
9304  [(set (match_operand:V2DF 0 "register_operand"     "=x,x,v,x,v,x,x, v,x,x")
9305	(vec_concat:V2DF
9306	  (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,xm,0,0")
9307	  (match_operand:DF 2 "vector_move_operand"  " x,x,v,1,1,m,m, C,x,m")))]
9308  "TARGET_SSE
9309   && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
9310       || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
9311  "@
9312   unpcklpd\t{%2, %0|%0, %2}
9313   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9314   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9315   %vmovddup\t{%1, %0|%0, %1}
9316   vmovddup\t{%1, %0|%0, %1}
9317   movhpd\t{%2, %0|%0, %2}
9318   vmovhpd\t{%2, %1, %0|%0, %1, %2}
9319   %vmovq\t{%1, %0|%0, %1}
9320   movlhps\t{%2, %0|%0, %2}
9321   movhps\t{%2, %0|%0, %2}"
9322  [(set (attr "isa")
9323     (cond [(eq_attr "alternative" "0,5")
9324	      (const_string "sse2_noavx")
9325	    (eq_attr "alternative" "1,6")
9326	      (const_string "avx")
9327	    (eq_attr "alternative" "2,4")
9328	      (const_string "avx512vl")
9329	    (eq_attr "alternative" "3")
9330	      (const_string "sse3")
9331	    (eq_attr "alternative" "7")
9332	      (const_string "sse2")
9333	   ]
9334	   (const_string "noavx")))
9335   (set (attr "type")
9336     (if_then_else
9337       (eq_attr "alternative" "0,1,2,3,4")
9338       (const_string "sselog")
9339       (const_string "ssemov")))
9340   (set (attr "prefix_data16")
9341	(if_then_else (eq_attr "alternative" "5")
9342		      (const_string "1")
9343		      (const_string "*")))
9344   (set (attr "prefix")
9345     (cond [(eq_attr "alternative" "1,6")
9346	      (const_string "vex")
9347	    (eq_attr "alternative" "2,4")
9348	      (const_string "evex")
9349	    (eq_attr "alternative" "3,7")
9350	      (const_string "maybe_vex")
9351	   ]
9352	   (const_string "orig")))
9353   (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
9354
9355;; vmovq clears also the higher bits.
;; vec_set<mode>_0 (modes of VF2_512_256, TARGET_AVX): element 0 of the
;; result is scalar operand 2 and every other element comes from operand 1,
;; which must be the zero constant (const0_operand).  A single vmovq into
;; the %x0 form of the destination implements the whole operation.
9356(define_insn "vec_set<mode>_0"
9357  [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
9358	(vec_merge:VF2_512_256
9359	  (vec_duplicate:VF2_512_256
9360	    (match_operand:<ssescalarmode> 2 "general_operand" "xm"))
9361	  (match_operand:VF2_512_256 1 "const0_operand" "C")
9362	  (const_int 1)))]
9363  "TARGET_AVX"
9364  "vmovq\t{%2, %x0|%x0, %2}"
9365  [(set_attr "type" "ssemov")
9366   (set_attr "prefix" "maybe_evex")
9367   (set_attr "mode" "DF")])
9368
9369;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9370;;
9371;; Parallel integer down-conversion operations
9372;;
9373;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9374
;; PMOV_DST_MODE_1 lists the destination modes of the down-conversions
;; whose source is V16SI or V8DI.  For each destination mode:
;;   pmov_src_mode   the source mode name in upper case
;;   pmov_src_lower  the same source mode in lower case, for pattern names
;;   pmov_suff_1     the two-letter suffix appended to the vpmov mnemonic
9375(define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
9376(define_mode_attr pmov_src_mode
9377  [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
9378(define_mode_attr pmov_src_lower
9379  [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
9380(define_mode_attr pmov_suff_1
9381  [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
9382
;; Unmasked down-conversion (any_truncate) of a <pmov_src_mode> register
;; into a PMOV_DST_MODE_1 register (alternative 0) or memory (alternative
;; 1) under TARGET_AVX512F.  The mnemonic is vpmov plus <trunsuffix> plus
;; <pmov_suff_1>; the memory attribute marks alternative 1 as a store.
9383(define_insn "*avx512f_<code><pmov_src_lower><mode>2"
9384  [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
9385	(any_truncate:PMOV_DST_MODE_1
9386	  (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
9387  "TARGET_AVX512F"
9388  "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
9389  [(set_attr "type" "ssemov")
9390   (set_attr "memory" "none,store")
9391   (set_attr "prefix" "evex")
9392   (set_attr "mode" "<sseinsnmode>")])
9393
;; Masked form of the AVX512F down-conversion: the truncated value is
;; vec_merged with operand 2 under mask operand 3 (constraint Yk).
;; Operand 2 is either tied to the destination or matches constraint C in
;; alternative 0, and is tied to the memory destination in alternative 1.
;; The template prints the mask in braces followed by %N2 (presumably the
;; zero-masking marker when operand 2 is the constant; confirm in i386.c).
9394(define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
9395  [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
9396    (vec_merge:PMOV_DST_MODE_1
9397      (any_truncate:PMOV_DST_MODE_1
9398        (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
9399      (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
9400      (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
9401  "TARGET_AVX512F"
9402  "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9403  [(set_attr "type" "ssemov")
9404   (set_attr "memory" "none,store")
9405   (set_attr "prefix" "evex")
9406   (set_attr "mode" "<sseinsnmode>")])
9407
;; Expander for the masked down-converting store: the memory destination is
;; merged with its own previous contents (match_dup 0) under mask operand 2.
;; There are no preparation statements; the RTL template is emitted as is.
9408(define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
9409  [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
9410    (vec_merge:PMOV_DST_MODE_1
9411      (any_truncate:PMOV_DST_MODE_1
9412        (match_operand:<pmov_src_mode> 1 "register_operand"))
9413      (match_dup 0)
9414      (match_operand:<avx512fmaskmode> 2 "register_operand")))]
9415  "TARGET_AVX512F")
9416
;; Unmasked V32HI to V32QI down-conversion under TARGET_AVX512BW, emitted
;; as vpmov<trunsuffix>wb.  Alternative 0 writes a register, alternative 1
;; stores to memory.
9417(define_insn "avx512bw_<code>v32hiv32qi2"
9418  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
9419	(any_truncate:V32QI
9420	    (match_operand:V32HI 1 "register_operand" "v,v")))]
9421  "TARGET_AVX512BW"
9422  "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
9423  [(set_attr "type" "ssemov")
9424   (set_attr "memory" "none,store")
9425   (set_attr "prefix" "evex")
9426   (set_attr "mode" "XI")])
9427
;; Masked V32HI to V32QI down-conversion under TARGET_AVX512BW: the
;; truncated value is vec_merged with operand 2 under the SImode mask in
;; operand 3 (constraint Yk).  Operand 2 is tied to the destination or
;; matches constraint C in alternative 0, and is tied to the memory
;; destination in alternative 1.
9428(define_insn "avx512bw_<code>v32hiv32qi2_mask"
9429  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
9430    (vec_merge:V32QI
9431      (any_truncate:V32QI
9432        (match_operand:V32HI 1 "register_operand" "v,v"))
9433      (match_operand:V32QI 2 "vector_move_operand" "0C,0")
9434      (match_operand:SI 3 "register_operand" "Yk,Yk")))]
9435  "TARGET_AVX512BW"
9436  "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9437  [(set_attr "type" "ssemov")
9438   (set_attr "memory" "none,store")
9439   (set_attr "prefix" "evex")
9440   (set_attr "mode" "XI")])
9441
;; Expander for the masked V32HI to V32QI store: operand 0 is merged with
;; its own previous contents (match_dup 0) under SImode mask operand 2.
;; NOTE(review): operand 0 uses nonimmediate_operand here, while the
;; AVX512F _mask_store expander uses memory_operand; confirm whether a
;; register destination is intended for a pattern named _mask_store.
9442(define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
9443  [(set (match_operand:V32QI 0 "nonimmediate_operand")
9444    (vec_merge:V32QI
9445      (any_truncate:V32QI
9446        (match_operand:V32HI 1 "register_operand"))
9447      (match_dup 0)
9448      (match_operand:SI 2 "register_operand")))]
9449  "TARGET_AVX512BW")
9450
;; PMOV_DST_MODE_2 lists the 128-bit destination modes of the AVX512VL
;; down-conversions whose source is <ssedoublemode>; V16QI is included only
;; when TARGET_AVX512BW holds.  pmov_suff_2 gives the vpmov mnemonic suffix
;; for each destination mode.
9451(define_mode_iterator PMOV_DST_MODE_2
9452  [V4SI V8HI (V16QI "TARGET_AVX512BW")])
9453(define_mode_attr pmov_suff_2
9454  [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
9455
;; Unmasked down-conversion of an <ssedoublemode> register into a
;; PMOV_DST_MODE_2 register (alternative 0) or memory (alternative 1)
;; under TARGET_AVX512VL, emitted as vpmov<trunsuffix><pmov_suff_2>.
9456(define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
9457  [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
9458	(any_truncate:PMOV_DST_MODE_2
9459	    (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
9460  "TARGET_AVX512VL"
9461  "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
9462  [(set_attr "type" "ssemov")
9463   (set_attr "memory" "none,store")
9464   (set_attr "prefix" "evex")
9465   (set_attr "mode" "<sseinsnmode>")])
9466
;; Masked form of the AVX512VL down-conversion to PMOV_DST_MODE_2: the
;; truncated value is vec_merged with operand 2 under mask operand 3
;; (constraint Yk).  Operand 2 is tied to the destination or matches
;; constraint C in alternative 0, and is tied to the memory destination in
;; alternative 1.
9467(define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
9468  [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
9469    (vec_merge:PMOV_DST_MODE_2
9470      (any_truncate:PMOV_DST_MODE_2
9471        (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
9472      (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0")
9473      (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
9474  "TARGET_AVX512VL"
9475  "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9476  [(set_attr "type" "ssemov")
9477   (set_attr "memory" "none,store")
9478   (set_attr "prefix" "evex")
9479   (set_attr "mode" "<sseinsnmode>")])
9480
;; Expander for the masked AVX512VL down-converting store: operand 0 is
;; merged with its own previous contents (match_dup 0) under mask operand 2.
;; NOTE(review): operand 0 uses nonimmediate_operand here, while the
;; AVX512F _mask_store expander uses memory_operand; confirm the intent.
9481(define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
9482  [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
9483    (vec_merge:PMOV_DST_MODE_2
9484      (any_truncate:PMOV_DST_MODE_2
9485        (match_operand:<ssedoublemode> 1 "register_operand"))
9486      (match_dup 0)
9487      (match_operand:<avx512fmaskmode> 2 "register_operand")))]
9488  "TARGET_AVX512VL")
9489
;; PMOV_SRC_MODE_3 lists the source modes of the down-conversions to byte
;; elements whose result fills only part of a V16QI; V8HI is included only
;; when TARGET_AVX512BW holds.  For each source mode:
;;   pmov_dst_3         mode of the truncated elements
;;   pmov_dst_zeroed_3  mode of the zero vector that pads up to 16 bytes
;;   pmov_suff_3        the vpmov mnemonic suffix
9490(define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
9491(define_mode_attr pmov_dst_3
9492  [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
9493(define_mode_attr pmov_dst_zeroed_3
9494  [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
9495(define_mode_attr pmov_suff_3
9496  [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
9497
;; Register-to-register down-conversion to bytes under TARGET_AVX512VL:
;; the V16QI result is the truncated source (<pmov_dst_3>) concatenated
;; with operand 2, which must be a zero constant of mode
;; <pmov_dst_zeroed_3>.
9498(define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
9499  [(set (match_operand:V16QI 0 "register_operand" "=v")
9500    (vec_concat:V16QI
9501      (any_truncate:<pmov_dst_3>
9502	      (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
9503      (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
9504  "TARGET_AVX512VL"
9505  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
9506  [(set_attr "type" "ssemov")
9507   (set_attr "prefix" "evex")
9508   (set_attr "mode" "TI")])
9509
;; V2DI to V2QI down-converting store under TARGET_AVX512VL.  The memory
;; destination is modelled as V16QI: bytes 0-1 receive the truncated
;; source and bytes 2-15 are re-selected from the destination itself
;; (match_dup 0), i.e. they keep their previous contents.
;; The Intel-syntax half of the template prints the destination as %w0.
9510(define_insn "*avx512vl_<code>v2div2qi2_store"
9511  [(set (match_operand:V16QI 0 "memory_operand" "=m")
9512    (vec_concat:V16QI
9513      (any_truncate:V2QI
9514	      (match_operand:V2DI 1 "register_operand" "v"))
9515      (vec_select:V14QI
9516        (match_dup 0)
9517        (parallel [(const_int 2) (const_int 3)
9518                   (const_int 4) (const_int 5)
9519                   (const_int 6) (const_int 7)
9520                   (const_int 8) (const_int 9)
9521                   (const_int 10) (const_int 11)
9522                   (const_int 12) (const_int 13)
9523                   (const_int 14) (const_int 15)]))))]
9524  "TARGET_AVX512VL"
9525  "vpmov<trunsuffix>qb\t{%1, %0|%w0, %1}"
9526  [(set_attr "type" "ssemov")
9527   (set_attr "memory" "store")
9528   (set_attr "prefix" "evex")
9529   (set_attr "mode" "TI")])
9530
;; Masked V2DI to V2QI down-conversion into a register under
;; TARGET_AVX512VL.  Bytes 0-1 of the V16QI result are a vec_merge of the
;; truncated source with bytes 0-1 of operand 2 under QImode mask operand 3
;; (constraint Yk); bytes 2-15 are the literal zero vector.
;; Operand 2 is either tied to the destination or matches constraint C.
9531(define_insn "avx512vl_<code>v2div2qi2_mask"
9532  [(set (match_operand:V16QI 0 "register_operand" "=v")
9533    (vec_concat:V16QI
9534      (vec_merge:V2QI
9535        (any_truncate:V2QI
9536          (match_operand:V2DI 1 "register_operand" "v"))
9537        (vec_select:V2QI
9538          (match_operand:V16QI 2 "vector_move_operand" "0C")
9539          (parallel [(const_int 0) (const_int 1)]))
9540        (match_operand:QI 3 "register_operand" "Yk"))
9541      (const_vector:V14QI [(const_int 0) (const_int 0)
9542                           (const_int 0) (const_int 0)
9543                           (const_int 0) (const_int 0)
9544                           (const_int 0) (const_int 0)
9545                           (const_int 0) (const_int 0)
9546                           (const_int 0) (const_int 0)
9547                           (const_int 0) (const_int 0)])))]
9548  "TARGET_AVX512VL"
9549  "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9550  [(set_attr "type" "ssemov")
9551   (set_attr "prefix" "evex")
9552   (set_attr "mode" "TI")])
9553
;; Zero-masking variant of the masked V2DI to V2QI down-conversion: the
;; merge source is the literal V2QI zero vector rather than an operand,
;; the mask is operand 2, and the template always appends the {z} marker
;; after the mask.
9554(define_insn "*avx512vl_<code>v2div2qi2_mask_1"
9555  [(set (match_operand:V16QI 0 "register_operand" "=v")
9556    (vec_concat:V16QI
9557      (vec_merge:V2QI
9558	(any_truncate:V2QI
9559	  (match_operand:V2DI 1 "register_operand" "v"))
9560	(const_vector:V2QI [(const_int 0) (const_int 0)])
9561	(match_operand:QI 2 "register_operand" "Yk"))
9562      (const_vector:V14QI [(const_int 0) (const_int 0)
9563			   (const_int 0) (const_int 0)
9564			   (const_int 0) (const_int 0)
9565			   (const_int 0) (const_int 0)
9566			   (const_int 0) (const_int 0)
9567			   (const_int 0) (const_int 0)
9568			   (const_int 0) (const_int 0)])))]
9569  "TARGET_AVX512VL"
9570  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9571  [(set_attr "type" "ssemov")
9572   (set_attr "prefix" "evex")
9573   (set_attr "mode" "TI")])
9574
;; Masked V2DI to V2QI down-converting store under TARGET_AVX512VL.
;; Bytes 0-1 of the V16QI memory destination are a vec_merge of the
;; truncated source with the destination's own bytes 0-1 under QImode mask
;; operand 2; bytes 2-15 are re-selected from the destination (match_dup
;; 0), i.e. they keep their previous contents.
;; The Intel-syntax half of the template prints the destination as %w0.
9575(define_insn "avx512vl_<code>v2div2qi2_mask_store"
9576  [(set (match_operand:V16QI 0 "memory_operand" "=m")
9577    (vec_concat:V16QI
9578      (vec_merge:V2QI
9579        (any_truncate:V2QI
9580          (match_operand:V2DI 1 "register_operand" "v"))
9581        (vec_select:V2QI
9582          (match_dup 0)
9583          (parallel [(const_int 0) (const_int 1)]))
9584        (match_operand:QI 2 "register_operand" "Yk"))
9585      (vec_select:V14QI
9586        (match_dup 0)
9587        (parallel [(const_int 2) (const_int 3)
9588                   (const_int 4) (const_int 5)
9589                   (const_int 6) (const_int 7)
9590                   (const_int 8) (const_int 9)
9591                   (const_int 10) (const_int 11)
9592                   (const_int 12) (const_int 13)
9593                   (const_int 14) (const_int 15)]))))]
9594  "TARGET_AVX512VL"
9595  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%w0%{%2%}, %1}"
9596  [(set_attr "type" "ssemov")
9597   (set_attr "memory" "store")
9598   (set_attr "prefix" "evex")
9599   (set_attr "mode" "TI")])
9600
;; Down-converting store of a VI4_128_8_256 source to four bytes under
;; TARGET_AVX512VL.  The memory destination is modelled as V16QI: bytes
;; 0-3 receive the truncated source and bytes 4-15 are re-selected from the
;; destination itself (match_dup 0), i.e. they keep their previous contents.
;; The Intel-syntax half of the template prints the destination as %k0.
9601(define_insn "*avx512vl_<code><mode>v4qi2_store"
9602  [(set (match_operand:V16QI 0 "memory_operand" "=m")
9603    (vec_concat:V16QI
9604      (any_truncate:V4QI
9605	      (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9606      (vec_select:V12QI
9607        (match_dup 0)
9608        (parallel [(const_int 4) (const_int 5)
9609                   (const_int 6) (const_int 7)
9610                   (const_int 8) (const_int 9)
9611                   (const_int 10) (const_int 11)
9612                   (const_int 12) (const_int 13)
9613                   (const_int 14) (const_int 15)]))))]
9614  "TARGET_AVX512VL"
9615  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%k0, %1}"
9616  [(set_attr "type" "ssemov")
9617   (set_attr "memory" "store")
9618   (set_attr "prefix" "evex")
9619   (set_attr "mode" "TI")])
9620
;; Masked down-conversion of a VI4_128_8_256 source to four bytes, into a
;; register, under TARGET_AVX512VL.  Bytes 0-3 of the V16QI result are a
;; vec_merge of the truncated source with bytes 0-3 of operand 2 under
;; QImode mask operand 3 (constraint Yk); bytes 4-15 are the literal zero
;; vector.  Operand 2 is either tied to the destination or matches
;; constraint C.
9621(define_insn "avx512vl_<code><mode>v4qi2_mask"
9622  [(set (match_operand:V16QI 0 "register_operand" "=v")
9623    (vec_concat:V16QI
9624      (vec_merge:V4QI
9625        (any_truncate:V4QI
9626          (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9627        (vec_select:V4QI
9628          (match_operand:V16QI 2 "vector_move_operand" "0C")
9629          (parallel [(const_int 0) (const_int 1)
9630                     (const_int 2) (const_int 3)]))
9631        (match_operand:QI 3 "register_operand" "Yk"))
9632      (const_vector:V12QI [(const_int 0) (const_int 0)
9633                           (const_int 0) (const_int 0)
9634                           (const_int 0) (const_int 0)
9635                           (const_int 0) (const_int 0)
9636                           (const_int 0) (const_int 0)
9637                           (const_int 0) (const_int 0)])))]
9638  "TARGET_AVX512VL"
9639  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9640  [(set_attr "type" "ssemov")
9641   (set_attr "prefix" "evex")
9642   (set_attr "mode" "TI")])
9643
;; Zero-masking variant of the masked four-byte down-conversion: the merge
;; source is the literal V4QI zero vector rather than an operand, the mask
;; is operand 2, and the template always appends the {z} marker after the
;; mask.
9644(define_insn "*avx512vl_<code><mode>v4qi2_mask_1"
9645  [(set (match_operand:V16QI 0 "register_operand" "=v")
9646    (vec_concat:V16QI
9647      (vec_merge:V4QI
9648	(any_truncate:V4QI
9649	  (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9650	(const_vector:V4QI [(const_int 0) (const_int 0)
9651			    (const_int 0) (const_int 0)])
9652	(match_operand:QI 2 "register_operand" "Yk"))
9653      (const_vector:V12QI [(const_int 0) (const_int 0)
9654			   (const_int 0) (const_int 0)
9655			   (const_int 0) (const_int 0)
9656			   (const_int 0) (const_int 0)
9657			   (const_int 0) (const_int 0)
9658			   (const_int 0) (const_int 0)])))]
9659  "TARGET_AVX512VL"
9660  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9661  [(set_attr "type" "ssemov")
9662   (set_attr "prefix" "evex")
9663   (set_attr "mode" "TI")])
9664
;; Masked down-converting store of a VI4_128_8_256 source to four bytes
;; under TARGET_AVX512VL.  Bytes 0-3 of the V16QI memory destination are a
;; vec_merge of the truncated source with the destination's own bytes 0-3
;; under QImode mask operand 2; bytes 4-15 are re-selected from the
;; destination (match_dup 0), i.e. they keep their previous contents.
;; The Intel-syntax half of the template prints the destination as %k0.
9665(define_insn "avx512vl_<code><mode>v4qi2_mask_store"
9666  [(set (match_operand:V16QI 0 "memory_operand" "=m")
9667    (vec_concat:V16QI
9668      (vec_merge:V4QI
9669        (any_truncate:V4QI
9670          (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9671        (vec_select:V4QI
9672          (match_dup 0)
9673          (parallel [(const_int 0) (const_int 1)
9674                     (const_int 2) (const_int 3)]))
9675        (match_operand:QI 2 "register_operand" "Yk"))
9676      (vec_select:V12QI
9677        (match_dup 0)
9678        (parallel [(const_int 4) (const_int 5)
9679                   (const_int 6) (const_int 7)
9680                   (const_int 8) (const_int 9)
9681                   (const_int 10) (const_int 11)
9682                   (const_int 12) (const_int 13)
9683                   (const_int 14) (const_int 15)]))))]
9684  "TARGET_AVX512VL"
9685  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%k0%{%2%}, %1}"
9686  [(set_attr "type" "ssemov")
9687   (set_attr "memory" "store")
9688   (set_attr "prefix" "evex")
9689   (set_attr "mode" "TI")])
9690
;; Source modes for the eight-byte (V8QI) down-conversions below: V8SI
;; always, and V8HI only when TARGET_AVX512BW holds.
9691(define_mode_iterator VI2_128_BW_4_256
9692  [(V8HI "TARGET_AVX512BW") V8SI])
9693
;; Down-converting store of a VI2_128_BW_4_256 source to eight bytes under
;; TARGET_AVX512VL.  The memory destination is modelled as V16QI: bytes
;; 0-7 receive the truncated source and bytes 8-15 are re-selected from the
;; destination itself (match_dup 0), i.e. they keep their previous contents.
;; The Intel-syntax half of the template prints the destination as %q0.
9694(define_insn "*avx512vl_<code><mode>v8qi2_store"
9695  [(set (match_operand:V16QI 0 "memory_operand" "=m")
9696    (vec_concat:V16QI
9697      (any_truncate:V8QI
9698	      (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9699      (vec_select:V8QI
9700        (match_dup 0)
9701        (parallel [(const_int 8) (const_int 9)
9702                   (const_int 10) (const_int 11)
9703                   (const_int 12) (const_int 13)
9704                   (const_int 14) (const_int 15)]))))]
9705  "TARGET_AVX512VL"
9706  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%q0, %1}"
9707  [(set_attr "type" "ssemov")
9708   (set_attr "memory" "store")
9709   (set_attr "prefix" "evex")
9710   (set_attr "mode" "TI")])
9711
;; Masked down-conversion of a VI2_128_BW_4_256 source to eight bytes, into
;; a register, under TARGET_AVX512VL.  Bytes 0-7 of the V16QI result are a
;; vec_merge of the truncated source with bytes 0-7 of operand 2 under
;; QImode mask operand 3 (constraint Yk); bytes 8-15 are the literal zero
;; vector.  Operand 2 is either tied to the destination or matches
;; constraint C.
9712(define_insn "avx512vl_<code><mode>v8qi2_mask"
9713  [(set (match_operand:V16QI 0 "register_operand" "=v")
9714    (vec_concat:V16QI
9715      (vec_merge:V8QI
9716        (any_truncate:V8QI
9717          (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9718        (vec_select:V8QI
9719          (match_operand:V16QI 2 "vector_move_operand" "0C")
9720          (parallel [(const_int 0) (const_int 1)
9721                     (const_int 2) (const_int 3)
9722                     (const_int 4) (const_int 5)
9723                     (const_int 6) (const_int 7)]))
9724        (match_operand:QI 3 "register_operand" "Yk"))
9725      (const_vector:V8QI [(const_int 0) (const_int 0)
9726                          (const_int 0) (const_int 0)
9727                          (const_int 0) (const_int 0)
9728                          (const_int 0) (const_int 0)])))]
9729  "TARGET_AVX512VL"
9730  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9731  [(set_attr "type" "ssemov")
9732   (set_attr "prefix" "evex")
9733   (set_attr "mode" "TI")])
9734
;; Zero-masking variant of the masked eight-byte down-conversion: the
;; merge source is the literal V8QI zero vector rather than an operand,
;; the mask is operand 2, and the template always appends the {z} marker
;; after the mask.
9735(define_insn "*avx512vl_<code><mode>v8qi2_mask_1"
9736  [(set (match_operand:V16QI 0 "register_operand" "=v")
9737    (vec_concat:V16QI
9738      (vec_merge:V8QI
9739	(any_truncate:V8QI
9740	  (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9741	(const_vector:V8QI [(const_int 0) (const_int 0)
9742			    (const_int 0) (const_int 0)
9743			    (const_int 0) (const_int 0)
9744			    (const_int 0) (const_int 0)])
9745	(match_operand:QI 2 "register_operand" "Yk"))
9746      (const_vector:V8QI [(const_int 0) (const_int 0)
9747			  (const_int 0) (const_int 0)
9748			  (const_int 0) (const_int 0)
9749			  (const_int 0) (const_int 0)])))]
9750  "TARGET_AVX512VL"
9751  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9752  [(set_attr "type" "ssemov")
9753   (set_attr "prefix" "evex")
9754   (set_attr "mode" "TI")])
9755
;; Masked down-converting store of a VI2_128_BW_4_256 source to eight bytes
;; under TARGET_AVX512VL.  Bytes 0-7 of the V16QI memory destination are a
;; vec_merge of the truncated source with the destination's own bytes 0-7
;; under QImode mask operand 2; bytes 8-15 are re-selected from the
;; destination (match_dup 0), i.e. they keep their previous contents.
;; The Intel-syntax half of the template prints the destination as %q0.
9756(define_insn "avx512vl_<code><mode>v8qi2_mask_store"
9757  [(set (match_operand:V16QI 0 "memory_operand" "=m")
9758    (vec_concat:V16QI
9759      (vec_merge:V8QI
9760        (any_truncate:V8QI
9761          (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9762        (vec_select:V8QI
9763          (match_dup 0)
9764          (parallel [(const_int 0) (const_int 1)
9765                     (const_int 2) (const_int 3)
9766                     (const_int 4) (const_int 5)
9767                     (const_int 6) (const_int 7)]))
9768        (match_operand:QI 2 "register_operand" "Yk"))
9769      (vec_select:V8QI
9770        (match_dup 0)
9771        (parallel [(const_int 8) (const_int 9)
9772                   (const_int 10) (const_int 11)
9773                   (const_int 12) (const_int 13)
9774                   (const_int 14) (const_int 15)]))))]
9775  "TARGET_AVX512VL"
9776  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
9777  [(set_attr "type" "ssemov")
9778   (set_attr "memory" "store")
9779   (set_attr "prefix" "evex")
9780   (set_attr "mode" "TI")])
9781
;; PMOV_SRC_MODE_4 lists the source modes of the down-conversions to
;; 16-bit elements whose result fills only part of a V8HI.  For each
;; source mode:
;;   pmov_dst_4         mode of the truncated elements
;;   pmov_dst_zeroed_4  mode of the zero vector that pads up to 8 elements
;;   pmov_suff_4        the vpmov mnemonic suffix
9782(define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
9783(define_mode_attr pmov_dst_4
9784  [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
9785(define_mode_attr pmov_dst_zeroed_4
9786  [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
9787(define_mode_attr pmov_suff_4
9788  [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
9789
;; Register-to-register down-conversion to 16-bit elements under
;; TARGET_AVX512VL: the V8HI result is the truncated source (<pmov_dst_4>)
;; concatenated with operand 2, which must be a zero constant of mode
;; <pmov_dst_zeroed_4>.
9790(define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
9791  [(set (match_operand:V8HI 0 "register_operand" "=v")
9792    (vec_concat:V8HI
9793      (any_truncate:<pmov_dst_4>
9794	      (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
9795      (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
9796  "TARGET_AVX512VL"
9797  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
9798  [(set_attr "type" "ssemov")
9799   (set_attr "prefix" "evex")
9800   (set_attr "mode" "TI")])
9801
;; Down-converting store of a VI4_128_8_256 source to four 16-bit elements
;; under TARGET_AVX512VL.  The memory destination is modelled as V8HI:
;; elements 0-3 receive the truncated source and elements 4-7 are
;; re-selected from the destination itself (match_dup 0), i.e. they keep
;; their previous contents.
;; NOTE(review): the Intel-syntax half prints the destination as plain %0,
;; whereas the byte-element store patterns use a size modifier (%w0, %k0,
;; %q0); confirm that the unmodified form is intended here.
9802(define_insn "*avx512vl_<code><mode>v4hi2_store"
9803  [(set (match_operand:V8HI 0 "memory_operand" "=m")
9804    (vec_concat:V8HI
9805      (any_truncate:V4HI
9806	      (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9807      (vec_select:V4HI
9808        (match_dup 0)
9809        (parallel [(const_int 4) (const_int 5)
9810                   (const_int 6) (const_int 7)]))))]
9811  "TARGET_AVX512VL"
9812  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
9813  [(set_attr "type" "ssemov")
9814   (set_attr "memory" "store")
9815   (set_attr "prefix" "evex")
9816   (set_attr "mode" "TI")])
9817
;; Masked down-conversion of a VI4_128_8_256 source to four 16-bit
;; elements, into a register, under TARGET_AVX512VL.  Elements 0-3 of the
;; V8HI result are a vec_merge of the truncated source with elements 0-3
;; of operand 2 under QImode mask operand 3 (constraint Yk); elements 4-7
;; are the literal zero vector.  Operand 2 is either tied to the
;; destination or matches constraint C.
9818(define_insn "avx512vl_<code><mode>v4hi2_mask"
9819  [(set (match_operand:V8HI 0 "register_operand" "=v")
9820    (vec_concat:V8HI
9821      (vec_merge:V4HI
9822        (any_truncate:V4HI
9823          (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9824        (vec_select:V4HI
9825          (match_operand:V8HI 2 "vector_move_operand" "0C")
9826          (parallel [(const_int 0) (const_int 1)
9827                     (const_int 2) (const_int 3)]))
9828        (match_operand:QI 3 "register_operand" "Yk"))
9829      (const_vector:V4HI [(const_int 0) (const_int 0)
9830                          (const_int 0) (const_int 0)])))]
9831  "TARGET_AVX512VL"
9832  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9833  [(set_attr "type" "ssemov")
9834   (set_attr "prefix" "evex")
9835   (set_attr "mode" "TI")])
9836
;; Zero-masking variant of the 4-element down-convert to HImode: the
;; merge source is a literal zero vector rather than an operand, so the
;; mask is operand 2 and the template always prints {z}.
9837(define_insn "*avx512vl_<code><mode>v4hi2_mask_1"
9838  [(set (match_operand:V8HI 0 "register_operand" "=v")
9839    (vec_concat:V8HI
9840      (vec_merge:V4HI
9841	(any_truncate:V4HI
9842	  (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9843	(const_vector:V4HI [(const_int 0) (const_int 0)
9844			    (const_int 0) (const_int 0)])
9845	(match_operand:QI 2 "register_operand" "Yk"))
9846      (const_vector:V4HI [(const_int 0) (const_int 0)
9847			  (const_int 0) (const_int 0)])))]
9848  "TARGET_AVX512VL"
9849  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9850  [(set_attr "type" "ssemov")
9851   (set_attr "prefix" "evex")
9852   (set_attr "mode" "TI")])
9853
;; Masked memory-destination form of the 4-element down-convert to
;; HImode.  Both the masked-off low elements and elements 4..7 are
;; expressed through match_dup 0, i.e. as the destination's previous
;; contents.  The output code picks the Intel-syntax modifier on the
;; source operand by element size: %t1 for 4-byte elements, %g1
;; otherwise.
;; NOTE(review): %t/%g presumably select the YMM/ZMM register name --
;; confirm against the operand-printing code.
9854(define_insn "avx512vl_<code><mode>v4hi2_mask_store"
9855  [(set (match_operand:V8HI 0 "memory_operand" "=m")
9856    (vec_concat:V8HI
9857      (vec_merge:V4HI
9858        (any_truncate:V4HI
9859          (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9860        (vec_select:V4HI
9861          (match_dup 0)
9862          (parallel [(const_int 0) (const_int 1)
9863                     (const_int 2) (const_int 3)]))
9864        (match_operand:QI 2 "register_operand" "Yk"))
9865      (vec_select:V4HI
9866        (match_dup 0)
9867        (parallel [(const_int 4) (const_int 5)
9868                   (const_int 6) (const_int 7)]))))]
9869  "TARGET_AVX512VL"
9870{
9871  if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
9872    return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}";
9873  return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
9874}
9875  [(set_attr "type" "ssemov")
9876   (set_attr "memory" "store")
9877   (set_attr "prefix" "evex")
9878   (set_attr "mode" "TI")])
9879
;; Memory-destination V2DI -> V2HI down-convert (vpmov*qw).  Elements
;; 2..7 of the V8HI destination are expressed via match_dup 0, i.e. as
;; the destination's previous contents.
9880(define_insn "*avx512vl_<code>v2div2hi2_store"
9881  [(set (match_operand:V8HI 0 "memory_operand" "=m")
9882    (vec_concat:V8HI
9883      (any_truncate:V2HI
9884	      (match_operand:V2DI 1 "register_operand" "v"))
9885      (vec_select:V6HI
9886        (match_dup 0)
9887        (parallel [(const_int 2) (const_int 3)
9888                   (const_int 4) (const_int 5)
9889                   (const_int 6) (const_int 7)]))))]
9890  "TARGET_AVX512VL"
9891  "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
9892  [(set_attr "type" "ssemov")
9893   (set_attr "memory" "store")
9894   (set_attr "prefix" "evex")
9895   (set_attr "mode" "TI")])
9896
;; Masked register form of the V2DI -> V2HI down-convert.  Mask operand
;; 3 chooses between the truncated operand 1 and elements 0..1 of
;; operand 2 (tied to the destination or matching "C"); the upper six
;; result elements are zero.
9897(define_insn "avx512vl_<code>v2div2hi2_mask"
9898  [(set (match_operand:V8HI 0 "register_operand" "=v")
9899    (vec_concat:V8HI
9900      (vec_merge:V2HI
9901        (any_truncate:V2HI
9902          (match_operand:V2DI 1 "register_operand" "v"))
9903        (vec_select:V2HI
9904          (match_operand:V8HI 2 "vector_move_operand" "0C")
9905          (parallel [(const_int 0) (const_int 1)]))
9906        (match_operand:QI 3 "register_operand" "Yk"))
9907      (const_vector:V6HI [(const_int 0) (const_int 0)
9908                          (const_int 0) (const_int 0)
9909                          (const_int 0) (const_int 0)])))]
9910  "TARGET_AVX512VL"
9911  "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9912  [(set_attr "type" "ssemov")
9913   (set_attr "prefix" "evex")
9914   (set_attr "mode" "TI")])
9915
;; Zero-masking variant of the V2DI -> V2HI down-convert: the merge
;; source is a literal zero vector, the mask is operand 2 and the
;; template always prints {z}.
9916(define_insn "*avx512vl_<code>v2div2hi2_mask_1"
9917  [(set (match_operand:V8HI 0 "register_operand" "=v")
9918    (vec_concat:V8HI
9919      (vec_merge:V2HI
9920	(any_truncate:V2HI
9921	  (match_operand:V2DI 1 "register_operand" "v"))
9922	(const_vector:V2HI [(const_int 0) (const_int 0)])
9923	(match_operand:QI 2 "register_operand" "Yk"))
9924      (const_vector:V6HI [(const_int 0) (const_int 0)
9925			  (const_int 0) (const_int 0)
9926			  (const_int 0) (const_int 0)])))]
9927  "TARGET_AVX512VL"
9928  "vpmov<trunsuffix>qw\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9929  [(set_attr "type" "ssemov")
9930   (set_attr "prefix" "evex")
9931   (set_attr "mode" "TI")])
9932
;; Masked memory-destination form of the V2DI -> V2HI down-convert.
;; Masked-off elements and elements 2..7 are expressed via match_dup 0.
;; The Intel-syntax alternative prints the source as %g1.
;; NOTE(review): %g presumably selects the ZMM register name -- confirm
;; against the operand-printing code.
9933(define_insn "avx512vl_<code>v2div2hi2_mask_store"
9934  [(set (match_operand:V8HI 0 "memory_operand" "=m")
9935    (vec_concat:V8HI
9936      (vec_merge:V2HI
9937        (any_truncate:V2HI
9938          (match_operand:V2DI 1 "register_operand" "v"))
9939        (vec_select:V2HI
9940          (match_dup 0)
9941          (parallel [(const_int 0) (const_int 1)]))
9942        (match_operand:QI 2 "register_operand" "Yk"))
9943      (vec_select:V6HI
9944        (match_dup 0)
9945        (parallel [(const_int 2) (const_int 3)
9946                   (const_int 4) (const_int 5)
9947                   (const_int 6) (const_int 7)]))))]
9948  "TARGET_AVX512VL"
9949  "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %g1}"
9950  [(set_attr "type" "ssemov")
9951   (set_attr "memory" "store")
9952   (set_attr "prefix" "evex")
9953   (set_attr "mode" "TI")])
9954
;; Register-destination V2DI -> V2SI down-convert (vpmov*qd).  The
;; truncated value is the low half of the V4SI result; operand 2 must
;; satisfy const0_operand and forms the high half.
9955(define_insn "*avx512vl_<code>v2div2si2"
9956  [(set (match_operand:V4SI 0 "register_operand" "=v")
9957    (vec_concat:V4SI
9958      (any_truncate:V2SI
9959	      (match_operand:V2DI 1 "register_operand" "v"))
9960      (match_operand:V2SI 2 "const0_operand")))]
9961  "TARGET_AVX512VL"
9962  "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9963  [(set_attr "type" "ssemov")
9964   (set_attr "prefix" "evex")
9965   (set_attr "mode" "TI")])
9966
;; Memory-destination V2DI -> V2SI down-convert.  Elements 2..3 of the
;; V4SI destination are expressed via match_dup 0, i.e. as the
;; destination's previous contents.
9967(define_insn "*avx512vl_<code>v2div2si2_store"
9968  [(set (match_operand:V4SI 0 "memory_operand" "=m")
9969    (vec_concat:V4SI
9970      (any_truncate:V2SI
9971	      (match_operand:V2DI 1 "register_operand" "v"))
9972      (vec_select:V2SI
9973        (match_dup 0)
9974        (parallel [(const_int 2) (const_int 3)]))))]
9975  "TARGET_AVX512VL"
9976  "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9977  [(set_attr "type" "ssemov")
9978   (set_attr "memory" "store")
9979   (set_attr "prefix" "evex")
9980   (set_attr "mode" "TI")])
9981
;; Masked register form of the V2DI -> V2SI down-convert.  Mask operand
;; 3 chooses between the truncated operand 1 and elements 0..1 of
;; operand 2 (tied to the destination or matching "C"); the upper two
;; result elements are zero.
9982(define_insn "avx512vl_<code>v2div2si2_mask"
9983  [(set (match_operand:V4SI 0 "register_operand" "=v")
9984    (vec_concat:V4SI
9985      (vec_merge:V2SI
9986        (any_truncate:V2SI
9987          (match_operand:V2DI 1 "register_operand" "v"))
9988        (vec_select:V2SI
9989          (match_operand:V4SI 2 "vector_move_operand" "0C")
9990          (parallel [(const_int 0) (const_int 1)]))
9991        (match_operand:QI 3 "register_operand" "Yk"))
9992      (const_vector:V2SI [(const_int 0) (const_int 0)])))]
9993  "TARGET_AVX512VL"
9994  "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9995  [(set_attr "type" "ssemov")
9996   (set_attr "prefix" "evex")
9997   (set_attr "mode" "TI")])
9998
;; Zero-masking variant of the V2DI -> V2SI down-convert: the merge
;; source is a literal zero vector, the mask is operand 2 and the
;; template always prints {z}.
9999(define_insn "*avx512vl_<code>v2div2si2_mask_1"
10000  [(set (match_operand:V4SI 0 "register_operand" "=v")
10001    (vec_concat:V4SI
10002      (vec_merge:V2SI
10003	(any_truncate:V2SI
10004	  (match_operand:V2DI 1 "register_operand" "v"))
10005	(const_vector:V2SI [(const_int 0) (const_int 0)])
10006	(match_operand:QI 2 "register_operand" "Yk"))
10007      (const_vector:V2SI [(const_int 0) (const_int 0)])))]
10008  "TARGET_AVX512VL"
10009  "vpmov<trunsuffix>qd\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10010  [(set_attr "type" "ssemov")
10011   (set_attr "prefix" "evex")
10012   (set_attr "mode" "TI")])
10013
;; Masked memory-destination form of the V2DI -> V2SI down-convert.
;; Masked-off elements and elements 2..3 are expressed via match_dup 0.
;; The Intel-syntax alternative prints the source as %t1.
;; NOTE(review): %t presumably selects the YMM register name -- confirm
;; against the operand-printing code.
10014(define_insn "avx512vl_<code>v2div2si2_mask_store"
10015  [(set (match_operand:V4SI 0 "memory_operand" "=m")
10016    (vec_concat:V4SI
10017      (vec_merge:V2SI
10018        (any_truncate:V2SI
10019          (match_operand:V2DI 1 "register_operand" "v"))
10020        (vec_select:V2SI
10021          (match_dup 0)
10022          (parallel [(const_int 0) (const_int 1)]))
10023        (match_operand:QI 2 "register_operand" "Yk"))
10024      (vec_select:V2SI
10025        (match_dup 0)
10026        (parallel [(const_int 2) (const_int 3)]))))]
10027  "TARGET_AVX512VL"
10028  "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %t1}"
10029  [(set_attr "type" "ssemov")
10030   (set_attr "memory" "store")
10031   (set_attr "prefix" "evex")
10032   (set_attr "mode" "TI")])
10033
;; Register-destination V8DI -> V8QI down-convert (vpmov*qb).  The
;; truncated value is the low half of the V16QI result; the high eight
;; bytes are a literal zero vector.
10034(define_insn "*avx512f_<code>v8div16qi2"
10035  [(set (match_operand:V16QI 0 "register_operand" "=v")
10036	(vec_concat:V16QI
10037	  (any_truncate:V8QI
10038	    (match_operand:V8DI 1 "register_operand" "v"))
10039	  (const_vector:V8QI [(const_int 0) (const_int 0)
10040			      (const_int 0) (const_int 0)
10041			      (const_int 0) (const_int 0)
10042			      (const_int 0) (const_int 0)])))]
10043  "TARGET_AVX512F"
10044  "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
10045  [(set_attr "type" "ssemov")
10046   (set_attr "prefix" "evex")
10047   (set_attr "mode" "TI")])
10048
;; Memory-destination V8DI -> V8QI down-convert.  Bytes 8..15 of the
;; V16QI destination are expressed via match_dup 0, i.e. as the
;; destination's previous contents.
10049(define_insn "*avx512f_<code>v8div16qi2_store"
10050  [(set (match_operand:V16QI 0 "memory_operand" "=m")
10051	(vec_concat:V16QI
10052	  (any_truncate:V8QI
10053	    (match_operand:V8DI 1 "register_operand" "v"))
10054	  (vec_select:V8QI
10055	    (match_dup 0)
10056	    (parallel [(const_int 8) (const_int 9)
10057		       (const_int 10) (const_int 11)
10058		       (const_int 12) (const_int 13)
10059		       (const_int 14) (const_int 15)]))))]
10060  "TARGET_AVX512F"
10061  "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
10062  [(set_attr "type" "ssemov")
10063   (set_attr "memory" "store")
10064   (set_attr "prefix" "evex")
10065   (set_attr "mode" "TI")])
10066
;; Masked register form of the V8DI -> V8QI down-convert.  Mask operand
;; 3 chooses between the truncated operand 1 and bytes 0..7 of operand 2
;; (tied to the destination or matching "C"); the upper eight result
;; bytes are zero.
10067(define_insn "avx512f_<code>v8div16qi2_mask"
10068  [(set (match_operand:V16QI 0 "register_operand" "=v")
10069    (vec_concat:V16QI
10070      (vec_merge:V8QI
10071        (any_truncate:V8QI
10072          (match_operand:V8DI 1 "register_operand" "v"))
10073        (vec_select:V8QI
10074          (match_operand:V16QI 2 "vector_move_operand" "0C")
10075          (parallel [(const_int 0) (const_int 1)
10076                     (const_int 2) (const_int 3)
10077                     (const_int 4) (const_int 5)
10078                     (const_int 6) (const_int 7)]))
10079        (match_operand:QI 3 "register_operand" "Yk"))
10080      (const_vector:V8QI [(const_int 0) (const_int 0)
10081                          (const_int 0) (const_int 0)
10082                          (const_int 0) (const_int 0)
10083                          (const_int 0) (const_int 0)])))]
10084  "TARGET_AVX512F"
10085  "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10086  [(set_attr "type" "ssemov")
10087   (set_attr "prefix" "evex")
10088   (set_attr "mode" "TI")])
10089
;; Zero-masking variant of the V8DI -> V8QI down-convert: the merge
;; source is a literal zero vector, the mask is operand 2 and the
;; template always prints {z}.
10090(define_insn "*avx512f_<code>v8div16qi2_mask_1"
10091  [(set (match_operand:V16QI 0 "register_operand" "=v")
10092    (vec_concat:V16QI
10093      (vec_merge:V8QI
10094	(any_truncate:V8QI
10095	  (match_operand:V8DI 1 "register_operand" "v"))
10096	(const_vector:V8QI [(const_int 0) (const_int 0)
10097			    (const_int 0) (const_int 0)
10098			    (const_int 0) (const_int 0)
10099			    (const_int 0) (const_int 0)])
10100	(match_operand:QI 2 "register_operand" "Yk"))
10101      (const_vector:V8QI [(const_int 0) (const_int 0)
10102			  (const_int 0) (const_int 0)
10103			  (const_int 0) (const_int 0)
10104			  (const_int 0) (const_int 0)])))]
10105  "TARGET_AVX512F"
10106  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10107  [(set_attr "type" "ssemov")
10108   (set_attr "prefix" "evex")
10109   (set_attr "mode" "TI")])
10110
;; Masked memory-destination form of the V8DI -> V8QI down-convert.
;; Masked-off bytes and bytes 8..15 are expressed via match_dup 0.  The
;; Intel-syntax alternative prints the memory destination as %q0.
;; NOTE(review): %q presumably forces a 64-bit (qword) size on the
;; memory operand -- confirm against the operand-printing code.
10111(define_insn "avx512f_<code>v8div16qi2_mask_store"
10112  [(set (match_operand:V16QI 0 "memory_operand" "=m")
10113    (vec_concat:V16QI
10114      (vec_merge:V8QI
10115        (any_truncate:V8QI
10116          (match_operand:V8DI 1 "register_operand" "v"))
10117        (vec_select:V8QI
10118          (match_dup 0)
10119          (parallel [(const_int 0) (const_int 1)
10120                     (const_int 2) (const_int 3)
10121                     (const_int 4) (const_int 5)
10122                     (const_int 6) (const_int 7)]))
10123        (match_operand:QI 2 "register_operand" "Yk"))
10124      (vec_select:V8QI
10125        (match_dup 0)
10126        (parallel [(const_int 8) (const_int 9)
10127                   (const_int 10) (const_int 11)
10128                   (const_int 12) (const_int 13)
10129                   (const_int 14) (const_int 15)]))))]
10130  "TARGET_AVX512F"
10131  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
10132  [(set_attr "type" "ssemov")
10133   (set_attr "memory" "store")
10134   (set_attr "prefix" "evex")
10135   (set_attr "mode" "TI")])
10136
10137;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10138;;
10139;; Parallel integral arithmetic
10140;;
10141;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10142
;; Vector integer negation, expanded as (minus 0 x): the preparation
;; statement sets operand 2 to a register holding the all-zero constant
;; of the vector mode.
10143(define_expand "neg<mode>2"
10144  [(set (match_operand:VI_AVX2 0 "register_operand")
10145	(minus:VI_AVX2
10146	  (match_dup 2)
10147	  (match_operand:VI_AVX2 1 "vector_operand")))]
10148  "TARGET_SSE2"
10149  "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
10150
;; Vector integer add/subtract expander over the plusminus code
;; iterator.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
;; NOTE(review): that helper is defined outside this file; it presumably
;; legitimizes the operand combination in place -- confirm.
10151(define_expand "<plusminus_insn><mode>3"
10152  [(set (match_operand:VI_AVX2 0 "register_operand")
10153	(plusminus:VI_AVX2
10154	  (match_operand:VI_AVX2 1 "vector_operand")
10155	  (match_operand:VI_AVX2 2 "vector_operand")))]
10156  "TARGET_SSE2"
10157  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10158
;; Masked add/subtract expander for the VI48_AVX512VL modes: the result
;; of the operation is merged with operand 3 under mask operand 4.
;; Requires TARGET_AVX512F.
10159(define_expand "<plusminus_insn><mode>3_mask"
10160  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10161	(vec_merge:VI48_AVX512VL
10162	  (plusminus:VI48_AVX512VL
10163	    (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
10164	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10165	  (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10166	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10167  "TARGET_AVX512F"
10168  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10169
;; Masked add/subtract expander for the VI12_AVX512VL modes: the result
;; of the operation is merged with operand 3 under mask operand 4.
;; Requires TARGET_AVX512BW.
10170(define_expand "<plusminus_insn><mode>3_mask"
10171  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
10172	(vec_merge:VI12_AVX512VL
10173	  (plusminus:VI12_AVX512VL
10174	    (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
10175	    (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
10176	  (match_operand:VI12_AVX512VL 3 "vector_move_operand")
10177	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10178  "TARGET_AVX512BW"
10179  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10180
;; Vector integer add/subtract.  Alternative 0 is the two-operand legacy
;; form (operand 1 tied to the destination, isa "noavx"); alternative 1
;; is the three-operand v-prefixed form (isa "avx").
10181(define_insn "*<plusminus_insn><mode>3"
10182  [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
10183	(plusminus:VI_AVX2
10184	  (match_operand:VI_AVX2 1 "vector_operand" "<comm>0,v")
10185	  (match_operand:VI_AVX2 2 "vector_operand" "xBm,vm")))]
10186  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10187  "@
10188   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
10189   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10190  [(set_attr "isa" "noavx,avx")
10191   (set_attr "type" "sseiadd")
10192   (set_attr "prefix_data16" "1,*")
10193   (set_attr "prefix" "orig,vex")
10194   (set_attr "mode" "<sseinsnmode>")])
10195
;; Masked add/subtract for the VI48_AVX512VL modes.  Operand 3 is the
;; merge source (tied to the destination or matching "C") and operand 4
;; the mask register printed inside braces after the destination.
10196(define_insn "*<plusminus_insn><mode>3_mask"
10197  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10198	(vec_merge:VI48_AVX512VL
10199	  (plusminus:VI48_AVX512VL
10200	    (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
10201	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
10202	  (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
10203	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10204  "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10205  "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
10206  [(set_attr "type" "sseiadd")
10207   (set_attr "prefix" "evex")
10208   (set_attr "mode" "<sseinsnmode>")])
10209
;; Masked add/subtract for the VI12_AVX512VL modes; same shape as the
;; VI48_AVX512VL pattern but gated on TARGET_AVX512BW.
10210(define_insn "*<plusminus_insn><mode>3_mask"
10211  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10212	(vec_merge:VI12_AVX512VL
10213	  (plusminus:VI12_AVX512VL
10214	    (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
10215	    (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
10216	  (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
10217	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10218  "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10219  "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
10220  [(set_attr "type" "sseiadd")
10221   (set_attr "prefix" "evex")
10222   (set_attr "mode" "<sseinsnmode>")])
10223
;; Saturating add/subtract expander for the VI12_AVX2 modes, iterated
;; over the sat_plusminus codes.  The <mask_name> suffix and the
;; <mask_*_condition> terms come from the mask substitution defined
;; outside this chunk.
10224(define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
10225  [(set (match_operand:VI12_AVX2 0 "register_operand")
10226	(sat_plusminus:VI12_AVX2
10227	  (match_operand:VI12_AVX2 1 "vector_operand")
10228	  (match_operand:VI12_AVX2 2 "vector_operand")))]
10229  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10230  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10231
;; Saturating add/subtract.  Alternative 0 is the two-operand legacy
;; form; alternative 1 is the three-operand v-prefixed form, which also
;; carries the optional mask decoration <mask_operand3>.
;; NOTE(review): the "mode" attribute is fixed at TI although the
;; iterator is VI12_AVX2 -- confirm that is intended for wider modes.
10232(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
10233  [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
10234	(sat_plusminus:VI12_AVX2
10235	  (match_operand:VI12_AVX2 1 "vector_operand" "<comm>0,v")
10236	  (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))]
10237  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
10238   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10239  "@
10240   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
10241   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10242  [(set_attr "isa" "noavx,avx")
10243   (set_attr "type" "sseiadd")
10244   (set_attr "prefix_data16" "1,*")
10245   (set_attr "prefix" "orig,maybe_evex")
10246   (set_attr "mode" "TI")])
10247
;; Multiply expander for the VI1_AVX512 modes.  The RTL template is not
;; emitted: the preparation code hands operands 0..2 to
;; ix86_expand_vecop_qihi and finishes with DONE.
;; NOTE(review): only operands 0..2 are forwarded; confirm how the
;; extra operands of the <mask_name> variant are meant to be honoured.
10248(define_expand "mul<mode>3<mask_name>"
10249  [(set (match_operand:VI1_AVX512 0 "register_operand")
10250	(mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
10251		       (match_operand:VI1_AVX512 2 "register_operand")))]
10252  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10253{
10254  ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
10255  DONE;
10256})
10257
;; Multiply expander for the VI2_AVX2 modes; operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
10258(define_expand "mul<mode>3<mask_name>"
10259  [(set (match_operand:VI2_AVX2 0 "register_operand")
10260	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand")
10261		       (match_operand:VI2_AVX2 2 "vector_operand")))]
10262  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10263  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
10264
;; Multiply of the VI2_AVX2 modes (pmullw / vpmullw).  The "%"
;; constraint marks operands 1 and 2 as commutative; the condition
;; rejects the case where both are memory.
10265(define_insn "*mul<mode>3<mask_name>"
10266  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
10267	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
10268		       (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
10269  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
10270   && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10271  "@
10272   pmullw\t{%2, %0|%0, %2}
10273   vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10274  [(set_attr "isa" "noavx,avx")
10275   (set_attr "type" "sseimul")
10276   (set_attr "prefix_data16" "1,*")
10277   (set_attr "prefix" "orig,vex")
10278   (set_attr "mode" "<sseinsnmode>")])
10279
;; High-part multiply expander for the VI2_AVX2 modes: both operands
;; are extended to the double-width mode, multiplied, shifted right by
;; 16 and truncated back.  any_extend covers the extension variants.
10280(define_expand "<s>mul<mode>3_highpart<mask_name>"
10281  [(set (match_operand:VI2_AVX2 0 "register_operand")
10282	(truncate:VI2_AVX2
10283	  (lshiftrt:<ssedoublemode>
10284	    (mult:<ssedoublemode>
10285	      (any_extend:<ssedoublemode>
10286		(match_operand:VI2_AVX2 1 "vector_operand"))
10287	      (any_extend:<ssedoublemode>
10288		(match_operand:VI2_AVX2 2 "vector_operand")))
10289	    (const_int 16))))]
10290  "TARGET_SSE2
10291   && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10292  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
10293
;; High-part multiply of the VI2_AVX2 modes (pmulh<u>w / vpmulh<u>w).
;; Operands 1 and 2 are commutative ("%"); the condition rejects the
;; case where both are memory.
10294(define_insn "*<s>mul<mode>3_highpart<mask_name>"
10295  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
10296	(truncate:VI2_AVX2
10297	  (lshiftrt:<ssedoublemode>
10298	    (mult:<ssedoublemode>
10299	      (any_extend:<ssedoublemode>
10300		(match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
10301	      (any_extend:<ssedoublemode>
10302		(match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
10303	    (const_int 16))))]
10304  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
10305   && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10306  "@
10307   pmulh<u>w\t{%2, %0|%0, %2}
10308   vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10309  [(set_attr "isa" "noavx,avx")
10310   (set_attr "type" "sseimul")
10311   (set_attr "prefix_data16" "1,*")
10312   (set_attr "prefix" "orig,vex")
10313   (set_attr "mode" "<sseinsnmode>")])
10314
;; Unsigned widening multiply of the even-indexed SImode elements
;; (0, 2, ..., 14) of two V16SI operands, producing V8DI.
10315(define_expand "vec_widen_umult_even_v16si<mask_name>"
10316  [(set (match_operand:V8DI 0 "register_operand")
10317        (mult:V8DI
10318          (zero_extend:V8DI
10319            (vec_select:V8SI
10320              (match_operand:V16SI 1 "nonimmediate_operand")
10321              (parallel [(const_int 0) (const_int 2)
10322                         (const_int 4) (const_int 6)
10323                         (const_int 8) (const_int 10)
10324                         (const_int 12) (const_int 14)])))
10325          (zero_extend:V8DI
10326            (vec_select:V8SI
10327              (match_operand:V16SI 2 "nonimmediate_operand")
10328              (parallel [(const_int 0) (const_int 2)
10329                         (const_int 4) (const_int 6)
10330                         (const_int 8) (const_int 10)
10331                         (const_int 12) (const_int 14)])))))]
10332  "TARGET_AVX512F"
10333  "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
10334
;; vpmuludq on 512-bit operands: zero-extends the even-indexed SImode
;; elements of operands 1 and 2 and multiplies them into V8DI.  The
;; operands are commutative ("%") and must not both be memory.
10335(define_insn "*vec_widen_umult_even_v16si<mask_name>"
10336  [(set (match_operand:V8DI 0 "register_operand" "=v")
10337        (mult:V8DI
10338          (zero_extend:V8DI
10339            (vec_select:V8SI
10340              (match_operand:V16SI 1 "nonimmediate_operand" "%v")
10341              (parallel [(const_int 0) (const_int 2)
10342                         (const_int 4) (const_int 6)
10343                         (const_int 8) (const_int 10)
10344                         (const_int 12) (const_int 14)])))
10345          (zero_extend:V8DI
10346            (vec_select:V8SI
10347              (match_operand:V16SI 2 "nonimmediate_operand" "vm")
10348              (parallel [(const_int 0) (const_int 2)
10349                         (const_int 4) (const_int 6)
10350                         (const_int 8) (const_int 10)
10351                         (const_int 12) (const_int 14)])))))]
10352  "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10353  "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10354  [(set_attr "type" "sseimul")
10355   (set_attr "prefix_extra" "1")
10356   (set_attr "prefix" "evex")
10357   (set_attr "mode" "XI")])
10358
;; Unsigned widening multiply of the even-indexed SImode elements
;; (0, 2, 4, 6) of two V8SI operands, producing V4DI.
10359(define_expand "vec_widen_umult_even_v8si<mask_name>"
10360  [(set (match_operand:V4DI 0 "register_operand")
10361	(mult:V4DI
10362	  (zero_extend:V4DI
10363	    (vec_select:V4SI
10364	      (match_operand:V8SI 1 "nonimmediate_operand")
10365	      (parallel [(const_int 0) (const_int 2)
10366			 (const_int 4) (const_int 6)])))
10367	  (zero_extend:V4DI
10368	    (vec_select:V4SI
10369	      (match_operand:V8SI 2 "nonimmediate_operand")
10370	      (parallel [(const_int 0) (const_int 2)
10371			 (const_int 4) (const_int 6)])))))]
10372  "TARGET_AVX2 && <mask_avx512vl_condition>"
10373  "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
10374
;; vpmuludq on 256-bit operands: zero-extends the even-indexed SImode
;; elements of operands 1 and 2 and multiplies them into V4DI.  The
;; operands are commutative ("%") and must not both be memory.
10375(define_insn "*vec_widen_umult_even_v8si<mask_name>"
10376  [(set (match_operand:V4DI 0 "register_operand" "=v")
10377	(mult:V4DI
10378	  (zero_extend:V4DI
10379	    (vec_select:V4SI
10380	      (match_operand:V8SI 1 "nonimmediate_operand" "%v")
10381	      (parallel [(const_int 0) (const_int 2)
10382			 (const_int 4) (const_int 6)])))
10383	  (zero_extend:V4DI
10384	    (vec_select:V4SI
10385	      (match_operand:V8SI 2 "nonimmediate_operand" "vm")
10386	      (parallel [(const_int 0) (const_int 2)
10387			 (const_int 4) (const_int 6)])))))]
10388  "TARGET_AVX2 && <mask_avx512vl_condition>
10389   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10390  "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10391  [(set_attr "type" "sseimul")
10392   (set_attr "prefix" "maybe_evex")
10393   (set_attr "mode" "OI")])
10394
;; Unsigned widening multiply of the even-indexed SImode elements
;; (0 and 2) of two V4SI operands, producing V2DI.
10395(define_expand "vec_widen_umult_even_v4si<mask_name>"
10396  [(set (match_operand:V2DI 0 "register_operand")
10397	(mult:V2DI
10398	  (zero_extend:V2DI
10399	    (vec_select:V2SI
10400	      (match_operand:V4SI 1 "vector_operand")
10401	      (parallel [(const_int 0) (const_int 2)])))
10402	  (zero_extend:V2DI
10403	    (vec_select:V2SI
10404	      (match_operand:V4SI 2 "vector_operand")
10405	      (parallel [(const_int 0) (const_int 2)])))))]
10406  "TARGET_SSE2 && <mask_avx512vl_condition>"
10407  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
10408
;; pmuludq / vpmuludq on 128-bit operands.  Alternative 0 is the
;; two-operand legacy form (isa "noavx"); alternative 1 is the
;; three-operand v-prefixed form with optional mask decoration.
10409(define_insn "*vec_widen_umult_even_v4si<mask_name>"
10410  [(set (match_operand:V2DI 0 "register_operand" "=x,v")
10411	(mult:V2DI
10412	  (zero_extend:V2DI
10413	    (vec_select:V2SI
10414	      (match_operand:V4SI 1 "vector_operand" "%0,v")
10415	      (parallel [(const_int 0) (const_int 2)])))
10416	  (zero_extend:V2DI
10417	    (vec_select:V2SI
10418	      (match_operand:V4SI 2 "vector_operand" "xBm,vm")
10419	      (parallel [(const_int 0) (const_int 2)])))))]
10420  "TARGET_SSE2 && <mask_avx512vl_condition>
10421   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10422  "@
10423   pmuludq\t{%2, %0|%0, %2}
10424   vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10425  [(set_attr "isa" "noavx,avx")
10426   (set_attr "type" "sseimul")
10427   (set_attr "prefix_data16" "1,*")
10428   (set_attr "prefix" "orig,maybe_evex")
10429   (set_attr "mode" "TI")])
10430
;; Signed widening multiply of the even-indexed SImode elements
;; (0, 2, ..., 14) of two V16SI operands, producing V8DI.
10431(define_expand "vec_widen_smult_even_v16si<mask_name>"
10432  [(set (match_operand:V8DI 0 "register_operand")
10433        (mult:V8DI
10434          (sign_extend:V8DI
10435            (vec_select:V8SI
10436              (match_operand:V16SI 1 "nonimmediate_operand")
10437              (parallel [(const_int 0) (const_int 2)
10438                         (const_int 4) (const_int 6)
10439                         (const_int 8) (const_int 10)
10440                         (const_int 12) (const_int 14)])))
10441          (sign_extend:V8DI
10442            (vec_select:V8SI
10443              (match_operand:V16SI 2 "nonimmediate_operand")
10444              (parallel [(const_int 0) (const_int 2)
10445                         (const_int 4) (const_int 6)
10446                         (const_int 8) (const_int 10)
10447                         (const_int 12) (const_int 14)])))))]
10448  "TARGET_AVX512F"
10449  "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
10450
;; vpmuldq on 512-bit operands: sign-extends the even-indexed SImode
;; elements of operands 1 and 2 and multiplies them into V8DI.  The
;; operands are commutative ("%") and must not both be memory.
10451(define_insn "*vec_widen_smult_even_v16si<mask_name>"
10452  [(set (match_operand:V8DI 0 "register_operand" "=v")
10453        (mult:V8DI
10454          (sign_extend:V8DI
10455            (vec_select:V8SI
10456              (match_operand:V16SI 1 "nonimmediate_operand" "%v")
10457              (parallel [(const_int 0) (const_int 2)
10458                         (const_int 4) (const_int 6)
10459                         (const_int 8) (const_int 10)
10460                         (const_int 12) (const_int 14)])))
10461          (sign_extend:V8DI
10462            (vec_select:V8SI
10463              (match_operand:V16SI 2 "nonimmediate_operand" "vm")
10464              (parallel [(const_int 0) (const_int 2)
10465                         (const_int 4) (const_int 6)
10466                         (const_int 8) (const_int 10)
10467                         (const_int 12) (const_int 14)])))))]
10468  "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10469  "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10470  [(set_attr "type" "sseimul")
10471   (set_attr "prefix_extra" "1")
10472   (set_attr "prefix" "evex")
10473   (set_attr "mode" "XI")])
10474
;; Signed widening multiply of the even-indexed SImode elements
;; (0, 2, 4, 6) of two V8SI operands, producing V4DI.
10475(define_expand "vec_widen_smult_even_v8si<mask_name>"
10476  [(set (match_operand:V4DI 0 "register_operand")
10477	(mult:V4DI
10478	  (sign_extend:V4DI
10479	    (vec_select:V4SI
10480	      (match_operand:V8SI 1 "nonimmediate_operand")
10481	      (parallel [(const_int 0) (const_int 2)
10482			 (const_int 4) (const_int 6)])))
10483	  (sign_extend:V4DI
10484	    (vec_select:V4SI
10485	      (match_operand:V8SI 2 "nonimmediate_operand")
10486	      (parallel [(const_int 0) (const_int 2)
10487			 (const_int 4) (const_int 6)])))))]
10488  "TARGET_AVX2 && <mask_avx512vl_condition>"
10489  "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
10490
;; vpmuldq on 256-bit operands: sign-extends the even-indexed SImode
;; elements of operands 1 and 2 and multiplies them into V4DI.  The
;; operands are commutative ("%") and must not both be memory.
;; The condition includes <mask_avx512vl_condition>, matching this
;; pattern's expander and the unsigned v8si insn, so that the masked
;; variant is gated the same way as its siblings.
10491(define_insn "*vec_widen_smult_even_v8si<mask_name>"
10492  [(set (match_operand:V4DI 0 "register_operand" "=v")
10493	(mult:V4DI
10494	  (sign_extend:V4DI
10495	    (vec_select:V4SI
10496	      (match_operand:V8SI 1 "nonimmediate_operand" "%v")
10497	      (parallel [(const_int 0) (const_int 2)
10498			 (const_int 4) (const_int 6)])))
10499	  (sign_extend:V4DI
10500	    (vec_select:V4SI
10501	      (match_operand:V8SI 2 "nonimmediate_operand" "vm")
10502	      (parallel [(const_int 0) (const_int 2)
10503			 (const_int 4) (const_int 6)])))))]
10504  "TARGET_AVX2 && <mask_avx512vl_condition> && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10505  "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10506  [(set_attr "type" "sseimul")
10507   (set_attr "prefix_extra" "1")
10508   (set_attr "prefix" "vex")
10509   (set_attr "mode" "OI")])
10510
;; Signed widening multiply of the even-indexed SImode elements
;; (0 and 2) of two V4SI operands, producing V2DI.  Requires SSE4.1.
10511(define_expand "sse4_1_mulv2siv2di3<mask_name>"
10512  [(set (match_operand:V2DI 0 "register_operand")
10513	(mult:V2DI
10514	  (sign_extend:V2DI
10515	    (vec_select:V2SI
10516	      (match_operand:V4SI 1 "vector_operand")
10517	      (parallel [(const_int 0) (const_int 2)])))
10518	  (sign_extend:V2DI
10519	    (vec_select:V2SI
10520	      (match_operand:V4SI 2 "vector_operand")
10521	      (parallel [(const_int 0) (const_int 2)])))))]
10522  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
10523  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
10524
;; pmuldq / vpmuldq on 128-bit operands.  Alternatives 0 and 1 are the
;; two-operand legacy form (isa "noavx") with different register
;; classes; alternative 2 is the three-operand v-prefixed form with
;; optional mask decoration.
10525(define_insn "*sse4_1_mulv2siv2di3<mask_name>"
10526  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
10527	(mult:V2DI
10528	  (sign_extend:V2DI
10529	    (vec_select:V2SI
10530	      (match_operand:V4SI 1 "vector_operand" "%0,0,v")
10531	      (parallel [(const_int 0) (const_int 2)])))
10532	  (sign_extend:V2DI
10533	    (vec_select:V2SI
10534	      (match_operand:V4SI 2 "vector_operand" "YrBm,*xBm,vm")
10535	      (parallel [(const_int 0) (const_int 2)])))))]
10536  "TARGET_SSE4_1 && <mask_avx512vl_condition>
10537   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10538  "@
10539   pmuldq\t{%2, %0|%0, %2}
10540   pmuldq\t{%2, %0|%0, %2}
10541   vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10542  [(set_attr "isa" "noavx,noavx,avx")
10543   (set_attr "type" "sseimul")
10544   (set_attr "prefix_data16" "1,1,*")
10545   (set_attr "prefix_extra" "1")
10546   (set_attr "prefix" "orig,orig,vex")
10547   (set_attr "mode" "TI")])
10548
;; vpmaddwd expressed as an unspec (UNSPEC_PMADDWD512) over the
;; VI2_AVX2 modes, producing the <sseunpackmode> result.  Operand 1 is a
;; register; operand 2 may be a register or memory.
;; The "mode" attribute follows the iterated vector mode via
;; <sseinsnmode>, as the other VI2_AVX2 patterns in this file do,
;; instead of being fixed at XI for the narrower modes as well.
10549(define_insn "avx512bw_pmaddwd512<mode><mask_name>"
10550  [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
10551          (unspec:<sseunpackmode>
10552            [(match_operand:VI2_AVX2 1 "register_operand" "v")
10553             (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
10554             UNSPEC_PMADDWD512))]
10555   "TARGET_AVX512BW && <mask_mode512bit_condition>"
10556   "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10557  [(set_attr "type" "sseiadd")
10558   (set_attr "prefix" "evex")
10559   (set_attr "mode" "<sseinsnmode>")])
10560
;; Multiply-add expander for V16HI -> V8SI: sign-extends the
;; even-indexed HImode elements of operands 1 and 2 and multiplies
;; them, does the same for the odd-indexed elements, and adds the two
;; V8SI products element-wise.
10561(define_expand "avx2_pmaddwd"
10562  [(set (match_operand:V8SI 0 "register_operand")
10563	(plus:V8SI
10564	  (mult:V8SI
10565	    (sign_extend:V8SI
10566	      (vec_select:V8HI
10567		(match_operand:V16HI 1 "nonimmediate_operand")
10568		(parallel [(const_int 0) (const_int 2)
10569			   (const_int 4) (const_int 6)
10570			   (const_int 8) (const_int 10)
10571			   (const_int 12) (const_int 14)])))
10572	    (sign_extend:V8SI
10573	      (vec_select:V8HI
10574		(match_operand:V16HI 2 "nonimmediate_operand")
10575		(parallel [(const_int 0) (const_int 2)
10576			   (const_int 4) (const_int 6)
10577			   (const_int 8) (const_int 10)
10578			   (const_int 12) (const_int 14)]))))
10579	  (mult:V8SI
10580	    (sign_extend:V8SI
10581	      (vec_select:V8HI (match_dup 1)
10582		(parallel [(const_int 1) (const_int 3)
10583			   (const_int 5) (const_int 7)
10584			   (const_int 9) (const_int 11)
10585			   (const_int 13) (const_int 15)])))
10586	    (sign_extend:V8SI
10587	      (vec_select:V8HI (match_dup 2)
10588		(parallel [(const_int 1) (const_int 3)
10589			   (const_int 5) (const_int 7)
10590			   (const_int 9) (const_int 11)
10591			   (const_int 13) (const_int 15)]))))))]
10592  "TARGET_AVX2"
10593  "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
10594
;; Insn matching the RTL produced by the avx2_pmaddwd expander: sum of the
;; even-lane and odd-lane sign-extended HImode products of two V16HI
;; vectors.  Alternative 0 uses "x" registers with a VEX prefix;
;; alternative 1 uses "v" registers and is tagged isa avx512bw / prefix
;; evex.  Operand 1 is marked commutative ("%"), and the condition forbids
;; both inputs being memory.
10595(define_insn "*avx2_pmaddwd"
10596  [(set (match_operand:V8SI 0 "register_operand" "=x,v")
10597	(plus:V8SI
10598	  (mult:V8SI
10599	    (sign_extend:V8SI
10600	      (vec_select:V8HI
10601		(match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
10602		(parallel [(const_int 0) (const_int 2)
10603			   (const_int 4) (const_int 6)
10604			   (const_int 8) (const_int 10)
10605			   (const_int 12) (const_int 14)])))
10606	    (sign_extend:V8SI
10607	      (vec_select:V8HI
10608		(match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
10609		(parallel [(const_int 0) (const_int 2)
10610			   (const_int 4) (const_int 6)
10611			   (const_int 8) (const_int 10)
10612			   (const_int 12) (const_int 14)]))))
10613	  (mult:V8SI
10614	    (sign_extend:V8SI
10615	      (vec_select:V8HI (match_dup 1)
10616		(parallel [(const_int 1) (const_int 3)
10617			   (const_int 5) (const_int 7)
10618			   (const_int 9) (const_int 11)
10619			   (const_int 13) (const_int 15)])))
10620	    (sign_extend:V8SI
10621	      (vec_select:V8HI (match_dup 2)
10622		(parallel [(const_int 1) (const_int 3)
10623			   (const_int 5) (const_int 7)
10624			   (const_int 9) (const_int 11)
10625			   (const_int 13) (const_int 15)]))))))]
10626  "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10627  "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
10628  [(set_attr "type" "sseiadd")
10629   (set_attr "isa" "*,avx512bw")
10630   (set_attr "prefix" "vex,evex")
10631   (set_attr "mode" "OI")])
10632
;; Expander for the 128-bit multiply-add of packed words.  Each of the
;; four SImode results is the sum of the sign-extended product of the
;; even-indexed HImode elements (0,2,4,6) and the product of the
;; odd-indexed ones (1,3,5,7) of the two V8HI inputs.  Operands are
;; passed through ix86_fixup_binary_operands_no_copy first.
10633(define_expand "sse2_pmaddwd"
10634  [(set (match_operand:V4SI 0 "register_operand")
10635	(plus:V4SI
10636	  (mult:V4SI
10637	    (sign_extend:V4SI
10638	      (vec_select:V4HI
10639		(match_operand:V8HI 1 "vector_operand")
10640		(parallel [(const_int 0) (const_int 2)
10641			   (const_int 4) (const_int 6)])))
10642	    (sign_extend:V4SI
10643	      (vec_select:V4HI
10644		(match_operand:V8HI 2 "vector_operand")
10645		(parallel [(const_int 0) (const_int 2)
10646			   (const_int 4) (const_int 6)]))))
10647	  (mult:V4SI
10648	    (sign_extend:V4SI
10649	      (vec_select:V4HI (match_dup 1)
10650		(parallel [(const_int 1) (const_int 3)
10651			   (const_int 5) (const_int 7)])))
10652	    (sign_extend:V4SI
10653	      (vec_select:V4HI (match_dup 2)
10654		(parallel [(const_int 1) (const_int 3)
10655			   (const_int 5) (const_int 7)]))))))]
10656  "TARGET_SSE2"
10657  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
10658
;; Insn matching the RTL produced by the sse2_pmaddwd expander.  Three
;; alternatives: 0 is the two-operand legacy form (operand 1 tied to
;; the destination, isa noavx); 1 is the three-operand VEX form; 2 is the
;; "v"-register form tagged isa avx512bw / prefix evex.  Two memory inputs
;; are rejected by the condition.
10659(define_insn "*sse2_pmaddwd"
10660  [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
10661	(plus:V4SI
10662	  (mult:V4SI
10663	    (sign_extend:V4SI
10664	      (vec_select:V4HI
10665		(match_operand:V8HI 1 "vector_operand" "%0,x,v")
10666		(parallel [(const_int 0) (const_int 2)
10667			   (const_int 4) (const_int 6)])))
10668	    (sign_extend:V4SI
10669	      (vec_select:V4HI
10670		(match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
10671		(parallel [(const_int 0) (const_int 2)
10672			   (const_int 4) (const_int 6)]))))
10673	  (mult:V4SI
10674	    (sign_extend:V4SI
10675	      (vec_select:V4HI (match_dup 1)
10676		(parallel [(const_int 1) (const_int 3)
10677			   (const_int 5) (const_int 7)])))
10678	    (sign_extend:V4SI
10679	      (vec_select:V4HI (match_dup 2)
10680		(parallel [(const_int 1) (const_int 3)
10681			   (const_int 5) (const_int 7)]))))))]
10682  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10683  "@
10684   pmaddwd\t{%2, %0|%0, %2}
10685   vpmaddwd\t{%2, %1, %0|%0, %1, %2}
10686   vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
10687  [(set_attr "isa" "noavx,avx,avx512bw")
10688   (set_attr "type" "sseiadd")
10689   (set_attr "atom_unit" "simul")
10690   (set_attr "prefix_data16" "1,*,*")
10691   (set_attr "prefix" "orig,vex,evex")
10692   (set_attr "mode" "TI")])
10693
;; Element-wise multiply of VI8 vectors, emitted as vpmullq.  Requires
;; AVX512DQ; operand 1 must be a register, operand 2 may be memory.
;; The template also prints the optional mask operand.
10694(define_insn "avx512dq_mul<mode>3<mask_name>"
10695  [(set (match_operand:VI8 0 "register_operand" "=v")
10696	(mult:VI8
10697	  (match_operand:VI8 1 "register_operand" "v")
10698	  (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
10699  "TARGET_AVX512DQ && <mask_mode512bit_condition>"
10700  "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10701  [(set_attr "type" "sseimul")
10702   (set_attr "prefix" "evex")
10703   (set_attr "mode" "<sseinsnmode>")])
10704
;; Expander for element-wise multiply of VI4_AVX512F vectors.
;; With SSE4.1 the inputs that do not satisfy vector_operand are forced
;; into registers, the operands are fixed up, and the mult RTL template is
;; emitted for the pmulld insn to match.  Without SSE4.1 the work is handed
;; to ix86_expand_sse2_mulv4si3 and the template is not emitted (DONE).
10705(define_expand "mul<mode>3<mask_name>"
10706  [(set (match_operand:VI4_AVX512F 0 "register_operand")
10707	(mult:VI4_AVX512F
10708	  (match_operand:VI4_AVX512F 1 "general_vector_operand")
10709	  (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
10710  "TARGET_SSE2 && <mask_mode512bit_condition>"
10711{
10712  if (TARGET_SSE4_1)
10713    {
10714      if (!vector_operand (operands[1], <MODE>mode))
10715	operands[1] = force_reg (<MODE>mode, operands[1]);
10716      if (!vector_operand (operands[2], <MODE>mode))
10717	operands[2] = force_reg (<MODE>mode, operands[2]);
10718      ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
10719    }
10720  else
10721    {
10722      ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
10723      DONE;
10724    }
10725})
10726
;; Element-wise multiply of VI4_AVX512F vectors (pmulld / vpmulld).
;; Alternatives 0 and 1 are the two-operand legacy form with operand 1
;; tied to the destination; alternative 2 is the three-operand AVX form,
;; which also prints the optional mask operand.  Operand 1 is commutative
;; and two memory inputs are rejected.
10727(define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
10728  [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
10729	(mult:VI4_AVX512F
10730	  (match_operand:VI4_AVX512F 1 "vector_operand" "%0,0,v")
10731	  (match_operand:VI4_AVX512F 2 "vector_operand" "YrBm,*xBm,vm")))]
10732  "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
10733   && <mask_mode512bit_condition>"
10734  "@
10735   pmulld\t{%2, %0|%0, %2}
10736   pmulld\t{%2, %0|%0, %2}
10737   vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10738  [(set_attr "isa" "noavx,noavx,avx")
10739   (set_attr "type" "sseimul")
10740   (set_attr "prefix_extra" "1")
10741   (set_attr "prefix" "<mask_prefix4>")
10742   (set_attr "btver2_decode" "vector,vector,vector")
10743   (set_attr "mode" "<sseinsnmode>")])
10744
;; Expander for element-wise multiply of VI8_AVX2_AVX512F vectors.
;; The RTL template is never emitted: expansion is always delegated to
;; ix86_expand_sse2_mulvxdi3, followed by DONE.
10745(define_expand "mul<mode>3"
10746  [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
10747	(mult:VI8_AVX2_AVX512F
10748	  (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
10749	  (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
10750  "TARGET_SSE2"
10751{
10752  ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
10753  DONE;
10754})
10755
;; Widening multiply, "hi" variant, for VI124_AVX2 inputs; the result has
;; the <sseunpackmode> of the input mode.  Delegates entirely to
;; ix86_expand_mul_widen_hilo, passing <u_bool> for the signedness of the
;; any_extend code and `true' as the final argument (presumably selecting
;; the high half -- the helper is defined outside this file).
10756(define_expand "vec_widen_<s>mult_hi_<mode>"
10757  [(match_operand:<sseunpackmode> 0 "register_operand")
10758   (any_extend:<sseunpackmode>
10759     (match_operand:VI124_AVX2 1 "register_operand"))
10760   (match_operand:VI124_AVX2 2 "register_operand")]
10761  "TARGET_SSE2"
10762{
10763  ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
10764			      <u_bool>, true);
10765  DONE;
10766})
10767
;; Widening multiply, "lo" variant, for VI124_AVX2 inputs.  Identical to
;; the "hi" expander except that `false' is passed as the final argument
;; of ix86_expand_mul_widen_hilo (presumably selecting the low half --
;; the helper is defined outside this file).
10768(define_expand "vec_widen_<s>mult_lo_<mode>"
10769  [(match_operand:<sseunpackmode> 0 "register_operand")
10770   (any_extend:<sseunpackmode>
10771     (match_operand:VI124_AVX2 1 "register_operand"))
10772   (match_operand:VI124_AVX2 2 "register_operand")]
10773  "TARGET_SSE2"
10774{
10775  ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
10776			      <u_bool>, false);
10777  DONE;
10778})
10779
10780;; Most widen_<s>mult_even_<mode> can be handled directly from other
10781;; named patterns, but signed V4SI needs special help for plain SSE2.
;; Signed V4SI -> V2DI widening multiply of the even elements.  Delegates
;; to ix86_expand_mul_widen_evenodd with both trailing flags false; going
;; by the sibling "odd" expander these are the unsigned flag and the
;; odd-element flag, i.e. signed/even here (helper defined elsewhere).
10782(define_expand "vec_widen_smult_even_v4si"
10783  [(match_operand:V2DI 0 "register_operand")
10784   (match_operand:V4SI 1 "vector_operand")
10785   (match_operand:V4SI 2 "vector_operand")]
10786  "TARGET_SSE2"
10787{
10788  ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
10789				 false, false);
10790  DONE;
10791})
10792
;; Widening multiply of the odd elements of VI4_AVX512F vectors, for both
;; extension codes of any_extend.  Delegates to
;; ix86_expand_mul_widen_evenodd, passing <u_bool> for the signedness and
;; `true' as the final (odd) flag.
10793(define_expand "vec_widen_<s>mult_odd_<mode>"
10794  [(match_operand:<sseunpackmode> 0 "register_operand")
10795   (any_extend:<sseunpackmode>
10796     (match_operand:VI4_AVX512F 1 "general_vector_operand"))
10797   (match_operand:VI4_AVX512F 2 "general_vector_operand")]
10798  "TARGET_SSE2"
10799{
10800  ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
10801				 <u_bool>, true);
10802  DONE;
10803})
10804
;; Suffix appended to "gen_<sse2_avx2>_pmaddwd" by sdot_prod<mode> to form
;; the generator name: "512v32hi" for V32HI, empty for V16HI and V8HI.
10805(define_mode_attr SDOT_PMADD_SUF
10806  [(V32HI "512v32hi") (V16HI "") (V8HI "")])
10807
;; Signed dot product with accumulation for VI2_AVX2 inputs:
;;   operands[0] = operands[3] + pmaddwd (operands[1], operands[2]).
;; The pmaddwd generator name is assembled from <sse2_avx2> and
;; <SDOT_PMADD_SUF>; the RTL template is not emitted (DONE).
10808(define_expand "sdot_prod<mode>"
10809  [(match_operand:<sseunpackmode> 0 "register_operand")
10810   (match_operand:VI2_AVX2 1 "register_operand")
10811   (match_operand:VI2_AVX2 2 "register_operand")
10812   (match_operand:<sseunpackmode> 3 "register_operand")]
10813  "TARGET_SSE2"
10814{
  /* Pairwise multiply-add of the two inputs into a fresh pseudo.  */
10815  rtx madd = gen_reg_rtx (<sseunpackmode>mode);
10816  emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (madd, operands[1], operands[2]));
  /* Fold the incoming accumulator into the result.  */
10817  rtx acc_sum = gen_rtx_PLUS (<sseunpackmode>mode, operands[3], madd);
10818  emit_insn (gen_rtx_SET (operands[0], acc_sum));
10820  DONE;
10821})
10822
10823;; Normally we use widen_mul_even/odd, but combine can't quite get it all
10824;; back together when madd is available.
;; XOP-only V4SI dot product accumulating into V2DI.  Two chained
;; multiply-accumulate insns are emitted: xop_pmacsdqh adds its products
;; to the incoming accumulator (operand 3) into a temporary, then
;; xop_pmacsdql adds its products to that temporary into operand 0.
10825(define_expand "sdot_prodv4si"
10826  [(match_operand:V2DI 0 "register_operand")
10827   (match_operand:V4SI 1 "register_operand")
10828   (match_operand:V4SI 2 "register_operand")
10829   (match_operand:V2DI 3 "register_operand")]
10830  "TARGET_XOP"
10831{
  /* Intermediate accumulator produced by the first (pmacsdqh) step.  */
10832  rtx partial_acc = gen_reg_rtx (V2DImode);
10833  emit_insn (gen_xop_pmacsdqh (partial_acc, operands[1], operands[2], operands[3]));
10834  emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], partial_acc));
10835  DONE;
10836})
10837
;; Unsigned sum of absolute differences of two V16QI vectors, added to a
;; V4SI accumulator (operand 3).  psadbw yields a V2DI value, which is
;; converted to V4SI with convert_move before the V4SI addition.
10838(define_expand "usadv16qi"
10839  [(match_operand:V4SI 0 "register_operand")
10840   (match_operand:V16QI 1 "register_operand")
10841   (match_operand:V16QI 2 "vector_operand")
10842   (match_operand:V4SI 3 "vector_operand")]
10843  "TARGET_SSE2"
10844{
  /* psadbw result in its native V2DI mode, and its V4SI counterpart.  */
10845  rtx sad_v2di = gen_reg_rtx (V2DImode);
10846  rtx sad_v4si = gen_reg_rtx (V4SImode);
10847  emit_insn (gen_sse2_psadbw (sad_v2di, operands[1], operands[2]));
10848  convert_move (sad_v4si, sad_v2di, 0);
  /* Accumulate into the destination.  */
10849  emit_insn (gen_addv4si3 (operands[0], sad_v4si, operands[3]));
10850  DONE;
10851})
10852
;; 256-bit counterpart of usadv16qi (AVX2): unsigned sum of absolute
;; differences of two V32QI vectors, added to a V8SI accumulator
;; (operand 3).  The V4DI psadbw result is converted to V8SI with
;; convert_move before the V8SI addition.
10853(define_expand "usadv32qi"
10854  [(match_operand:V8SI 0 "register_operand")
10855   (match_operand:V32QI 1 "register_operand")
10856   (match_operand:V32QI 2 "nonimmediate_operand")
10857   (match_operand:V8SI 3 "nonimmediate_operand")]
10858  "TARGET_AVX2"
10859{
  /* psadbw result in its native V4DI mode, and its V8SI counterpart.  */
10860  rtx sad_v4di = gen_reg_rtx (V4DImode);
10861  rtx sad_v8si = gen_reg_rtx (V8SImode);
10862  emit_insn (gen_avx2_psadbw (sad_v4di, operands[1], operands[2]));
10863  convert_move (sad_v8si, sad_v4di, 0);
  /* Accumulate into the destination.  */
10864  emit_insn (gen_addv8si3 (operands[0], sad_v8si, operands[3]));
10865  DONE;
10866})
10867
;; Arithmetic right shift (vpsra*) of VI248_AVX512BW_1 vectors by a DImode
;; count, enabled under AVX512VL.  Alternative 0 takes the count in a
;; vector register; alternative 1 takes an "N" constant count and allows
;; the shifted value to come from memory.  length_immediate is 1 only when
;; the count is a const_int.
10868(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
10869  [(set (match_operand:VI248_AVX512BW_1 0 "register_operand" "=v,v")
10870	(ashiftrt:VI248_AVX512BW_1
10871	  (match_operand:VI248_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
10872	  (match_operand:DI 2 "nonmemory_operand" "v,N")))]
10873  "TARGET_AVX512VL"
10874  "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10875  [(set_attr "type" "sseishft")
10876   (set (attr "length_immediate")
10877     (if_then_else (match_operand 2 "const_int_operand")
10878       (const_string "1")
10879       (const_string "0")))
10880   (set_attr "mode" "<sseinsnmode>")])
10881
;; Arithmetic right shift of VI24_AVX2 vectors by a DImode count held in
;; an "x" register or given as an "N" constant.  Alternative 0 is the
;; two-operand legacy form (source tied to destination); alternative 1 is
;; the three-operand VEX form.  length_immediate is 1 only for a
;; const_int count.
10882(define_insn "ashr<mode>3"
10883  [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
10884	(ashiftrt:VI24_AVX2
10885	  (match_operand:VI24_AVX2 1 "register_operand" "0,x")
10886	  (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
10887  "TARGET_SSE2"
10888  "@
10889   psra<ssemodesuffix>\t{%2, %0|%0, %2}
10890   vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10891  [(set_attr "isa" "noavx,avx")
10892   (set_attr "type" "sseishft")
10893   (set (attr "length_immediate")
10894     (if_then_else (match_operand 2 "const_int_operand")
10895       (const_string "1")
10896       (const_string "0")))
10897   (set_attr "prefix_data16" "1,*")
10898   (set_attr "prefix" "orig,vex")
10899   (set_attr "mode" "<sseinsnmode>")])
10900
;; Arithmetic right shift (vpsra*) of VI248_AVX512BW_AVX512VL vectors,
;; enabled under AVX512F, with optional masking.  Alternative 0 takes the
;; count in a vector register; alternative 1 takes an "N" constant count
;; and allows a memory source.
10901(define_insn "ashr<mode>3<mask_name>"
10902  [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
10903	(ashiftrt:VI248_AVX512BW_AVX512VL
10904	  (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
10905	  (match_operand:DI 2 "nonmemory_operand" "v,N")))]
10906  "TARGET_AVX512F"
10907  "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10908  [(set_attr "type" "sseishft")
10909   (set (attr "length_immediate")
10910     (if_then_else (match_operand 2 "const_int_operand")
10911       (const_string "1")
10912       (const_string "0")))
10913   (set_attr "mode" "<sseinsnmode>")])
10914
;; Logical shifts (any_lshift codes) of VI248_AVX512BW_2 vectors by a
;; DImode count, enabled under AVX512VL.  Alternative 0 takes the count in
;; a vector register; alternative 1 takes an "N" constant count and allows
;; a memory source.  The mnemonic is built from <vshift> and the element
;; suffix.
10915(define_insn "<mask_codefor><shift_insn><mode>3<mask_name>"
10916  [(set (match_operand:VI248_AVX512BW_2 0 "register_operand" "=v,v")
10917	(any_lshift:VI248_AVX512BW_2
10918	  (match_operand:VI248_AVX512BW_2 1 "nonimmediate_operand" "v,vm")
10919	  (match_operand:DI 2 "nonmemory_operand" "v,N")))]
10920  "TARGET_AVX512VL"
10921  "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10922  [(set_attr "type" "sseishft")
10923   (set (attr "length_immediate")
10924     (if_then_else (match_operand 2 "const_int_operand")
10925       (const_string "1")
10926       (const_string "0")))
10927   (set_attr "mode" "<sseinsnmode>")])
10928
;; Logical shifts (any_lshift codes) of VI248_AVX2 vectors by a DImode
;; count held in an "x" register or given as an "N" constant.
;; Alternative 0 is the two-operand legacy form; alternative 1 is the
;; three-operand VEX form.
10929(define_insn "<shift_insn><mode>3"
10930  [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
10931	(any_lshift:VI248_AVX2
10932	  (match_operand:VI248_AVX2 1 "register_operand" "0,x")
10933	  (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
10934  "TARGET_SSE2"
10935  "@
10936   p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
10937   vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10938  [(set_attr "isa" "noavx,avx")
10939   (set_attr "type" "sseishft")
10940   (set (attr "length_immediate")
10941     (if_then_else (match_operand 2 "const_int_operand")
10942       (const_string "1")
10943       (const_string "0")))
10944   (set_attr "prefix_data16" "1,*")
10945   (set_attr "prefix" "orig,vex")
10946   (set_attr "mode" "<sseinsnmode>")])
10947
;; Logical shifts (any_lshift codes) of VI248_AVX512BW vectors under
;; AVX512F, with optional masking.  Alternative 0 has a register source
;; and accepts the count in a vector register or as an "N" constant;
;; alternative 1 has a memory source and accepts only an "N" constant.
10948(define_insn "<shift_insn><mode>3<mask_name>"
10949  [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
10950	(any_lshift:VI248_AVX512BW
10951	  (match_operand:VI248_AVX512BW 1 "nonimmediate_operand" "v,m")
10952	  (match_operand:DI 2 "nonmemory_operand" "vN,N")))]
10953  "TARGET_AVX512F"
10954  "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10955  [(set_attr "type" "sseishft")
10956   (set (attr "length_immediate")
10957     (if_then_else (match_operand 2 "const_int_operand")
10958       (const_string "1")
10959       (const_string "0")))
10960   (set_attr "mode" "<sseinsnmode>")])
10961
10962
;; Whole-vector right shift for VI_128 modes, carried out in V1TImode.
;; Operand 1 is reinterpreted as V1TI via gen_lowpart, the logical shift
;; result goes to a fresh V1TI pseudo (operand 3), and operand 0 is then
;; set from the <MODE> lowpart of that pseudo (operand 4).  The shift
;; count must satisfy const_0_to_255_mul_8_operand.
10963(define_expand "vec_shr_<mode>"
10964  [(set (match_dup 3)
10965	(lshiftrt:V1TI
10966	 (match_operand:VI_128 1 "register_operand")
10967	 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10968   (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10969  "TARGET_SSE2"
10970{
10971  operands[1] = gen_lowpart (V1TImode, operands[1]);
10972  operands[3] = gen_reg_rtx (V1TImode);
10973  operands[4] = gen_lowpart (<MODE>mode, operands[3]);
10974})
10975
;; Whole-register logical shift (vp<vshift>dq) of VIMAX_AVX512VL values
;; under AVX512BW; the source may be memory.  The RTL shift count is a
;; multiple of 8 (const_0_to_255_mul_8_operand); it is divided by 8
;; before being printed as the instruction's immediate.
10976(define_insn "avx512bw_<shift_insn><mode>3"
10977  [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
10978	(any_lshift:VIMAX_AVX512VL
10979	 (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
10980	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
10981  "TARGET_AVX512BW"
10982{
10983  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10984  return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
10985}
10986  [(set_attr "type" "sseishft")
10987   (set_attr "length_immediate" "1")
10988   (set_attr "prefix" "maybe_evex")
10989   (set_attr "mode" "<sseinsnmode>")])
10990
;; Whole-register logical shift (p<vshift>dq / vp<vshift>dq) of
;; VIMAX_AVX2 values.  The RTL shift count is a multiple of 8 and is
;; divided by 8 before printing.  Alternative 0 is the two-operand legacy
;; form; alternative 1 is the three-operand VEX form.  Any other
;; alternative number is unreachable.
10991(define_insn "<sse2_avx2>_<shift_insn><mode>3"
10992  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10993	(any_lshift:VIMAX_AVX2
10994	 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10995	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10996  "TARGET_SSE2"
10997{
10998  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10999
11000  switch (which_alternative)
11001    {
11002    case 0:
11003      return "p<vshift>dq\t{%2, %0|%0, %2}";
11004    case 1:
11005      return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
11006    default:
11007      gcc_unreachable ();
11008    }
11009}
11010  [(set_attr "isa" "noavx,avx")
11011   (set_attr "type" "sseishft")
11012   (set_attr "length_immediate" "1")
11013   (set_attr "atom_unit" "sishuf")
11014   (set_attr "prefix_data16" "1,*")
11015   (set_attr "prefix" "orig,vex")
11016   (set_attr "mode" "<sseinsnmode>")])
11017
;; Variable rotate (vp<rotate>v*) of VI48_AVX512VL vectors: the rotate
;; count is itself a vector of the same mode (operand 2, register or
;; memory).  Enabled under AVX512F, with optional masking.
11018(define_insn "<avx512>_<rotate>v<mode><mask_name>"
11019  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11020	(any_rotate:VI48_AVX512VL
11021	  (match_operand:VI48_AVX512VL 1 "register_operand" "v")
11022	  (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
11023  "TARGET_AVX512F"
11024  "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11025  [(set_attr "prefix" "evex")
11026   (set_attr "mode" "<sseinsnmode>")])
11027
;; Rotate (vp<rotate>*) of VI48_AVX512VL vectors by an SImode constant
;; accepted by const_0_to_255_operand.  The rotated value may come from
;; memory.  Enabled under AVX512F, with optional masking.
11028(define_insn "<avx512>_<rotate><mode><mask_name>"
11029  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11030	(any_rotate:VI48_AVX512VL
11031	  (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
11032	  (match_operand:SI 2 "const_0_to_255_operand")))]
11033  "TARGET_AVX512F"
11034  "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11035  [(set_attr "prefix" "evex")
11036   (set_attr "mode" "<sseinsnmode>")])
11037
;; Expander for integer max/min (maxmin codes) over the
;; VI124_256_AVX512F_AVX512BW modes under AVX2.  It only fixes up the
;; operands; the RTL template is then emitted for an insn to match.
11038(define_expand "<code><mode>3"
11039  [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
11040	(maxmin:VI124_256_AVX512F_AVX512BW
11041	  (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
11042	  (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
11043  "TARGET_AVX2"
11044  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11045
;; 256-bit integer max/min (vp<maxmin_int>*) for the VI124_256 modes under
;; AVX2.  Operand 1 is commutative; the condition rejects two memory
;; inputs.
11046(define_insn "*avx2_<code><mode>3"
11047  [(set (match_operand:VI124_256 0 "register_operand" "=v")
11048	(maxmin:VI124_256
11049	  (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
11050	  (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
11051  "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11052  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11053  [(set_attr "type" "sseiadd")
11054   (set_attr "prefix_extra" "1")
11055   (set_attr "prefix" "vex")
11056   (set_attr "mode" "OI")])
11057
;; Masked integer max/min expander for VI48_AVX512VL modes under AVX512F:
;; the max/min of operands 1 and 2 is merged (vec_merge) with operand 3
;; under control of the mask register in operand 4.  Only an operand
;; fix-up is done before the template is emitted.
11058(define_expand "<code><mode>3_mask"
11059  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
11060	(vec_merge:VI48_AVX512VL
11061	  (maxmin:VI48_AVX512VL
11062	    (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
11063	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
11064	  (match_operand:VI48_AVX512VL 3 "vector_move_operand")
11065	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11066  "TARGET_AVX512F"
11067  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11068
;; Integer max/min (vp<maxmin_int>*) for VI48_AVX512VL modes under
;; AVX512F, with optional masking.  Operand 1 is commutative; two memory
;; inputs are rejected.
11069(define_insn "*avx512f_<code><mode>3<mask_name>"
11070  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11071	(maxmin:VI48_AVX512VL
11072	  (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
11073	  (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
11074  "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11075  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11076  [(set_attr "type" "sseiadd")
11077   (set_attr "prefix_extra" "1")
11078   (set_attr "prefix" "maybe_evex")
11079   (set_attr "mode" "<sseinsnmode>")])
11080
;; Integer max/min (vp<maxmin_int>*) for the byte/word VI12_AVX512VL modes
;; under AVX512BW, with optional masking.  Unlike the sibling patterns,
;; operand 1 must be a register and carries no commutative marker.
11081(define_insn "<mask_codefor><code><mode>3<mask_name>"
11082  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
11083        (maxmin:VI12_AVX512VL
11084          (match_operand:VI12_AVX512VL 1 "register_operand" "v")
11085          (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
11086  "TARGET_AVX512BW"
11087  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11088  [(set_attr "type" "sseiadd")
11089   (set_attr "prefix" "evex")
11090   (set_attr "mode" "<sseinsnmode>")])
11091
;; Expander for 64-bit-element integer max/min (VI8_AVX2_AVX512F modes).
;; When AVX512F is available and the mode is V8DI or AVX512VL is enabled,
;; the operands are fixed up and the maxmin RTL template is emitted.
;; Otherwise the operation is synthesized through ix86_expand_int_vcond:
;; the comparison is operands[1] > operands[2] (GTU for UMAX/UMIN, GT
;; otherwise), and xops[1]/xops[2] hold operands 1/2 for a max and are
;; swapped for a min.  The xops layout presumably follows the vcond
;; convention (dest, then-value, else-value, comparison, cmp-op0, cmp-op1)
;; -- the helper is defined outside this file.
11092(define_expand "<code><mode>3"
11093  [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
11094	(maxmin:VI8_AVX2_AVX512F
11095	  (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
11096	  (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
11097  "TARGET_SSE4_2"
11098{
11099  if (TARGET_AVX512F
11100      && (<MODE>mode == V8DImode || TARGET_AVX512VL))
11101    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
11102  else
11103    {
11104      enum rtx_code code;
11105      rtx xops[6];
11106      bool ok;
11107
11108
11109      xops[0] = operands[0];
11110
11111      if (<CODE> == SMAX || <CODE> == UMAX)
11112	{
11113	  xops[1] = operands[1];
11114	  xops[2] = operands[2];
11115	}
11116      else
11117	{
11118	  xops[1] = operands[2];
11119	  xops[2] = operands[1];
11120	}
11121
11122      code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
11123
11124      xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
11125      xops[4] = operands[1];
11126      xops[5] = operands[2];
11127
11128      ok = ix86_expand_int_vcond (xops);
11129      gcc_assert (ok);
11130      DONE;
11131    }
11132})
11133
;; Expander for signed max/min over the 128-bit VI124_128 modes.
;; With SSE4.1, or for V8HImode, the operands are fixed up and the
;; smaxmin template is emitted.  Otherwise both inputs are forced into
;; registers and the result is synthesized through ix86_expand_int_vcond
;; using the signed comparison operands[1] > operands[2]; xops[1]/xops[2]
;; hold operands 1/2 for SMAX and are swapped for SMIN.
11134(define_expand "<code><mode>3"
11135  [(set (match_operand:VI124_128 0 "register_operand")
11136	(smaxmin:VI124_128
11137	  (match_operand:VI124_128 1 "vector_operand")
11138	  (match_operand:VI124_128 2 "vector_operand")))]
11139  "TARGET_SSE2"
11140{
11141  if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
11142    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
11143  else
11144    {
11145      rtx xops[6];
11146      bool ok;
11147
11148      xops[0] = operands[0];
11149      operands[1] = force_reg (<MODE>mode, operands[1]);
11150      operands[2] = force_reg (<MODE>mode, operands[2]);
11151
11152      if (<CODE> == SMAX)
11153	{
11154	  xops[1] = operands[1];
11155	  xops[2] = operands[2];
11156	}
11157      else
11158	{
11159	  xops[1] = operands[2];
11160	  xops[2] = operands[1];
11161	}
11162
11163      xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
11164      xops[4] = operands[1];
11165      xops[5] = operands[2];
11166
11167      ok = ix86_expand_int_vcond (xops);
11168      gcc_assert (ok);
11169      DONE;
11170    }
11171})
11172
;; Signed max/min for the byte and dword VI14_128 modes, available from
;; SSE4.1.  Alternatives 0 and 1 are the two-operand legacy form;
;; alternative 2 is the three-operand AVX form with optional masking.
;; Operand 1 is commutative; two memory inputs are rejected.
11173(define_insn "*sse4_1_<code><mode>3<mask_name>"
11174  [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
11175	(smaxmin:VI14_128
11176	  (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
11177	  (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
11178  "TARGET_SSE4_1
11179   && <mask_mode512bit_condition>
11180   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11181  "@
11182   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11183   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11184   vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11185  [(set_attr "isa" "noavx,noavx,avx")
11186   (set_attr "type" "sseiadd")
11187   (set_attr "prefix_extra" "1,1,*")
11188   (set_attr "prefix" "orig,orig,vex")
11189   (set_attr "mode" "TI")])
11190
;; Signed max/min of V8HI vectors, available with plain SSE2.
;; Alternative 0 is the two-operand legacy form, alternative 1 the VEX
;; form, and alternative 2 the "v"-register form tagged isa avx512bw /
;; prefix evex.  Two memory inputs are rejected.
11191(define_insn "*<code>v8hi3"
11192  [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
11193	(smaxmin:V8HI
11194	  (match_operand:V8HI 1 "vector_operand" "%0,x,v")
11195	  (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
11196  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11197  "@
11198   p<maxmin_int>w\t{%2, %0|%0, %2}
11199   vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
11200   vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
11201  [(set_attr "isa" "noavx,avx,avx512bw")
11202   (set_attr "type" "sseiadd")
11203   (set_attr "prefix_data16" "1,*,*")
11204   (set_attr "prefix_extra" "*,1,1")
11205   (set_attr "prefix" "orig,vex,evex")
11206   (set_attr "mode" "TI")])
11207
;; Expander for unsigned max/min over the 128-bit VI124_128 modes.
;; Three strategies:
;;  - SSE4.1, or V16QImode: fix up the operands and emit the umaxmin
;;    template.
;;  - UMAX on V8HImode without SSE4.1: emit an unsigned saturating
;;    subtract op1 - op2 followed by an add of op2.  A fresh temporary is
;;    used for the intermediate when the destination is the same rtx as
;;    op2, so op2 is still intact for the add.
;;  - Otherwise: force both inputs into registers and synthesize the
;;    result through ix86_expand_int_vcond with the unsigned comparison
;;    operands[1] > operands[2]; xops[1]/xops[2] hold operands 1/2 for
;;    UMAX and are swapped for UMIN.
11208(define_expand "<code><mode>3"
11209  [(set (match_operand:VI124_128 0 "register_operand")
11210	(umaxmin:VI124_128
11211	  (match_operand:VI124_128 1 "vector_operand")
11212	  (match_operand:VI124_128 2 "vector_operand")))]
11213  "TARGET_SSE2"
11214{
11215  if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
11216    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
11217  else if (<CODE> == UMAX && <MODE>mode == V8HImode)
11218    {
11219      rtx op0 = operands[0], op2 = operands[2], op3 = op0;
11220      operands[1] = force_reg (<MODE>mode, operands[1]);
11221      if (rtx_equal_p (op3, op2))
11222	op3 = gen_reg_rtx (V8HImode);
11223      emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
11224      emit_insn (gen_addv8hi3 (op0, op3, op2));
11225      DONE;
11226    }
11227  else
11228    {
11229      rtx xops[6];
11230      bool ok;
11231
11232      operands[1] = force_reg (<MODE>mode, operands[1]);
11233      operands[2] = force_reg (<MODE>mode, operands[2]);
11234
11235      xops[0] = operands[0];
11236
11237      if (<CODE> == UMAX)
11238	{
11239	  xops[1] = operands[1];
11240	  xops[2] = operands[2];
11241	}
11242      else
11243	{
11244	  xops[1] = operands[2];
11245	  xops[2] = operands[1];
11246	}
11247
11248      xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
11249      xops[4] = operands[1];
11250      xops[5] = operands[2];
11251
11252      ok = ix86_expand_int_vcond (xops);
11253      gcc_assert (ok);
11254      DONE;
11255    }
11256})
11257
;; Unsigned max/min for the word and dword VI24_128 modes, available from
;; SSE4.1.  Alternatives 0 and 1 are the two-operand legacy form;
;; alternative 2 is the three-operand AVX form with optional masking.
;; Operand 1 is commutative; two memory inputs are rejected.
11258(define_insn "*sse4_1_<code><mode>3<mask_name>"
11259  [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
11260	(umaxmin:VI24_128
11261	  (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
11262	  (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
11263  "TARGET_SSE4_1
11264   && <mask_mode512bit_condition>
11265   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11266  "@
11267   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11268   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11269   vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11270  [(set_attr "isa" "noavx,noavx,avx")
11271   (set_attr "type" "sseiadd")
11272   (set_attr "prefix_extra" "1,1,*")
11273   (set_attr "prefix" "orig,orig,vex")
11274   (set_attr "mode" "TI")])
11275
;; Unsigned max/min of V16QI vectors, available with plain SSE2.
;; Alternative 0 is the two-operand legacy form, alternative 1 the VEX
;; form, and alternative 2 the "v"-register form tagged isa avx512bw /
;; prefix evex.  Two memory inputs are rejected.
11276(define_insn "*<code>v16qi3"
11277  [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
11278	(umaxmin:V16QI
11279	  (match_operand:V16QI 1 "vector_operand" "%0,x,v")
11280	  (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
11281  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11282  "@
11283   p<maxmin_int>b\t{%2, %0|%0, %2}
11284   vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
11285   vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
11286  [(set_attr "isa" "noavx,avx,avx512bw")
11287   (set_attr "type" "sseiadd")
11288   (set_attr "prefix_data16" "1,*,*")
11289   (set_attr "prefix_extra" "*,1,1")
11290   (set_attr "prefix" "orig,vex,evex")
11291   (set_attr "mode" "TI")])
11292
11293;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11294;;
11295;; Parallel integral comparisons
11296;;
11297;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11298
;; Expander for 256-bit element-wise integer equality (VI_256 modes) under
;; AVX2.  Only fixes up the operands; the eq template is then emitted.
11299(define_expand "avx2_eq<mode>3"
11300  [(set (match_operand:VI_256 0 "register_operand")
11301	(eq:VI_256
11302	  (match_operand:VI_256 1 "nonimmediate_operand")
11303	  (match_operand:VI_256 2 "nonimmediate_operand")))]
11304  "TARGET_AVX2"
11305  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11306
;; 256-bit element-wise integer equality (vpcmpeq*) producing a vector
;; result of the same VI_256 mode.  Operand 1 is commutative; two memory
;; inputs are rejected.
11307(define_insn "*avx2_eq<mode>3"
11308  [(set (match_operand:VI_256 0 "register_operand" "=x")
11309	(eq:VI_256
11310	  (match_operand:VI_256 1 "nonimmediate_operand" "%x")
11311	  (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
11312  "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11313  "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11314  [(set_attr "type" "ssecmp")
11315   (set_attr "prefix_extra" "1")
11316   (set_attr "prefix" "vex")
11317   (set_attr "mode" "OI")])
11318
;; Expander for equality comparison of byte/word vectors (VI12_AVX512VL)
;; whose result is a mask of mode <avx512fmaskmode>, represented as
;; UNSPEC_MASKED_EQ.  Requires AVX512BW.  Only fixes up the operands.
11319(define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
11320  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
11321	(unspec:<avx512fmaskmode>
11322	  [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
11323	   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
11324	  UNSPEC_MASKED_EQ))]
11325  "TARGET_AVX512BW"
11326  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11327
;; Expander for equality comparison of dword/qword vectors
;; (VI48_AVX512VL) whose result is a mask of mode <avx512fmaskmode>,
;; represented as UNSPEC_MASKED_EQ.  Requires AVX512F.  Only fixes up the
;; operands.
11328(define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
11329  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
11330	(unspec:<avx512fmaskmode>
11331	  [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
11332	   (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
11333	  UNSPEC_MASKED_EQ))]
11334  "TARGET_AVX512F"
11335  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11336
;; Equality comparison of byte/word vectors (VI12_AVX512VL) into a mask
;; register ("Yk"), emitted as vpcmpeqb / vpcmpeqw.  Operand 1 is
;; commutative; two memory inputs are rejected.
;; The condition requires AVX512BW, matching the expander of the same
;; name for these modes: the byte/word compare-into-mask forms belong to
;; AVX512BW, so AVX512F alone is not sufficient.
11337(define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
11338  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11339	(unspec:<avx512fmaskmode>
11340	  [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "%v")
11341	   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11342	  UNSPEC_MASKED_EQ))]
11343  "TARGET_AVX512BW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11344  "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11345  [(set_attr "type" "ssecmp")
11346   (set_attr "prefix_extra" "1")
11347   (set_attr "prefix" "evex")
11348   (set_attr "mode" "<sseinsnmode>")])
11349
;; Equality comparison of dword/qword vectors (VI48_AVX512VL) into a mask
;; register ("Yk"), emitted as vpcmpeqd / vpcmpeqq under AVX512F.
;; Operand 1 is commutative; two memory inputs are rejected.
11350(define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
11351  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11352	(unspec:<avx512fmaskmode>
11353	  [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
11354	   (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11355	  UNSPEC_MASKED_EQ))]
11356  "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11357  "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11358  [(set_attr "type" "ssecmp")
11359   (set_attr "prefix_extra" "1")
11360   (set_attr "prefix" "evex")
11361   (set_attr "mode" "<sseinsnmode>")])
11362
;; Element-wise equality of V2DI vectors (pcmpeqq / vpcmpeqq), available
;; from SSE4.1.  Alternatives 0 and 1 are the two-operand legacy form;
;; alternative 2 is the three-operand VEX form.  Operand 1 is commutative;
;; two memory inputs are rejected.
11363(define_insn "*sse4_1_eqv2di3"
11364  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
11365	(eq:V2DI
11366	  (match_operand:V2DI 1 "vector_operand" "%0,0,x")
11367	  (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
11368  "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11369  "@
11370   pcmpeqq\t{%2, %0|%0, %2}
11371   pcmpeqq\t{%2, %0|%0, %2}
11372   vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
11373  [(set_attr "isa" "noavx,noavx,avx")
11374   (set_attr "type" "ssecmp")
11375   (set_attr "prefix_extra" "1")
11376   (set_attr "prefix" "orig,orig,vex")
11377   (set_attr "mode" "TI")])
11378
;; Element-wise equality of 128-bit byte/word/dword vectors (VI124_128),
;; emitted as pcmpeq* / vpcmpeq*.  Enabled for SSE2 only when XOP is not
;; in use.  Alternative 0 is the two-operand legacy form; alternative 1 is
;; the VEX form.  Two memory inputs are rejected.
11379(define_insn "*sse2_eq<mode>3"
11380  [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
11381	(eq:VI124_128
11382	  (match_operand:VI124_128 1 "vector_operand" "%0,x")
11383	  (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
11384  "TARGET_SSE2 && !TARGET_XOP
11385   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11386  "@
11387   pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
11388   vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11389  [(set_attr "isa" "noavx,avx")
11390   (set_attr "type" "ssecmp")
11391   (set_attr "prefix_data16" "1,*")
11392   (set_attr "prefix" "orig,vex")
11393   (set_attr "mode" "TI")])
11394
;; Named expander for the VI124_128 equality compare.  Its preparation
;; statement runs the operands through ix86_fixup_binary_operands_no_copy
;; (with code EQ) before the pattern is generated.
11395(define_expand "sse2_eq<mode>3"
11396  [(set (match_operand:VI124_128 0 "register_operand")
11397	(eq:VI124_128
11398	  (match_operand:VI124_128 1 "vector_operand")
11399	  (match_operand:VI124_128 2 "vector_operand")))]
11400  "TARGET_SSE2 && !TARGET_XOP "
11401  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11402
;; Named expander for the V2DI equality compare (SSE4.1).  Its preparation
;; statement runs the operands through ix86_fixup_binary_operands_no_copy
;; (with code EQ) before the pattern is generated.
11403(define_expand "sse4_1_eqv2di3"
11404  [(set (match_operand:V2DI 0 "register_operand")
11405	(eq:V2DI
11406	  (match_operand:V2DI 1 "vector_operand")
11407	  (match_operand:V2DI 2 "vector_operand")))]
11408  "TARGET_SSE4_1"
11409  "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
11410
;; V2DI element-wise greater-than (pcmpgtq / vpcmpgtq).  Operand 1 must be
;; a register and has no "%" marker, so the inputs are never swapped.
;; Alternatives 0 and 1 are the two-operand non-AVX forms; alternative 2
;; is the three-operand AVX form.
11411(define_insn "sse4_2_gtv2di3"
11412  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
11413	(gt:V2DI
11414	  (match_operand:V2DI 1 "register_operand" "0,0,x")
11415	  (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
11416  "TARGET_SSE4_2"
11417  "@
11418   pcmpgtq\t{%2, %0|%0, %2}
11419   pcmpgtq\t{%2, %0|%0, %2}
11420   vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
11421  [(set_attr "isa" "noavx,noavx,avx")
11422   (set_attr "type" "ssecmp")
11423   (set_attr "prefix_extra" "1")
11424   (set_attr "prefix" "orig,orig,vex")
11425   (set_attr "mode" "TI")])
11426
;; Element-wise greater-than for VI_256 vectors under AVX2.  Single
;; three-operand alternative: register first input, register-or-memory
;; second input.  Emits vpcmpgt<ssemodesuffix>.
11427(define_insn "avx2_gt<mode>3"
11428  [(set (match_operand:VI_256 0 "register_operand" "=x")
11429	(gt:VI_256
11430	  (match_operand:VI_256 1 "register_operand" "x")
11431	  (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
11432  "TARGET_AVX2"
11433  "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11434  [(set_attr "type" "ssecmp")
11435   (set_attr "prefix_extra" "1")
11436   (set_attr "prefix" "vex")
11437   (set_attr "mode" "OI")])
11438
;; Greater-than compare on VI48_AVX512VL vectors whose result is produced
;; in the matching mask mode (UNSPEC_MASKED_GT).  Gated on TARGET_AVX512F.
;; Emits vpcmpgt<ssemodesuffix>.
11439(define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
11440  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11441	(unspec:<avx512fmaskmode>
11442	  [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11443	   (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
11444  "TARGET_AVX512F"
11445  "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11446  [(set_attr "type" "ssecmp")
11447   (set_attr "prefix_extra" "1")
11448   (set_attr "prefix" "evex")
11449   (set_attr "mode" "<sseinsnmode>")])
11450
;; Greater-than compare on VI12_AVX512VL vectors whose result is produced
;; in the matching mask mode (UNSPEC_MASKED_GT).  Same shape as the
;; VI48_AVX512VL pattern of the same name, but gated on TARGET_AVX512BW.
11451(define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
11452  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11453	(unspec:<avx512fmaskmode>
11454	  [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11455	   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
11456  "TARGET_AVX512BW"
11457  "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11458  [(set_attr "type" "ssecmp")
11459   (set_attr "prefix_extra" "1")
11460   (set_attr "prefix" "evex")
11461   (set_attr "mode" "<sseinsnmode>")])
11462
;; Element-wise greater-than for VI124_128 vectors, enabled for SSE2 when
;; TARGET_XOP is off.  Alternative 0 is the two-operand non-AVX form
;; (operand 1 tied to the destination); alternative 1 is the
;; three-operand AVX form.
11463(define_insn "sse2_gt<mode>3"
11464  [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
11465	(gt:VI124_128
11466	  (match_operand:VI124_128 1 "register_operand" "0,x")
11467	  (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
11468  "TARGET_SSE2 && !TARGET_XOP"
11469  "@
11470   pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
11471   vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11472  [(set_attr "isa" "noavx,avx")
11473   (set_attr "type" "ssecmp")
11474   (set_attr "prefix_data16" "1,*")
11475   (set_attr "prefix" "orig,vex")
11476   (set_attr "mode" "TI")])
11477
;; Vector conditional select for V_512 data with a VI_AVX512BW comparison:
;; operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2.
;; Only enabled when the data and comparison modes have the same number of
;; elements.  All the work is done by ix86_expand_int_vcond, which is
;; asserted to succeed.
11478(define_expand "vcond<V_512:mode><VI_AVX512BW:mode>"
11479  [(set (match_operand:V_512 0 "register_operand")
11480	(if_then_else:V_512
11481	  (match_operator 3 ""
11482	    [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
11483	     (match_operand:VI_AVX512BW 5 "general_operand")])
11484	  (match_operand:V_512 1)
11485	  (match_operand:V_512 2)))]
11486  "TARGET_AVX512F
11487   && (GET_MODE_NUNITS (<V_512:MODE>mode)
11488       == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
11489{
11490  bool ok = ix86_expand_int_vcond (operands);
11491  gcc_assert (ok);
11492  DONE;
11493})
11494
;; Vector conditional select for V_256 data with a VI_256 comparison:
;; operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2.
;; Requires AVX2 and equal element counts in the two modes.  Expansion is
;; delegated to ix86_expand_int_vcond, which is asserted to succeed.
11495(define_expand "vcond<V_256:mode><VI_256:mode>"
11496  [(set (match_operand:V_256 0 "register_operand")
11497	(if_then_else:V_256
11498	  (match_operator 3 ""
11499	    [(match_operand:VI_256 4 "nonimmediate_operand")
11500	     (match_operand:VI_256 5 "general_operand")])
11501	  (match_operand:V_256 1)
11502	  (match_operand:V_256 2)))]
11503  "TARGET_AVX2
11504   && (GET_MODE_NUNITS (<V_256:MODE>mode)
11505       == GET_MODE_NUNITS (<VI_256:MODE>mode))"
11506{
11507  bool ok = ix86_expand_int_vcond (operands);
11508  gcc_assert (ok);
11509  DONE;
11510})
11511
;; Vector conditional select for V_128 data with a VI124_128 comparison:
;; operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2.
;; Requires SSE2 and equal element counts in the two modes.  Expansion is
;; delegated to ix86_expand_int_vcond, which is asserted to succeed.
11512(define_expand "vcond<V_128:mode><VI124_128:mode>"
11513  [(set (match_operand:V_128 0 "register_operand")
11514	(if_then_else:V_128
11515	  (match_operator 3 ""
11516	    [(match_operand:VI124_128 4 "vector_operand")
11517	     (match_operand:VI124_128 5 "general_operand")])
11518	  (match_operand:V_128 1)
11519	  (match_operand:V_128 2)))]
11520  "TARGET_SSE2
11521   && (GET_MODE_NUNITS (<V_128:MODE>mode)
11522       == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
11523{
11524  bool ok = ix86_expand_int_vcond (operands);
11525  gcc_assert (ok);
11526  DONE;
11527})
11528
;; Vector conditional select for VI8F_128 data with a V2DI comparison.
;; Gated on TARGET_SSE4_2; unlike the neighbouring vcond expanders there
;; is no element-count test in the condition.  Expansion is delegated to
;; ix86_expand_int_vcond, which is asserted to succeed.
11529(define_expand "vcond<VI8F_128:mode>v2di"
11530  [(set (match_operand:VI8F_128 0 "register_operand")
11531	(if_then_else:VI8F_128
11532	  (match_operator 3 ""
11533	    [(match_operand:V2DI 4 "vector_operand")
11534	     (match_operand:V2DI 5 "general_operand")])
11535	  (match_operand:VI8F_128 1)
11536	  (match_operand:VI8F_128 2)))]
11537  "TARGET_SSE4_2"
11538{
11539  bool ok = ix86_expand_int_vcond (operands);
11540  gcc_assert (ok);
11541  DONE;
11542})
11543
;; "vcondu" conditional select for V_512 data with a VI_AVX512BW
;; comparison (presumably the unsigned-comparison counterpart of vcond,
;; going by the name -- confirm against the optab documentation).
;; Differs from the vcond pattern in its predicates: operand 5 is
;; nonimmediate_operand and operands 1/2 are general_operand.  Expansion
;; goes through the same ix86_expand_int_vcond helper.
11544(define_expand "vcondu<V_512:mode><VI_AVX512BW:mode>"
11545  [(set (match_operand:V_512 0 "register_operand")
11546	(if_then_else:V_512
11547	  (match_operator 3 ""
11548	    [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
11549	     (match_operand:VI_AVX512BW 5 "nonimmediate_operand")])
11550	  (match_operand:V_512 1 "general_operand")
11551	  (match_operand:V_512 2 "general_operand")))]
11552  "TARGET_AVX512F
11553   && (GET_MODE_NUNITS (<V_512:MODE>mode)
11554       == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
11555{
11556  bool ok = ix86_expand_int_vcond (operands);
11557  gcc_assert (ok);
11558  DONE;
11559})
11560
;; "vcondu" conditional select for V_256 data with a VI_256 comparison
;; (presumably the unsigned-comparison counterpart of vcond -- confirm).
;; Requires AVX2 and equal element counts in the two modes.  Expansion
;; goes through ix86_expand_int_vcond, which is asserted to succeed.
11561(define_expand "vcondu<V_256:mode><VI_256:mode>"
11562  [(set (match_operand:V_256 0 "register_operand")
11563	(if_then_else:V_256
11564	  (match_operator 3 ""
11565	    [(match_operand:VI_256 4 "nonimmediate_operand")
11566	     (match_operand:VI_256 5 "nonimmediate_operand")])
11567	  (match_operand:V_256 1 "general_operand")
11568	  (match_operand:V_256 2 "general_operand")))]
11569  "TARGET_AVX2
11570   && (GET_MODE_NUNITS (<V_256:MODE>mode)
11571       == GET_MODE_NUNITS (<VI_256:MODE>mode))"
11572{
11573  bool ok = ix86_expand_int_vcond (operands);
11574  gcc_assert (ok);
11575  DONE;
11576})
11577
;; "vcondu" conditional select for V_128 data with a VI124_128 comparison
;; (presumably the unsigned-comparison counterpart of vcond -- confirm).
;; Requires SSE2 and equal element counts in the two modes.  Expansion
;; goes through ix86_expand_int_vcond, which is asserted to succeed.
11578(define_expand "vcondu<V_128:mode><VI124_128:mode>"
11579  [(set (match_operand:V_128 0 "register_operand")
11580	(if_then_else:V_128
11581	  (match_operator 3 ""
11582	    [(match_operand:VI124_128 4 "vector_operand")
11583	     (match_operand:VI124_128 5 "vector_operand")])
11584	  (match_operand:V_128 1 "general_operand")
11585	  (match_operand:V_128 2 "general_operand")))]
11586  "TARGET_SSE2
11587   && (GET_MODE_NUNITS (<V_128:MODE>mode)
11588       == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
11589{
11590  bool ok = ix86_expand_int_vcond (operands);
11591  gcc_assert (ok);
11592  DONE;
11593})
11594
;; "vcondu" conditional select for VI8F_128 data with a V2DI comparison
;; (presumably the unsigned-comparison counterpart of vcond -- confirm).
;; Gated on TARGET_SSE4_2.  Expansion goes through ix86_expand_int_vcond,
;; which is asserted to succeed.
11595(define_expand "vcondu<VI8F_128:mode>v2di"
11596  [(set (match_operand:VI8F_128 0 "register_operand")
11597	(if_then_else:VI8F_128
11598	  (match_operator 3 ""
11599	    [(match_operand:V2DI 4 "vector_operand")
11600	     (match_operand:V2DI 5 "vector_operand")])
11601	  (match_operand:VI8F_128 1 "general_operand")
11602	  (match_operand:VI8F_128 2 "general_operand")))]
11603  "TARGET_SSE4_2"
11604{
11605  bool ok = ix86_expand_int_vcond (operands);
11606  gcc_assert (ok);
11607  DONE;
11608})
11609
;; "vcondeq" conditional select for VI8F_128 data with a V2DI comparison
;; (presumably restricted to equality-style comparisons, going by the
;; name -- confirm).  Only needs TARGET_SSE4_1, whereas the vcond/vcondu
;; V2DI expanders require TARGET_SSE4_2.  Expansion goes through
;; ix86_expand_int_vcond, which is asserted to succeed.
11610(define_expand "vcondeq<VI8F_128:mode>v2di"
11611  [(set (match_operand:VI8F_128 0 "register_operand")
11612	(if_then_else:VI8F_128
11613	  (match_operator 3 ""
11614	    [(match_operand:V2DI 4 "vector_operand")
11615	     (match_operand:V2DI 5 "general_operand")])
11616	  (match_operand:VI8F_128 1)
11617	  (match_operand:VI8F_128 2)))]
11618  "TARGET_SSE4_1"
11619{
11620  bool ok = ix86_expand_int_vcond (operands);
11621  gcc_assert (ok);
11622  DONE;
11623})
11624
;; Modes iterated over by the vec_perm<mode> expander.  The six 128-bit
;; modes are listed unconditionally; every wider mode is enabled only
;; when the target flag given next to it is set.
11625(define_mode_iterator VEC_PERM_AVX2
11626  [V16QI V8HI V4SI V2DI V4SF V2DF
11627   (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
11628   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
11629   (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
11630   (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
11631   (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
11632   (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
11633
;; Variable vector permute.  Operands 0-2 are registers in the iterated
;; VEC_PERM_AVX2 mode; operand 3 is a register in the corresponding
;; integer vector mode (<sseintvecmode>).  The whole expansion is handed
;; to ix86_expand_vec_perm.
11634(define_expand "vec_perm<mode>"
11635  [(match_operand:VEC_PERM_AVX2 0 "register_operand")
11636   (match_operand:VEC_PERM_AVX2 1 "register_operand")
11637   (match_operand:VEC_PERM_AVX2 2 "register_operand")
11638   (match_operand:<sseintvecmode> 3 "register_operand")]
11639  "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
11640{
11641  ix86_expand_vec_perm (operands);
11642  DONE;
11643})
11644
11645;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11646;;
11647;; Parallel bitwise logical operations
11648;;
11649;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11650
;; One's complement of a VI vector, expressed as an XOR of operand 1 with
;; operand 2.  Operand 2 is not supplied by the caller: the preparation
;; statement sets it to CONSTM1_RTX (<MODE>mode) forced into a register.
11651(define_expand "one_cmpl<mode>2"
11652  [(set (match_operand:VI 0 "register_operand")
11653	(xor:VI (match_operand:VI 1 "vector_operand")
11654		(match_dup 2)))]
11655  "TARGET_SSE"
11656{
11657  operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
11658})
11659
;; Named expander for and-not on VI_AVX2 vectors:
;; operand 0 = (~operand 1) & operand 2.  Operand 1 must be a register.
;; No preparation statements; the RTL template is emitted as written.
11660(define_expand "<sse2_avx2>_andnot<mode>3"
11661  [(set (match_operand:VI_AVX2 0 "register_operand")
11662	(and:VI_AVX2
11663	  (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
11664	  (match_operand:VI_AVX2 2 "vector_operand")))]
11665  "TARGET_SSE2")
11666
;; Masked and-not expander for VI48_AVX512VL vectors.  The result is a
;; vec_merge of (~operand 1) & operand 2 with operand 3, under the
;; mask-mode register in operand 4.  Gated on TARGET_AVX512F.
11667(define_expand "<sse2_avx2>_andnot<mode>3_mask"
11668  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
11669	(vec_merge:VI48_AVX512VL
11670	  (and:VI48_AVX512VL
11671	    (not:VI48_AVX512VL
11672	      (match_operand:VI48_AVX512VL 1 "register_operand"))
11673	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
11674	  (match_operand:VI48_AVX512VL 3 "vector_move_operand")
11675	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11676  "TARGET_AVX512F")
11677
;; Masked and-not expander for VI12_AVX512VL vectors.  Same RTL shape as
;; the VI48_AVX512VL expander of the same name, but gated on
;; TARGET_AVX512BW.
11678(define_expand "<sse2_avx2>_andnot<mode>3_mask"
11679  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
11680	(vec_merge:VI12_AVX512VL
11681	  (and:VI12_AVX512VL
11682	    (not:VI12_AVX512VL
11683	      (match_operand:VI12_AVX512VL 1 "register_operand"))
11684	    (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
11685	  (match_operand:VI12_AVX512VL 3 "vector_move_operand")
11686	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11687  "TARGET_AVX512BW")
11688
;; And-not for every VI mode: operand 0 = (~operand 1) & operand 2.
;; The C output routine assembles the mnemonic at output time:
;;   - the stem ("pandn" or "andn") and the suffix are chosen from the
;;     insn's "mode" attribute and, for the integer forms, from <MODE>mode
;;     and the selected alternative;
;;   - alternative 0 uses the two-operand template, alternatives 1 and 2
;;     the "v"-prefixed three-operand template.
;; The text is formatted into a function-static buffer, which is returned.
11689(define_insn "*andnot<mode>3"
11690  [(set (match_operand:VI 0 "register_operand" "=x,x,v")
11691	(and:VI
11692	  (not:VI (match_operand:VI 1 "register_operand" "0,x,v"))
11693	  (match_operand:VI 2 "vector_operand" "xBm,xm,vm")))]
11694  "TARGET_SSE"
11695{
11696  static char buf[64];
11697  const char *ops;
11698  const char *tmp;
11699  const char *ssesuffix;
11700
  /* Step 1: pick mnemonic stem (tmp) and suffix (ssesuffix).  The asserts
     fall through from the widest mode to the narrowest, so a wider mode
     checks every ISA requirement below it as well.  */
11701  switch (get_attr_mode (insn))
11702    {
11703    case MODE_XI:
11704      gcc_assert (TARGET_AVX512F);
11705      /* FALLTHRU */
11706    case MODE_OI:
11707      gcc_assert (TARGET_AVX2);
11708      /* FALLTHRU */
11709    case MODE_TI:
11710      gcc_assert (TARGET_SSE2);
11711      tmp = "pandn";
11712      switch (<MODE>mode)
11713	{
11714	case E_V64QImode:
11715	case E_V32HImode:
11716	  /* There is no vpandnb or vpandnw instruction, nor vpandn for
11717	     512-bit vectors. Use vpandnq instead.  */
11718	  ssesuffix = "q";
11719	  break;
11720	case E_V16SImode:
11721	case E_V8DImode:
11722	  ssesuffix = "<ssemodesuffix>";
11723	  break;
11724	case E_V8SImode:
11725	case E_V4DImode:
11726	case E_V4SImode:
11727	case E_V2DImode:
11728	  ssesuffix = (TARGET_AVX512VL && which_alternative == 2
11729		       ? "<ssemodesuffix>" : "");
11730	  break;
11731	default:
	  /* Remaining modes: "q" only for alternative 2 under AVX512VL,
	     otherwise no suffix.  */
11732	  ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
11733	}
11734      break;
11735
11736    case MODE_V16SF:
11737      gcc_assert (TARGET_AVX512F);
11738      /* FALLTHRU */
11739    case MODE_V8SF:
11740      gcc_assert (TARGET_AVX);
11741      /* FALLTHRU */
11742    case MODE_V4SF:
11743      gcc_assert (TARGET_SSE);
11744      tmp = "andn";
11745      ssesuffix = "ps";
11746      break;
11747
11748    default:
11749      gcc_unreachable ();
11750    }
11751
  /* Step 2: pick the operand template.  "%%" survives snprintf as a
     single "%" for the later operand substitution.  */
11752  switch (which_alternative)
11753    {
11754    case 0:
11755      ops = "%s%s\t{%%2, %%0|%%0, %%2}";
11756      break;
11757    case 1:
11758    case 2:
11759      ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
11760      break;
11761    default:
11762      gcc_unreachable ();
11763    }
11764
11765  snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
11766  return buf;
11767}
11768  [(set_attr "isa" "noavx,avx,avx")
11769   (set_attr "type" "sselog")
11770   (set (attr "prefix_data16")
11771     (if_then_else
11772       (and (eq_attr "alternative" "0")
11773	    (eq_attr "mode" "TI"))
11774       (const_string "1")
11775       (const_string "*")))
11776   (set_attr "prefix" "orig,vex,evex")
11777   (set (attr "mode")
11778	(cond [(and (match_test "<MODE_SIZE> == 16")
11779		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11780		 (const_string "<ssePSmode>")
11781	       (match_test "TARGET_AVX2")
11782		 (const_string "<sseinsnmode>")
11783	       (match_test "TARGET_AVX")
11784		 (if_then_else
11785		   (match_test "<MODE_SIZE> > 16")
11786		   (const_string "V8SF")
11787		   (const_string "<sseinsnmode>"))
11788	       (ior (not (match_test "TARGET_SSE2"))
11789		    (match_test "optimize_function_for_size_p (cfun)"))
11790		 (const_string "V4SF")
11791	      ]
11792	      (const_string "<sseinsnmode>")))])
11793
;; Masked and-not for VI48_AVX512VL vectors.  The result is a vec_merge of
;; (~operand 1) & operand 2 with operand 3 (constraint "0C"), under the
;; mask-mode register in operand 4.  The template prints the mask as
;; {%4} followed by the %N3 modifier applied to operand 3.
11794(define_insn "*andnot<mode>3_mask"
11795  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11796	(vec_merge:VI48_AVX512VL
11797	  (and:VI48_AVX512VL
11798	    (not:VI48_AVX512VL
11799	      (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
11800	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
11801	  (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
11802	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
11803  "TARGET_AVX512F"
11804  "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
11805  [(set_attr "type" "sselog")
11806   (set_attr "prefix" "evex")
11807   (set_attr "mode" "<sseinsnmode>")])
11808
;; Expander for the any_logic operations on VI vectors.  Either input may
;; be a register, memory or constant vector
;; (nonimmediate_or_const_vector_operand); the expansion itself is done
;; by ix86_expand_vector_logical_operator.
11809(define_expand "<code><mode>3"
11810  [(set (match_operand:VI 0 "register_operand")
11811	(any_logic:VI
11812	  (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
11813	  (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
11814  "TARGET_SSE"
11815{
11816  ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
11817  DONE;
11818})
11819
;; any_logic operations on VI48_AVX_AVX512F vectors, optionally masked via
;; the <mask_name> substitution.  Operand 1 is commutative ("%") and two
;; memory inputs are rejected.  The C output routine assembles the
;; mnemonic at output time: stem ("p<logic>" or "<logic>") and suffix come
;; from the insn "mode" attribute and <MODE>mode; the operand template
;; depends on the alternative and on <mask_applied>.  The text is
;; formatted into a function-static buffer, which is returned.
11820(define_insn "<mask_codefor><code><mode>3<mask_name>"
11821  [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,x,v")
11822	(any_logic:VI48_AVX_AVX512F
11823	  (match_operand:VI48_AVX_AVX512F 1 "vector_operand" "%0,x,v")
11824	  (match_operand:VI48_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
11825  "TARGET_SSE && <mask_mode512bit_condition>
11826   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11827{
11828  static char buf[64];
11829  const char *ops;
11830  const char *tmp;
11831  const char *ssesuffix;
11832
  /* Step 1: pick mnemonic stem (tmp) and suffix (ssesuffix).  */
11833  switch (get_attr_mode (insn))
11834    {
11835    case MODE_XI:
11836      gcc_assert (TARGET_AVX512F);
11837      /* FALLTHRU */
11838    case MODE_OI:
11839      gcc_assert (TARGET_AVX2);
11840      /* FALLTHRU */
11841    case MODE_TI:
11842      gcc_assert (TARGET_SSE2);
11843      tmp = "p<logic>";
11844      switch (<MODE>mode)
11845	{
11846	case E_V16SImode:
11847	case E_V8DImode:
11848	  ssesuffix = "<ssemodesuffix>";
11849	  break;
11850	case E_V8SImode:
11851	case E_V4DImode:
11852	case E_V4SImode:
11853	case E_V2DImode:
	  /* Element-size suffix only under AVX512VL, and only when the
	     insn is masked or alternative 2 was chosen.  */
11854	  ssesuffix = (TARGET_AVX512VL
11855		       && (<mask_applied> || which_alternative == 2)
11856		       ? "<ssemodesuffix>" : "");
11857	  break;
11858	default:
11859	  gcc_unreachable ();
11860	}
11861      break;
11862
11863    case MODE_V8SF:
11864      gcc_assert (TARGET_AVX);
11865      /* FALLTHRU */
11866    case MODE_V4SF:
11867      gcc_assert (TARGET_SSE);
11868      tmp = "<logic>";
11869      ssesuffix = "ps";
11870      break;
11871
11872    default:
11873      gcc_unreachable ();
11874    }
11875
  /* Step 2: pick the operand template.  */
11876  switch (which_alternative)
11877    {
11878    case 0:
      /* When masked, alternative 0 still uses the "v"-prefixed
	 three-operand template, with operand 0 printed as both
	 destination and first source.  */
11879      if (<mask_applied>)
11880        ops = "v%s%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
11881      else
11882        ops = "%s%s\t{%%2, %%0|%%0, %%2}";
11883      break;
11884    case 1:
11885    case 2:
11886      ops = "v%s%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
11887      break;
11888    default:
11889      gcc_unreachable ();
11890    }
11891
11892  snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
11893  return buf;
11894}
11895  [(set_attr "isa" "noavx,avx,avx")
11896   (set_attr "type" "sselog")
11897   (set (attr "prefix_data16")
11898     (if_then_else
11899       (and (eq_attr "alternative" "0")
11900	    (eq_attr "mode" "TI"))
11901       (const_string "1")
11902       (const_string "*")))
11903   (set_attr "prefix" "<mask_prefix3>,evex")
11904   (set (attr "mode")
11905	(cond [(and (match_test "<MODE_SIZE> == 16")
11906		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11907		 (const_string "<ssePSmode>")
11908	       (match_test "TARGET_AVX2")
11909		 (const_string "<sseinsnmode>")
11910	       (match_test "TARGET_AVX")
11911		 (if_then_else
11912		   (match_test "<MODE_SIZE> > 16")
11913		   (const_string "V8SF")
11914		   (const_string "<sseinsnmode>"))
11915	       (ior (not (match_test "TARGET_SSE2"))
11916		    (match_test "optimize_function_for_size_p (cfun)"))
11917		 (const_string "V4SF")
11918	      ]
11919	      (const_string "<sseinsnmode>")))])
11920
;; any_logic operations on VI12_AVX_AVX512F vectors (no masked variant).
;; Operand 1 is commutative ("%") and two memory inputs are rejected.
;; The C output routine assembles the mnemonic at output time.  For the
;; integer forms the suffix is always "q" for V64QI/V32HI; for the other
;; listed modes it is "q" only for alternative 2 under AVX512VL, and
;; empty otherwise.  The text is formatted into a function-static buffer,
;; which is returned.
11921(define_insn "*<code><mode>3"
11922  [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,x,v")
11923	(any_logic:VI12_AVX_AVX512F
11924	  (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,x,v")
11925	  (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
11926  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11927{
11928  static char buf[64];
11929  const char *ops;
11930  const char *tmp;
11931  const char *ssesuffix;
11932
  /* Step 1: pick mnemonic stem (tmp) and suffix (ssesuffix).  */
11933  switch (get_attr_mode (insn))
11934    {
11935    case MODE_XI:
11936      gcc_assert (TARGET_AVX512F);
11937      /* FALLTHRU */
11938    case MODE_OI:
11939      gcc_assert (TARGET_AVX2);
11940      /* FALLTHRU */
11941    case MODE_TI:
11942      gcc_assert (TARGET_SSE2);
11943      tmp = "p<logic>";
11944      switch (<MODE>mode)
11945	{
11946	case E_V64QImode:
11947	case E_V32HImode:
11948	  ssesuffix = "q";
11949	  break;
11950	case E_V32QImode:
11951	case E_V16HImode:
11952	case E_V16QImode:
11953	case E_V8HImode:
11954	  ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
11955	  break;
11956	default:
11957	  gcc_unreachable ();
11958	}
11959      break;
11960
11961    case MODE_V8SF:
11962      gcc_assert (TARGET_AVX);
11963      /* FALLTHRU */
11964    case MODE_V4SF:
11965      gcc_assert (TARGET_SSE);
11966      tmp = "<logic>";
11967      ssesuffix = "ps";
11968      break;
11969
11970    default:
11971      gcc_unreachable ();
11972    }
11973
  /* Step 2: two-operand template for alternative 0, "v"-prefixed
     three-operand template for alternatives 1 and 2.  */
11974  switch (which_alternative)
11975    {
11976    case 0:
11977      ops = "%s%s\t{%%2, %%0|%%0, %%2}";
11978      break;
11979    case 1:
11980    case 2:
11981      ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
11982      break;
11983    default:
11984      gcc_unreachable ();
11985    }
11986
11987  snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
11988  return buf;
11989}
11990  [(set_attr "isa" "noavx,avx,avx")
11991   (set_attr "type" "sselog")
11992   (set (attr "prefix_data16")
11993     (if_then_else
11994       (and (eq_attr "alternative" "0")
11995	    (eq_attr "mode" "TI"))
11996       (const_string "1")
11997       (const_string "*")))
11998   (set_attr "prefix" "orig,vex,evex")
11999   (set (attr "mode")
12000	(cond [(and (match_test "<MODE_SIZE> == 16")
12001		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
12002		 (const_string "<ssePSmode>")
12003	       (match_test "TARGET_AVX2")
12004		 (const_string "<sseinsnmode>")
12005	       (match_test "TARGET_AVX")
12006		 (if_then_else
12007		   (match_test "<MODE_SIZE> > 16")
12008		   (const_string "V8SF")
12009		   (const_string "<sseinsnmode>"))
12010	       (ior (not (match_test "TARGET_SSE2"))
12011		    (match_test "optimize_function_for_size_p (cfun)"))
12012		 (const_string "V4SF")
12013	      ]
12014	      (const_string "<sseinsnmode>")))])
12015
;; vptestm on VI12_AVX512VL vectors (UNSPEC_TESTM); the result is produced
;; in the matching mask mode.  Gated on TARGET_AVX512BW.
12016(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
12017  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
12018	(unspec:<avx512fmaskmode>
12019	 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
12020	  (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
12021	 UNSPEC_TESTM))]
12022  "TARGET_AVX512BW"
12023  "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12024  [(set_attr "prefix" "evex")
12025   (set_attr "mode"  "<sseinsnmode>")])
12026
;; vptestm on VI48_AVX512VL vectors (UNSPEC_TESTM); the result is produced
;; in the matching mask mode.  Gated on TARGET_AVX512F.
12027(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
12028  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
12029	(unspec:<avx512fmaskmode>
12030	 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
12031	  (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
12032	 UNSPEC_TESTM))]
12033  "TARGET_AVX512F"
12034  "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12035  [(set_attr "prefix" "evex")
12036   (set_attr "mode"  "<sseinsnmode>")])
12037
;; vptestnm on VI12_AVX512VL vectors (UNSPEC_TESTNM); the result is
;; produced in the matching mask mode.  Gated on TARGET_AVX512BW.
12038(define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
12039  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
12040	(unspec:<avx512fmaskmode>
12041	 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
12042	  (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
12043	 UNSPEC_TESTNM))]
12044  "TARGET_AVX512BW"
12045  "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12046  [(set_attr "prefix" "evex")
12047   (set_attr "mode"  "<sseinsnmode>")])
12048
;; vptestnm on VI48_AVX512VL vectors (UNSPEC_TESTNM); the result is
;; produced in the matching mask mode.  Gated on TARGET_AVX512F.
12049(define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
12050  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
12051	(unspec:<avx512fmaskmode>
12052	 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
12053	  (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
12054	 UNSPEC_TESTNM))]
12055  "TARGET_AVX512F"
12056  "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12057  [(set_attr "prefix" "evex")
12058   (set_attr "mode"  "<sseinsnmode>")])
12059
12060;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12061;;
12062;; Parallel integral element swizzling
12063;;
12064;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12065
;; Truncating pack of two vectors into one <ssepackmode> result.  Both
;; inputs are reinterpreted in <ssepackmode> with gen_lowpart, and
;; ix86_expand_vec_extract_even_odd is then called on them with a final
;; argument of 0 (presumably selecting the even elements -- confirm
;; against that helper).
12066(define_expand "vec_pack_trunc_<mode>"
12067  [(match_operand:<ssepackmode> 0 "register_operand")
12068   (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
12069   (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
12070  "TARGET_SSE2"
12071{
12072  rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
12073  rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
12074  ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
12075  DONE;
12076})
12077
;; Packs two QImode values into one HImode value:
;; operand 0 = (zero_extend (operand 2) << 8) | zero_extend (operand 1),
;; i.e. operand 1 lands in the low byte and operand 2 in the high byte.
12078(define_expand "vec_pack_trunc_qi"
12079  [(set (match_operand:HI 0 ("register_operand"))
12080        (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 ("register_operand")))
12081                           (const_int 8))
12082                (zero_extend:HI (match_operand:QI 1 ("register_operand")))))]
12083  "TARGET_AVX512F")
12084
;; Packs two SWI24 values into one <DOUBLEMASKMODE> value:
;; operand 0 = (zero_extend (operand 2) << N) | zero_extend (operand 1).
;; The shift count N is operand 3, which the preparation statement sets
;; to the bit size of <MODE>mode.
12085(define_expand "vec_pack_trunc_<mode>"
12086  [(set (match_operand:<DOUBLEMASKMODE> 0 ("register_operand"))
12087        (ior:<DOUBLEMASKMODE> (ashift:<DOUBLEMASKMODE> (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 2 ("register_operand")))
12088                           (match_dup 3))
12089                (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 1 ("register_operand")))))]
12090  "TARGET_AVX512BW"
12091{
12092  operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
12093})
12094
;; packsswb / vpacksswb: the result is the concatenation of the
;; signed-saturating truncations (ss_truncate) of operands 1 and 2.
;; Alternative 0 is the two-operand non-AVX form; alternatives 1 and 2
;; are three-operand forms tagged "avx" and "avx512bw" in the isa
;; attribute.
12095(define_insn "<sse2_avx2>_packsswb<mask_name>"
12096  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
12097	(vec_concat:VI1_AVX512
12098	  (ss_truncate:<ssehalfvecmode>
12099	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
12100	  (ss_truncate:<ssehalfvecmode>
12101	    (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
12102  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12103  "@
12104   packsswb\t{%2, %0|%0, %2}
12105   vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
12106   vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12107  [(set_attr "isa" "noavx,avx,avx512bw")
12108   (set_attr "type" "sselog")
12109   (set_attr "prefix_data16" "1,*,*")
12110   (set_attr "prefix" "orig,<mask_prefix>,evex")
12111   (set_attr "mode" "<sseinsnmode>")])
12112
;; packssdw / vpackssdw: the result (VI2_AVX2) is the concatenation of the
;; signed-saturating truncations (ss_truncate) of operands 1 and 2.
;; Alternative layout matches the packsswb pattern: non-AVX two-operand
;; form, then two three-operand forms tagged "avx" and "avx512bw".
12113(define_insn "<sse2_avx2>_packssdw<mask_name>"
12114  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
12115	(vec_concat:VI2_AVX2
12116	  (ss_truncate:<ssehalfvecmode>
12117	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
12118	  (ss_truncate:<ssehalfvecmode>
12119	    (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
12120  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12121  "@
12122   packssdw\t{%2, %0|%0, %2}
12123   vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
12124   vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12125  [(set_attr "isa" "noavx,avx,avx512bw")
12126   (set_attr "type" "sselog")
12127   (set_attr "prefix_data16" "1,*,*")
12128   (set_attr "prefix" "orig,<mask_prefix>,evex")
12129   (set_attr "mode" "<sseinsnmode>")])
12130
;; packuswb / vpackuswb: the result is the concatenation of the
;; unsigned-saturating truncations (us_truncate) of operands 1 and 2.
;; Alternative layout matches the packsswb pattern.
12131(define_insn "<sse2_avx2>_packuswb<mask_name>"
12132  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
12133	(vec_concat:VI1_AVX512
12134	  (us_truncate:<ssehalfvecmode>
12135	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
12136	  (us_truncate:<ssehalfvecmode>
12137	    (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
12138  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12139  "@
12140   packuswb\t{%2, %0|%0, %2}
12141   vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
12142   vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12143  [(set_attr "isa" "noavx,avx,avx512bw")
12144   (set_attr "type" "sselog")
12145   (set_attr "prefix_data16" "1,*,*")
12146   (set_attr "prefix" "orig,<mask_prefix>,evex")
12147   (set_attr "mode" "<sseinsnmode>")])
12148
;; vpunpckhbw on V64QI.  The selection indexes the 128-element
;; concatenation of operands 1 and 2 (operand 2 starts at index 64).
;; For each group of 16 elements it takes the upper eight of operand 1
;; (8-15, 24-31, 40-47, 56-63) and pairs each with the element at the
;; same position in operand 2 (index + 64).
12149(define_insn "avx512bw_interleave_highv64qi<mask_name>"
12150  [(set (match_operand:V64QI 0 "register_operand" "=v")
12151	(vec_select:V64QI
12152	  (vec_concat:V128QI
12153	    (match_operand:V64QI 1 "register_operand" "v")
12154	    (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
12155	  (parallel [(const_int 8)  (const_int 72)
12156		     (const_int 9)  (const_int 73)
12157		     (const_int 10) (const_int 74)
12158		     (const_int 11) (const_int 75)
12159		     (const_int 12) (const_int 76)
12160		     (const_int 13) (const_int 77)
12161		     (const_int 14) (const_int 78)
12162		     (const_int 15) (const_int 79)
12163		     (const_int 24) (const_int 88)
12164		     (const_int 25) (const_int 89)
12165		     (const_int 26) (const_int 90)
12166		     (const_int 27) (const_int 91)
12167		     (const_int 28) (const_int 92)
12168		     (const_int 29) (const_int 93)
12169		     (const_int 30) (const_int 94)
12170		     (const_int 31) (const_int 95)
12171		     (const_int 40) (const_int 104)
12172		     (const_int 41) (const_int 105)
12173		     (const_int 42) (const_int 106)
12174		     (const_int 43) (const_int 107)
12175		     (const_int 44) (const_int 108)
12176		     (const_int 45) (const_int 109)
12177		     (const_int 46) (const_int 110)
12178		     (const_int 47) (const_int 111)
12179		     (const_int 56) (const_int 120)
12180		     (const_int 57) (const_int 121)
12181		     (const_int 58) (const_int 122)
12182		     (const_int 59) (const_int 123)
12183		     (const_int 60) (const_int 124)
12184		     (const_int 61) (const_int 125)
12185		     (const_int 62) (const_int 126)
12186		     (const_int 63) (const_int 127)])))]
12187  "TARGET_AVX512BW"
12188  "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12189  [(set_attr "type" "sselog")
12190   (set_attr "prefix" "evex")
12191   (set_attr "mode" "XI")])
12192
;; vpunpckhbw on V32QI.  The selection indexes the 64-element
;; concatenation of operands 1 and 2 (operand 2 starts at index 32).
;; For each group of 16 elements it takes the upper eight of operand 1
;; (8-15, 24-31) and pairs each with the element at the same position in
;; operand 2 (index + 32).
12193(define_insn "avx2_interleave_highv32qi<mask_name>"
12194  [(set (match_operand:V32QI 0 "register_operand" "=v")
12195	(vec_select:V32QI
12196	  (vec_concat:V64QI
12197	    (match_operand:V32QI 1 "register_operand" "v")
12198	    (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
12199	  (parallel [(const_int 8)  (const_int 40)
12200		     (const_int 9)  (const_int 41)
12201		     (const_int 10) (const_int 42)
12202		     (const_int 11) (const_int 43)
12203		     (const_int 12) (const_int 44)
12204		     (const_int 13) (const_int 45)
12205		     (const_int 14) (const_int 46)
12206		     (const_int 15) (const_int 47)
12207		     (const_int 24) (const_int 56)
12208		     (const_int 25) (const_int 57)
12209		     (const_int 26) (const_int 58)
12210		     (const_int 27) (const_int 59)
12211		     (const_int 28) (const_int 60)
12212		     (const_int 29) (const_int 61)
12213		     (const_int 30) (const_int 62)
12214		     (const_int 31) (const_int 63)])))]
12215  "TARGET_AVX2 && <mask_avx512vl_condition>"
12216  "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12217  [(set_attr "type" "sselog")
12218   (set_attr "prefix" "<mask_prefix>")
12219   (set_attr "mode" "OI")])
12220
;; punpckhbw / vpunpckhbw on V16QI.  The selection indexes the 32-element
;; concatenation of operands 1 and 2 (operand 2 starts at index 16).
;; It takes elements 8-15 of operand 1 and pairs each with the element
;; at the same position in operand 2 (index + 16).  Alternative 0 is the
;; two-operand non-AVX form; alternative 1 is the three-operand AVX form.
12221(define_insn "vec_interleave_highv16qi<mask_name>"
12222  [(set (match_operand:V16QI 0 "register_operand" "=x,v")
12223	(vec_select:V16QI
12224	  (vec_concat:V32QI
12225	    (match_operand:V16QI 1 "register_operand" "0,v")
12226	    (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
12227	  (parallel [(const_int 8)  (const_int 24)
12228		     (const_int 9)  (const_int 25)
12229		     (const_int 10) (const_int 26)
12230		     (const_int 11) (const_int 27)
12231		     (const_int 12) (const_int 28)
12232		     (const_int 13) (const_int 29)
12233		     (const_int 14) (const_int 30)
12234		     (const_int 15) (const_int 31)])))]
12235  "TARGET_SSE2 && <mask_avx512vl_condition>"
12236  "@
12237   punpckhbw\t{%2, %0|%0, %2}
12238   vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12239  [(set_attr "isa" "noavx,avx")
12240   (set_attr "type" "sselog")
12241   (set_attr "prefix_data16" "1,*")
12242   (set_attr "prefix" "orig,<mask_prefix>")
12243   (set_attr "mode" "TI")])
12244
;; 512-bit vpunpcklbw.  Operand 2 occupies indices 64-127 of the
;; vec_concat.  For each 16-byte group of the inputs, the first eight
;; bytes of operand 1 are alternated with the first eight bytes of
;; operand 2.
(define_insn "avx512bw_interleave_lowv64qi<mask_name>"
  [(set (match_operand:V64QI 0 "register_operand" "=v")
	(vec_select:V64QI
	  (vec_concat:V128QI
	    (match_operand:V64QI 1 "register_operand" "v")
	    (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
	  (parallel
	    [;; Bytes 0-7.
	     (const_int 0)  (const_int 64)  (const_int 1)  (const_int 65)
	     (const_int 2)  (const_int 66)  (const_int 3)  (const_int 67)
	     (const_int 4)  (const_int 68)  (const_int 5)  (const_int 69)
	     (const_int 6)  (const_int 70)  (const_int 7)  (const_int 71)
	     ;; Bytes 16-23.
	     (const_int 16) (const_int 80)  (const_int 17) (const_int 81)
	     (const_int 18) (const_int 82)  (const_int 19) (const_int 83)
	     (const_int 20) (const_int 84)  (const_int 21) (const_int 85)
	     (const_int 22) (const_int 86)  (const_int 23) (const_int 87)
	     ;; Bytes 32-39.
	     (const_int 32) (const_int 96)  (const_int 33) (const_int 97)
	     (const_int 34) (const_int 98)  (const_int 35) (const_int 99)
	     (const_int 36) (const_int 100) (const_int 37) (const_int 101)
	     (const_int 38) (const_int 102) (const_int 39) (const_int 103)
	     ;; Bytes 48-55.
	     (const_int 48) (const_int 112) (const_int 49) (const_int 113)
	     (const_int 50) (const_int 114) (const_int 51) (const_int 115)
	     (const_int 52) (const_int 116) (const_int 53) (const_int 117)
	     (const_int 54) (const_int 118) (const_int 55) (const_int 119)])))]
  "TARGET_AVX512BW"
  "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
12288
;; 256-bit vpunpcklbw.  Operand 2 occupies indices 32-63 of the
;; vec_concat.  Bytes 0-7 and 16-23 of operand 1 are alternated with the
;; same-numbered bytes of operand 2.
(define_insn "avx2_interleave_lowv32qi<mask_name>"
  [(set (match_operand:V32QI 0 "register_operand" "=v")
	(vec_select:V32QI
	  (vec_concat:V64QI
	    (match_operand:V32QI 1 "register_operand" "v")
	    (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
	  (parallel
	    [;; Bytes 0-7.
	     (const_int 0)  (const_int 32) (const_int 1)  (const_int 33)
	     (const_int 2)  (const_int 34) (const_int 3)  (const_int 35)
	     (const_int 4)  (const_int 36) (const_int 5)  (const_int 37)
	     (const_int 6)  (const_int 38) (const_int 7)  (const_int 39)
	     ;; Bytes 16-23.
	     (const_int 16) (const_int 48) (const_int 17) (const_int 49)
	     (const_int 18) (const_int 50) (const_int 19) (const_int 51)
	     (const_int 20) (const_int 52) (const_int 21) (const_int 53)
	     (const_int 22) (const_int 54) (const_int 23) (const_int 55)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "OI")])
12316
;; 128-bit punpcklbw / vpunpcklbw: alternate bytes 0-7 of operand 1 with
;; bytes 0-7 of operand 2 (indices 16-23 of the vec_concat).
;; Alternative 0 is the two-operand non-AVX form with the destination tied
;; to operand 1; alternative 1 is the three-operand AVX form.
(define_insn "vec_interleave_lowv16qi<mask_name>"
  [(set (match_operand:V16QI 0 "register_operand" "=x,v")
	(vec_select:V16QI
	  (vec_concat:V32QI
	    (match_operand:V16QI 1 "register_operand" "0,v")
	    (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
	  (parallel
	    [(const_int 0) (const_int 16) (const_int 1) (const_int 17)
	     (const_int 2) (const_int 18) (const_int 3) (const_int 19)
	     (const_int 4) (const_int 20) (const_int 5) (const_int 21)
	     (const_int 6) (const_int 22) (const_int 7) (const_int 23)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "@
   punpcklbw\t{%2, %0|%0, %2}
   vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
12340
;; 512-bit vpunpckhwd.  Operand 2 occupies indices 32-63 of the
;; vec_concat.  In every group of eight words, words 4-7 of operand 1 are
;; alternated with words 4-7 of operand 2.
(define_insn "avx512bw_interleave_highv32hi<mask_name>"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
	(vec_select:V32HI
	  (vec_concat:V64HI
	    (match_operand:V32HI 1 "register_operand" "v")
	    (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
	  (parallel
	    [;; Words 4-7.
	     (const_int 4)  (const_int 36) (const_int 5)  (const_int 37)
	     (const_int 6)  (const_int 38) (const_int 7)  (const_int 39)
	     ;; Words 12-15.
	     (const_int 12) (const_int 44) (const_int 13) (const_int 45)
	     (const_int 14) (const_int 46) (const_int 15) (const_int 47)
	     ;; Words 20-23.
	     (const_int 20) (const_int 52) (const_int 21) (const_int 53)
	     (const_int 22) (const_int 54) (const_int 23) (const_int 55)
	     ;; Words 28-31.
	     (const_int 28) (const_int 60) (const_int 29) (const_int 61)
	     (const_int 30) (const_int 62) (const_int 31) (const_int 63)])))]
  "TARGET_AVX512BW"
  "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
12368
;; 256-bit vpunpckhwd.  Operand 2 occupies indices 16-31 of the
;; vec_concat.  Words 4-7 and 12-15 of operand 1 are alternated with the
;; same-numbered words of operand 2.
(define_insn "avx2_interleave_highv16hi<mask_name>"
  [(set (match_operand:V16HI 0 "register_operand" "=v")
	(vec_select:V16HI
	  (vec_concat:V32HI
	    (match_operand:V16HI 1 "register_operand" "v")
	    (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
	  (parallel
	    [;; Words 4-7.
	     (const_int 4)  (const_int 20) (const_int 5)  (const_int 21)
	     (const_int 6)  (const_int 22) (const_int 7)  (const_int 23)
	     ;; Words 12-15.
	     (const_int 12) (const_int 28) (const_int 13) (const_int 29)
	     (const_int 14) (const_int 30) (const_int 15) (const_int 31)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
12388
;; 128-bit punpckhwd / vpunpckhwd: alternate words 4-7 of operand 1 with
;; words 4-7 of operand 2 (indices 12-15 of the vec_concat).
;; Alternative 0 is the two-operand non-AVX form with the destination tied
;; to operand 1; alternative 1 is the three-operand AVX form.
(define_insn "vec_interleave_highv8hi<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=x,v")
	(vec_select:V8HI
	  (vec_concat:V16HI
	    (match_operand:V8HI 1 "register_operand" "0,v")
	    (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
	  (parallel
	    [(const_int 4) (const_int 12) (const_int 5) (const_int 13)
	     (const_int 6) (const_int 14) (const_int 7) (const_int 15)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "@
   punpckhwd\t{%2, %0|%0, %2}
   vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_vex")
   (set_attr "mode" "TI")])
12408
;; 512-bit vpunpcklwd.  Operand 2 occupies indices 32-63 of the
;; vec_concat.  In every group of eight words, words 0-3 of operand 1 are
;; alternated with words 0-3 of operand 2.
(define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
	(vec_select:V32HI
	  (vec_concat:V64HI
	    (match_operand:V32HI 1 "register_operand" "v")
	    (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
	  (parallel
	    [;; Words 0-3.
	     (const_int 0)  (const_int 32) (const_int 1)  (const_int 33)
	     (const_int 2)  (const_int 34) (const_int 3)  (const_int 35)
	     ;; Words 8-11.
	     (const_int 8)  (const_int 40) (const_int 9)  (const_int 41)
	     (const_int 10) (const_int 42) (const_int 11) (const_int 43)
	     ;; Words 16-19.
	     (const_int 16) (const_int 48) (const_int 17) (const_int 49)
	     (const_int 18) (const_int 50) (const_int 19) (const_int 51)
	     ;; Words 24-27.
	     (const_int 24) (const_int 56) (const_int 25) (const_int 57)
	     (const_int 26) (const_int 58) (const_int 27) (const_int 59)])))]
  "TARGET_AVX512BW"
  "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
12436
;; 256-bit vpunpcklwd.  Operand 2 occupies indices 16-31 of the
;; vec_concat.  Words 0-3 and 8-11 of operand 1 are alternated with the
;; same-numbered words of operand 2.
(define_insn "avx2_interleave_lowv16hi<mask_name>"
  [(set (match_operand:V16HI 0 "register_operand" "=v")
	(vec_select:V16HI
	  (vec_concat:V32HI
	    (match_operand:V16HI 1 "register_operand" "v")
	    (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
	  (parallel
	    [;; Words 0-3.
	     (const_int 0)  (const_int 16) (const_int 1)  (const_int 17)
	     (const_int 2)  (const_int 18) (const_int 3)  (const_int 19)
	     ;; Words 8-11.
	     (const_int 8)  (const_int 24) (const_int 9)  (const_int 25)
	     (const_int 10) (const_int 26) (const_int 11) (const_int 27)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
12456
;; 128-bit punpcklwd / vpunpcklwd: alternate words 0-3 of operand 1 with
;; words 0-3 of operand 2 (indices 8-11 of the vec_concat).
;; Alternative 0 is the two-operand non-AVX form with the destination tied
;; to operand 1; alternative 1 is the three-operand AVX form.
(define_insn "vec_interleave_lowv8hi<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=x,v")
	(vec_select:V8HI
	  (vec_concat:V16HI
	    (match_operand:V8HI 1 "register_operand" "0,v")
	    (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
	  (parallel
	    [(const_int 0) (const_int 8)  (const_int 1) (const_int 9)
	     (const_int 2) (const_int 10) (const_int 3) (const_int 11)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "@
   punpcklwd\t{%2, %0|%0, %2}
   vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "TI")])
12476
;; 256-bit vpunpckhdq.  Operand 2 occupies indices 8-15 of the
;; vec_concat.  Dwords 2-3 and 6-7 of operand 1 are alternated with the
;; same-numbered dwords of operand 2.
(define_insn "avx2_interleave_highv8si<mask_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(vec_select:V8SI
	  (vec_concat:V16SI
	    (match_operand:V8SI 1 "register_operand" "v")
	    (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
	  (parallel
	    [(const_int 2) (const_int 10) (const_int 3) (const_int 11)
	     (const_int 6) (const_int 14) (const_int 7) (const_int 15)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
12492
;; 512-bit vpunpckhdq.  Operand 2 occupies indices 16-31 of the
;; vec_concat.  In every group of four dwords, dwords 2-3 of operand 1
;; are alternated with dwords 2-3 of operand 2.
(define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(vec_select:V16SI
	  (vec_concat:V32SI
	    (match_operand:V16SI 1 "register_operand" "v")
	    (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
	  (parallel
	    [(const_int 2)  (const_int 18) (const_int 3)  (const_int 19)
	     (const_int 6)  (const_int 22) (const_int 7)  (const_int 23)
	     (const_int 10) (const_int 26) (const_int 11) (const_int 27)
	     (const_int 14) (const_int 30) (const_int 15) (const_int 31)])))]
  "TARGET_AVX512F"
  "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
12512
12513
;; 128-bit punpckhdq / vpunpckhdq: alternate dwords 2-3 of operand 1 with
;; dwords 2-3 of operand 2 (indices 6-7 of the vec_concat).
;; Alternative 0 is the two-operand non-AVX form with the destination tied
;; to operand 1; alternative 1 is the three-operand AVX form.
(define_insn "vec_interleave_highv4si<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=x,v")
	(vec_select:V4SI
	  (vec_concat:V8SI
	    (match_operand:V4SI 1 "register_operand" "0,v")
	    (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
	  (parallel
	    [(const_int 2) (const_int 6) (const_int 3) (const_int 7)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "@
   punpckhdq\t{%2, %0|%0, %2}
   vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_vex")
   (set_attr "mode" "TI")])
12531
;; 256-bit vpunpckldq.  Operand 2 occupies indices 8-15 of the
;; vec_concat.  Dwords 0-1 and 4-5 of operand 1 are alternated with the
;; same-numbered dwords of operand 2.
(define_insn "avx2_interleave_lowv8si<mask_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(vec_select:V8SI
	  (vec_concat:V16SI
	    (match_operand:V8SI 1 "register_operand" "v")
	    (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
	  (parallel
	    [(const_int 0) (const_int 8)  (const_int 1) (const_int 9)
	     (const_int 4) (const_int 12) (const_int 5) (const_int 13)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
12547
;; 512-bit vpunpckldq.  Operand 2 occupies indices 16-31 of the
;; vec_concat.  In every group of four dwords, dwords 0-1 of operand 1
;; are alternated with dwords 0-1 of operand 2.
(define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(vec_select:V16SI
	  (vec_concat:V32SI
	    (match_operand:V16SI 1 "register_operand" "v")
	    (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
	  (parallel
	    [(const_int 0)  (const_int 16) (const_int 1)  (const_int 17)
	     (const_int 4)  (const_int 20) (const_int 5)  (const_int 21)
	     (const_int 8)  (const_int 24) (const_int 9)  (const_int 25)
	     (const_int 12) (const_int 28) (const_int 13) (const_int 29)])))]
  "TARGET_AVX512F"
  "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
12567
;; 128-bit punpckldq / vpunpckldq: alternate dwords 0-1 of operand 1 with
;; dwords 0-1 of operand 2 (indices 4-5 of the vec_concat).
;; Alternative 0 is the two-operand non-AVX form with the destination tied
;; to operand 1; alternative 1 is the three-operand AVX form.
(define_insn "vec_interleave_lowv4si<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=x,v")
	(vec_select:V4SI
	  (vec_concat:V8SI
	    (match_operand:V4SI 1 "register_operand" "0,v")
	    (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
	  (parallel
	    [(const_int 0) (const_int 4) (const_int 1) (const_int 5)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "@
   punpckldq\t{%2, %0|%0, %2}
   vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
12585
;; High interleave for 256-bit integer vectors.  Emits both the
;; avx2_interleave_low and avx2_interleave_high insns on the inputs, then
;; combines the two results with avx2_permv2ti using the immediate
;; 1 + (3 << 4).
;; NOTE(review): that immediate presumably picks the upper 128 bits of each
;; intermediate result -- confirm against the vperm2i128 encoding.
(define_expand "vec_interleave_high<mode>"
  [(match_operand:VI_256 0 "register_operand")
   (match_operand:VI_256 1 "register_operand")
   (match_operand:VI_256 2 "nonimmediate_operand")]
 "TARGET_AVX2"
{
  rtx lo_part = gen_reg_rtx (<MODE>mode);
  rtx hi_part = gen_reg_rtx (<MODE>mode);
  rtx combined = gen_reg_rtx (V4DImode);
  rtx perm_imm = GEN_INT (1 + (3 << 4));

  emit_insn (gen_avx2_interleave_low<mode> (lo_part, operands[1],
					    operands[2]));
  emit_insn (gen_avx2_interleave_high<mode> (hi_part, operands[1],
					     operands[2]));

  /* The permute pattern is used in V4DImode, so view both intermediate
     results in that mode.  */
  emit_insn (gen_avx2_permv2ti (combined,
				gen_lowpart (V4DImode, lo_part),
				gen_lowpart (V4DImode, hi_part),
				perm_imm));

  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, combined));
  DONE;
})
12603
;; Low interleave for 256-bit integer vectors.  Emits both the
;; avx2_interleave_low and avx2_interleave_high insns on the inputs, then
;; combines the two results with avx2_permv2ti using the immediate
;; 0 + (2 << 4).
;; NOTE(review): that immediate presumably picks the lower 128 bits of each
;; intermediate result -- confirm against the vperm2i128 encoding.
(define_expand "vec_interleave_low<mode>"
  [(match_operand:VI_256 0 "register_operand")
   (match_operand:VI_256 1 "register_operand")
   (match_operand:VI_256 2 "nonimmediate_operand")]
 "TARGET_AVX2"
{
  rtx lo_part = gen_reg_rtx (<MODE>mode);
  rtx hi_part = gen_reg_rtx (<MODE>mode);
  rtx combined = gen_reg_rtx (V4DImode);
  rtx perm_imm = GEN_INT (0 + (2 << 4));

  emit_insn (gen_avx2_interleave_low<mode> (lo_part, operands[1],
					    operands[2]));
  emit_insn (gen_avx2_interleave_high<mode> (hi_part, operands[1],
					     operands[2]));

  /* The permute pattern is used in V4DImode, so view both intermediate
     results in that mode.  */
  emit_insn (gen_avx2_permv2ti (combined,
				gen_lowpart (V4DImode, lo_part),
				gen_lowpart (V4DImode, hi_part),
				perm_imm));

  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, combined));
  DONE;
})
12621
;; Modes handled by pinsr patterns, each with the target condition that
;; enables it.  V8HI carries no extra condition here.
(define_mode_iterator PINSR_MODE
  [(V16QI "TARGET_SSE4_1")
   V8HI
   (V4SI "TARGET_SSE4_1")
   (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])

;; Pattern-name prefix of the pinsr insn for each mode.
(define_mode_attr sse2p4_1
  [(V16QI "sse4_1")
   (V8HI "sse2")
   (V4SI "sse4_1")
   (V2DI "sse4_1")])

;; "isa" attribute value used by the EVEX alternatives of the pinsr insn.
(define_mode_attr pinsr_evex_isa
  [(V16QI "avx512bw")
   (V8HI "avx512bw")
   (V4SI "avx512dq")
   (V2DI "avx512dq")])
12635
;; Insert the scalar operand 2 into one element of vector operand 1.
;; Operand 3 is the vec_merge mask and must have exactly one bit set,
;; below the number of elements; it is rewritten to the element index
;; before printing.
;; Alternatives: 0/1 non-AVX (destination tied to operand 1), 2/3 VEX,
;; 4/5 EVEX; even alternatives take a general register, odd ones memory.
;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
(define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
  [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v")
	(vec_merge:PINSR_MODE
	  (vec_duplicate:PINSR_MODE
	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand"
	      "r,m,r,m,r,m"))
	  (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v")
	  (match_operand:SI 3 "const_int_operand")))]
  "TARGET_SSE2
   && ((unsigned) exact_log2 (INTVAL (operands[3]))
       < GET_MODE_NUNITS (<MODE>mode))"
{
  /* Elements narrower than SImode coming from a general register are
     printed with the %k modifier.  */
  bool narrow_elt
    = GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode);

  /* Turn the one-hot merge mask into the element index.  */
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));

  switch (which_alternative)
    {
    case 0:
      /* Non-AVX, register source.  */
      return (narrow_elt
	      ? "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}"
	      : "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}");

    case 1:
      /* Non-AVX, memory source.  */
      return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";

    case 2:
    case 4:
      /* VEX/EVEX, register source.  */
      return (narrow_elt
	      ? "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"
	      : "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}");

    case 3:
    case 5:
      /* VEX/EVEX, memory source.  */
      return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>")
   (set_attr "type" "sselog")
   ;; prefix_rex is 1 only for the non-AVX V2DI form.
   (set (attr "prefix_rex")
     (if_then_else
       (and (not (match_test "TARGET_AVX"))
	    (eq (const_string "<MODE>mode") (const_string "V2DImode")))
       (const_string "1")
       (const_string "*")))
   ;; prefix_data16 is 1 only for the non-AVX V8HI form.
   (set (attr "prefix_data16")
     (if_then_else
       (and (not (match_test "TARGET_AVX"))
	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
       (const_string "1")
       (const_string "*")))
   ;; prefix_extra is 1 for everything except the non-AVX V8HI form.
   (set (attr "prefix_extra")
     (if_then_else
       (and (not (match_test "TARGET_AVX"))
	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
       (const_string "*")
       (const_string "1")))
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,orig,vex,vex,evex,evex")
   (set_attr "mode" "TI")])
12693
;; Masked insertion of a quarter-width vector (operand 2) into operand 1.
;; Operand 3 is the quarter index (0..3), operand 4 another vector of the
;; full mode and operand 5 the mask register; both are forwarded unchanged
;; to the _1_mask pattern.  The index is converted into the vec_merge
;; selector that pattern expects: all element bits set except those of the
;; chosen quarter.
(define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
  [(match_operand:AVX512_VEC 0 "register_operand")
   (match_operand:AVX512_VEC 1 "register_operand")
   (match_operand:<ssequartermode> 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_3_operand")
   (match_operand:AVX512_VEC 4 "register_operand")
   (match_operand:<avx512fmaskmode> 5 "register_operand")]
  "TARGET_AVX512F"
{
  int quarter = INTVAL (operands[3]);
  int selector;

  if (GET_MODE_UNIT_SIZE (<MODE>mode) == 4)
    /* Sixteen 4-byte elements: a quarter covers four selector bits.  */
    selector = 0xFFFF ^ (0x000F << (quarter * 4));
  else
    /* Otherwise eight elements: a quarter covers two selector bits.  */
    selector = 0xFF ^ (0x03 << (quarter * 2));

  emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
    (operands[0], operands[1], operands[2], GEN_INT (selector),
     operands[4], operands[5]));
  DONE;
})
12713
;; Quarter-width insertion expressed as a vec_merge.  Selector bits that
;; are set keep elements of operand 1; the cleared group of bits takes the
;; duplicated operand 2.  Only the four selectors that clear exactly one
;; quarter are valid; the output code maps the selector back to the quarter
;; index used as the instruction's immediate.
(define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
  [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
	(vec_merge:AVX512_VEC
	  (match_operand:AVX512_VEC 1 "register_operand" "v")
	  (vec_duplicate:AVX512_VEC
		(match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
	  (match_operand:SI 3 "const_int_operand" "n")))]
  "TARGET_AVX512F"
{
  const bool dword_elts = GET_MODE_UNIT_SIZE (<MODE>mode) == 4;
  const int all_bits = dword_elts ? 0xFFFF : 0xFF;
  const int quarter_bits = dword_elts ? 0xF : 0x3;
  const int quarter_width = dword_elts ? 4 : 2;
  const int selector = INTVAL (operands[3]);
  int quarter;

  /* Look for the quarter whose selector bits are the only ones cleared.  */
  for (quarter = 0; quarter < 4; quarter++)
    if (selector == (all_bits ^ (quarter_bits << (quarter * quarter_width))))
      break;

  if (quarter == 4)
    gcc_unreachable ();

  operands[3] = GEN_INT (quarter);

  return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
12745
;; Masked insertion of a half-width vector (operand 2) into operand 1.
;; Operand 3 chooses the half: 0 dispatches to vec_set_lo_<mode>_mask,
;; 1 to vec_set_hi_<mode>_mask.  Operands 4 and 5 are forwarded unchanged
;; to the chosen pattern.
(define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
  [(match_operand:AVX512_VEC_2 0 "register_operand")
   (match_operand:AVX512_VEC_2 1 "register_operand")
   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_1_operand")
   (match_operand:AVX512_VEC_2 4 "register_operand")
   (match_operand:<avx512fmaskmode> 5 "register_operand")]
  "TARGET_AVX512F"
{
  if (INTVAL (operands[3]) != 0)
    emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
					   operands[2], operands[4],
					   operands[5]));
  else
    emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
					   operands[2], operands[4],
					   operands[5]));
  DONE;
})
12766
;; Replace the low half of a 16-element vector: the result is operand 2
;; followed by elements 8-15 of operand 1.  Printed as vinsert*32x8 with
;; immediate 0.
(define_insn "vec_set_lo_<mode><mask_name>"
  [(set (match_operand:V16FI 0 "register_operand" "=v")
	(vec_concat:V16FI
	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
	  (vec_select:<ssehalfvecmode>
	    (match_operand:V16FI 1 "register_operand" "v")
	    (parallel
	      [(const_int 8)  (const_int 9)  (const_int 10) (const_int 11)
	       (const_int 12) (const_int 13) (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512DQ"
  "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
12783
;; Replace the high half of a 16-element vector: the result is elements
;; 0-7 of operand 1 followed by operand 2.  Printed as vinsert*32x8 with
;; immediate 1.
(define_insn "vec_set_hi_<mode><mask_name>"
  [(set (match_operand:V16FI 0 "register_operand" "=v")
	(vec_concat:V16FI
	  (vec_select:<ssehalfvecmode>
	    (match_operand:V16FI 1 "register_operand" "v")
	    (parallel
	      [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
	       (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))
	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512DQ"
  "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
12800
;; Replace the low half of an 8-element vector: the result is operand 2
;; followed by elements 4-7 of operand 1.  Printed as vinsert*64x4 with
;; immediate 0.
(define_insn "vec_set_lo_<mode><mask_name>"
  [(set (match_operand:V8FI 0 "register_operand" "=v")
	(vec_concat:V8FI
	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
	  (vec_select:<ssehalfvecmode>
	    (match_operand:V8FI 1 "register_operand" "v")
	    (parallel
	      [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512F"
  "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
12815
;; Replace the high half of an 8-element vector: the result is elements
;; 0-3 of operand 1 followed by operand 2.  Printed as vinsert*64x4 with
;; immediate 1.
(define_insn "vec_set_hi_<mode><mask_name>"
  [(set (match_operand:V8FI 0 "register_operand" "=v")
	(vec_concat:V8FI
	  (vec_select:<ssehalfvecmode>
	    (match_operand:V8FI 1 "register_operand" "v")
	    (parallel
	      [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))
	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
12830
;; Masked 256-bit vshuf*64x2.  Operand 3 is a 2-bit immediate: bit 0 picks
;; the element pair taken from operand 1, bit 1 the pair taken from
;; operand 2.  It is expanded into the four explicit vec_select indices
;; expected by the _1_mask pattern (operand 2 starts at index 4).
(define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
  [(match_operand:VI8F_256 0 "register_operand")
   (match_operand:VI8F_256 1 "register_operand")
   (match_operand:VI8F_256 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_3_operand")
   (match_operand:VI8F_256 4 "register_operand")
   (match_operand:QI 5 "register_operand")]
  "TARGET_AVX512DQ"
{
  int imm = INTVAL (operands[3]);
  /* First index of the pair selected from each source.  */
  int first_src1 = (imm & 1) * 2;
  int first_src2 = ((imm >> 1) & 1) * 2 + 4;

  emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
      (operands[0], operands[1], operands[2],
       GEN_INT (first_src1), GEN_INT (first_src1 + 1),
       GEN_INT (first_src2), GEN_INT (first_src2 + 1),
       operands[4], operands[5]));
  DONE;
})
12850
;; 256-bit vshuf*64x2 as a vec_select over (vec_concat op1 op2).
;; Operands 3/4 select a pair of elements from operand 1 (indices 0-3),
;; operands 5/6 a pair from operand 2 (indices 4-7).  The instruction can
;; only move whole two-element pairs, so each pair must be consecutive AND
;; start on an even index.  Without the evenness test a selection such as
;; {1, 2, ...} would match and the output code below, which divides the
;; first index by two, would silently pick elements {0, 1} instead.
(define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
  [(set (match_operand:VI8F_256 0 "register_operand" "=v")
	(vec_select:VI8F_256
	  (vec_concat:<ssedoublemode>
	    (match_operand:VI8F_256 1 "register_operand" "v")
	    (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
	  (parallel [(match_operand 3  "const_0_to_3_operand")
		     (match_operand 4  "const_0_to_3_operand")
		     (match_operand 5  "const_4_to_7_operand")
		     (match_operand 6  "const_4_to_7_operand")])))]
  "TARGET_AVX512VL
   && (INTVAL (operands[3]) & 1) == 0
   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
   && (INTVAL (operands[5]) & 1) == 0
   && INTVAL (operands[5]) == INTVAL (operands[6]) - 1"
{
  /* Rebuild the 2-bit immediate: bit 0 is the pair taken from operand 1,
     bit 1 the pair taken from operand 2.  */
  int mask;
  mask = INTVAL (operands[3]) / 2;
  mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
  operands[3] = GEN_INT (mask);
  return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
12875
;; Masked 512-bit vshuf*64x2.  Operand 3 is an 8-bit immediate made of
;; four 2-bit fields.  Fields 0 and 1 each pick an element pair from
;; operand 1, fields 2 and 3 each pick a pair from operand 2 (which starts
;; at index 8 of the concatenation).  The immediate is expanded into the
;; eight explicit indices expected by the _1_mask pattern.
(define_expand "avx512f_shuf_<shuffletype>64x2_mask"
  [(match_operand:V8FI 0 "register_operand")
   (match_operand:V8FI 1 "register_operand")
   (match_operand:V8FI 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_255_operand")
   (match_operand:V8FI 4 "register_operand")
   (match_operand:QI 5 "register_operand")]
  "TARGET_AVX512F"
{
  int imm = INTVAL (operands[3]);
  rtx sel[8];

  for (int field = 0; field < 4; field++)
    {
      /* The upper two fields index into operand 2.  */
      int src_base = field < 2 ? 0 : 8;
      int first = ((imm >> (2 * field)) & 3) * 2 + src_base;

      sel[2 * field] = GEN_INT (first);
      sel[2 * field + 1] = GEN_INT (first + 1);
    }

  emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
      (operands[0], operands[1], operands[2],
       sel[0], sel[1], sel[2], sel[3],
       sel[4], sel[5], sel[6], sel[7],
       operands[4], operands[5]));
  DONE;
})
12899
;; 512-bit vshuf*64x2 as a vec_select over (vec_concat op1 op2).
;; Operands 3/4 and 5/6 select element pairs from operand 1 (indices 0-7);
;; operands 7/8 and 9/10 select pairs from operand 2 (indices 8-15).  The
;; instruction can only move whole two-element pairs, so each pair must be
;; consecutive AND start on an even index.  Without the evenness test a
;; selection starting on an odd index would match, and the divide-by-two
;; in the output code below would silently select a different pair.
(define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
  [(set (match_operand:V8FI 0 "register_operand" "=v")
	(vec_select:V8FI
	  (vec_concat:<ssedoublemode>
	    (match_operand:V8FI 1 "register_operand" "v")
	    (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
	  (parallel [(match_operand 3  "const_0_to_7_operand")
		     (match_operand 4  "const_0_to_7_operand")
		     (match_operand 5  "const_0_to_7_operand")
		     (match_operand 6  "const_0_to_7_operand")
		     (match_operand 7  "const_8_to_15_operand")
		     (match_operand 8  "const_8_to_15_operand")
		     (match_operand 9  "const_8_to_15_operand")
		     (match_operand 10  "const_8_to_15_operand")])))]
  "TARGET_AVX512F
   && (INTVAL (operands[3]) & 1) == 0
   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
   && (INTVAL (operands[5]) & 1) == 0
   && INTVAL (operands[5]) == INTVAL (operands[6]) - 1
   && (INTVAL (operands[7]) & 1) == 0
   && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
   && (INTVAL (operands[9]) & 1) == 0
   && INTVAL (operands[9]) == INTVAL (operands[10]) - 1"
{
  /* Rebuild the 8-bit immediate from the first index of each pair; two
     bits per pair, lowest field first.  */
  int mask;
  mask = INTVAL (operands[3]) / 2;
  mask |= INTVAL (operands[5]) / 2 << 2;
  mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
  mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
  operands[3] = GEN_INT (mask);

  return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
12933
;; Masked 256-bit vshuf*32x4.  Operand 3 is a 2-bit immediate: bit 0 picks
;; the run of four elements taken from operand 1, bit 1 the run taken from
;; operand 2 (which starts at index 8 of the concatenation).  It is
;; expanded into the eight explicit indices expected by the _1_mask
;; pattern.
(define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
  [(match_operand:VI4F_256 0 "register_operand")
   (match_operand:VI4F_256 1 "register_operand")
   (match_operand:VI4F_256 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_3_operand")
   (match_operand:VI4F_256 4 "register_operand")
   (match_operand:QI 5 "register_operand")]
  "TARGET_AVX512VL"
{
  int imm = INTVAL (operands[3]);
  /* First index of the four-element run selected from each source.  */
  int first_src1 = (imm & 1) * 4;
  int first_src2 = ((imm >> 1) & 1) * 4 + 8;

  emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
      (operands[0], operands[1], operands[2],
       GEN_INT (first_src1),     GEN_INT (first_src1 + 1),
       GEN_INT (first_src1 + 2), GEN_INT (first_src1 + 3),
       GEN_INT (first_src2),     GEN_INT (first_src2 + 1),
       GEN_INT (first_src2 + 2), GEN_INT (first_src2 + 3),
       operands[4], operands[5]));
  DONE;
})
12957
;; 256-bit vshuf*32x4 as a vec_select over (vec_concat op1 op2).
;; Operands 3-6 select four elements from operand 1 (indices 0-7) and
;; operands 7-10 four elements from operand 2 (indices 8-15).  The
;; instruction can only move whole four-element groups, so each run must be
;; consecutive AND start on a multiple of four.  Without the alignment test
;; a run such as {1, 2, 3, 4} would match, and the divide-by-four in the
;; output code below would silently select {0, 1, 2, 3} instead.
(define_insn "avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
  [(set (match_operand:VI4F_256 0 "register_operand" "=v")
	(vec_select:VI4F_256
	  (vec_concat:<ssedoublemode>
	    (match_operand:VI4F_256 1 "register_operand" "v")
	    (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
	  (parallel [(match_operand 3  "const_0_to_7_operand")
		     (match_operand 4  "const_0_to_7_operand")
		     (match_operand 5  "const_0_to_7_operand")
		     (match_operand 6  "const_0_to_7_operand")
		     (match_operand 7  "const_8_to_15_operand")
		     (match_operand 8  "const_8_to_15_operand")
		     (match_operand 9  "const_8_to_15_operand")
		     (match_operand 10 "const_8_to_15_operand")])))]
  "TARGET_AVX512VL
   && (INTVAL (operands[3]) & 3) == 0
   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
   && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
   && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
   && (INTVAL (operands[7]) & 3) == 0
   && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
   && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
   && INTVAL (operands[7]) == INTVAL (operands[10]) - 3"
{
  /* Rebuild the 2-bit immediate: bit 0 is the group taken from operand 1,
     bit 1 the group taken from operand 2.  */
  int mask;
  mask = INTVAL (operands[3]) / 4;
  mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
  operands[3] = GEN_INT (mask);

  return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
12991
;; Expander for the masked 512-bit vshuf{f,i}32x4.  The 8-bit immediate in
;; operand 3 is split into four 2-bit fields; each field is turned into
;; four consecutive element indices (field * 4 + 0..3).  The first two
;; fields index operand 1, the last two are biased by 16 so that they
;; index operand 2 within the concatenation used by the _1 insn.
;; Operands 4 and 5 are forwarded unchanged to the _1_mask insn
;; (presumably the merge source and the write-mask register consumed by
;; the <mask_name> subst, which is defined elsewhere).
12992(define_expand "avx512f_shuf_<shuffletype>32x4_mask"
12993  [(match_operand:V16FI 0 "register_operand")
12994   (match_operand:V16FI 1 "register_operand")
12995   (match_operand:V16FI 2 "nonimmediate_operand")
12996   (match_operand:SI 3 "const_0_to_255_operand")
12997   (match_operand:V16FI 4 "register_operand")
12998   (match_operand:HI 5 "register_operand")]
12999  "TARGET_AVX512F"
13000{
13001  int mask = INTVAL (operands[3]);
13002  emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
13003      (operands[0], operands[1], operands[2],
13004       GEN_INT (((mask >> 0) & 3) * 4),
13005       GEN_INT (((mask >> 0) & 3) * 4 + 1),
13006       GEN_INT (((mask >> 0) & 3) * 4 + 2),
13007       GEN_INT (((mask >> 0) & 3) * 4 + 3),
13008       GEN_INT (((mask >> 2) & 3) * 4),
13009       GEN_INT (((mask >> 2) & 3) * 4 + 1),
13010       GEN_INT (((mask >> 2) & 3) * 4 + 2),
13011       GEN_INT (((mask >> 2) & 3) * 4 + 3),
13012       GEN_INT (((mask >> 4) & 3) * 4 + 16),
13013       GEN_INT (((mask >> 4) & 3) * 4 + 17),
13014       GEN_INT (((mask >> 4) & 3) * 4 + 18),
13015       GEN_INT (((mask >> 4) & 3) * 4 + 19),
13016       GEN_INT (((mask >> 6) & 3) * 4 + 16),
13017       GEN_INT (((mask >> 6) & 3) * 4 + 17),
13018       GEN_INT (((mask >> 6) & 3) * 4 + 18),
13019       GEN_INT (((mask >> 6) & 3) * 4 + 19),
13020       operands[4], operands[5]));
13021  DONE;
13022})
13023
;; 512-bit vshuf{f,i}32x4, written as a vec_select of sixteen element
;; indices out of the concatenation of operands 1 and 2.  Indices 3..10
;; address operand 1 (0..15) and indices 11..18 address operand 2
;; (16..31 in the concatenation).
;;
;; The condition requires each of the four groups of four indices to be
;; consecutive AND to start on a multiple of four.  The alignment test is
;; essential: without it a selector whose groups start at, say, 1 or 17
;; passes the predicates and the consecutiveness checks, but the output
;; code divides each group's first index by 4 and would silently round it
;; down, emitting a shuffle of different elements than the RTL describes.
13024(define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
13025  [(set (match_operand:V16FI 0 "register_operand" "=v")
13026	(vec_select:V16FI
13027	  (vec_concat:<ssedoublemode>
13028	    (match_operand:V16FI 1 "register_operand" "v")
13029	    (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
13030	  (parallel [(match_operand 3  "const_0_to_15_operand")
13031		     (match_operand 4  "const_0_to_15_operand")
13032		     (match_operand 5  "const_0_to_15_operand")
13033		     (match_operand 6  "const_0_to_15_operand")
13034		     (match_operand 7  "const_0_to_15_operand")
13035		     (match_operand 8  "const_0_to_15_operand")
13036		     (match_operand 9  "const_0_to_15_operand")
13037		     (match_operand 10  "const_0_to_15_operand")
13038		     (match_operand 11  "const_16_to_31_operand")
13039		     (match_operand 12  "const_16_to_31_operand")
13040		     (match_operand 13  "const_16_to_31_operand")
13041		     (match_operand 14  "const_16_to_31_operand")
13042		     (match_operand 15  "const_16_to_31_operand")
13043		     (match_operand 16  "const_16_to_31_operand")
13044		     (match_operand 17  "const_16_to_31_operand")
13045		     (match_operand 18  "const_16_to_31_operand")])))]
13046  "TARGET_AVX512F
   && (INTVAL (operands[3]) & 3) == 0
   && (INTVAL (operands[7]) & 3) == 0
   && (INTVAL (operands[11]) & 3) == 0
   && (INTVAL (operands[15]) & 3) == 0
13047   && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
13048       && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
13049       && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
13050       && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
13051       && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
13052       && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
13053       && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
13054       && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
13055       && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
13056       && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
13057       && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
13058       && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
13059{
  /* Rebuild the 8-bit immediate, two bits per group.  Groups taken from
     operand 2 have their indices biased by 16 in the concatenation.  The
     insn condition guarantees every division is exact.  */
13060  int mask;
13061  mask = INTVAL (operands[3]) / 4;
13062  mask |= INTVAL (operands[7]) / 4 << 2;
13063  mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
13064  mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
  /* operands[3] is reused to carry the immediate into the template.  */
13065  operands[3] = GEN_INT (mask);
13066
13067  return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
13068}
13069  [(set_attr "type" "sselog")
13070   (set_attr "length_immediate" "1")
13071   (set_attr "prefix" "evex")
13072   (set_attr "mode" "<sseinsnmode>")])
13073
;; Expander for the masked V16SI vpshufd.  The 8-bit immediate in operand 2
;; is split into four 2-bit fields, and the same four fields are repeated
;; with offsets 0, 4, 8 and 12, giving the sixteen explicit element indices
;; expected by avx512f_pshufd_1_mask.  Operands 3 and 4 are forwarded
;; unchanged (presumably merge source and write-mask; the <mask_name>
;; subst is defined elsewhere).
13074(define_expand "avx512f_pshufdv3_mask"
13075  [(match_operand:V16SI 0 "register_operand")
13076   (match_operand:V16SI 1 "nonimmediate_operand")
13077   (match_operand:SI 2 "const_0_to_255_operand")
13078   (match_operand:V16SI 3 "register_operand")
13079   (match_operand:HI 4 "register_operand")]
13080  "TARGET_AVX512F"
13081{
13082  int mask = INTVAL (operands[2]);
13083  emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
13084				       GEN_INT ((mask >> 0) & 3),
13085				       GEN_INT ((mask >> 2) & 3),
13086				       GEN_INT ((mask >> 4) & 3),
13087				       GEN_INT ((mask >> 6) & 3),
13088				       GEN_INT (((mask >> 0) & 3) + 4),
13089				       GEN_INT (((mask >> 2) & 3) + 4),
13090				       GEN_INT (((mask >> 4) & 3) + 4),
13091				       GEN_INT (((mask >> 6) & 3) + 4),
13092				       GEN_INT (((mask >> 0) & 3) + 8),
13093				       GEN_INT (((mask >> 2) & 3) + 8),
13094				       GEN_INT (((mask >> 4) & 3) + 8),
13095				       GEN_INT (((mask >> 6) & 3) + 8),
13096				       GEN_INT (((mask >> 0) & 3) + 12),
13097				       GEN_INT (((mask >> 2) & 3) + 12),
13098				       GEN_INT (((mask >> 4) & 3) + 12),
13099				       GEN_INT (((mask >> 6) & 3) + 12),
13100				       operands[3], operands[4]));
13101  DONE;
13102})
13103
;; V16SI vpshufd expressed as a vec_select with sixteen explicit indices.
;; The predicates confine each group of four indices to its own range
;; (0..3, 4..7, 8..11, 12..15) and the condition additionally requires
;; groups two to four to repeat the first group's indices plus 4, 8 and 12,
;; so a single 8-bit immediate describes the whole permutation.
13104(define_insn "avx512f_pshufd_1<mask_name>"
13105  [(set (match_operand:V16SI 0 "register_operand" "=v")
13106	(vec_select:V16SI
13107	  (match_operand:V16SI 1 "nonimmediate_operand" "vm")
13108	  (parallel [(match_operand 2 "const_0_to_3_operand")
13109		     (match_operand 3 "const_0_to_3_operand")
13110		     (match_operand 4 "const_0_to_3_operand")
13111		     (match_operand 5 "const_0_to_3_operand")
13112		     (match_operand 6 "const_4_to_7_operand")
13113		     (match_operand 7 "const_4_to_7_operand")
13114		     (match_operand 8 "const_4_to_7_operand")
13115		     (match_operand 9 "const_4_to_7_operand")
13116		     (match_operand 10 "const_8_to_11_operand")
13117		     (match_operand 11 "const_8_to_11_operand")
13118		     (match_operand 12 "const_8_to_11_operand")
13119		     (match_operand 13 "const_8_to_11_operand")
13120		     (match_operand 14 "const_12_to_15_operand")
13121		     (match_operand 15 "const_12_to_15_operand")
13122		     (match_operand 16 "const_12_to_15_operand")
13123		     (match_operand 17 "const_12_to_15_operand")])))]
13124  "TARGET_AVX512F
13125   && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
13126   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
13127   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
13128   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
13129   && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
13130   && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
13131   && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
13132   && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
13133   && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
13134   && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
13135   && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
13136   && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
13137{
  /* Pack the first group's four 2-bit indices into the immediate.  */
13138  int mask = 0;
13139  mask |= INTVAL (operands[2]) << 0;
13140  mask |= INTVAL (operands[3]) << 2;
13141  mask |= INTVAL (operands[4]) << 4;
13142  mask |= INTVAL (operands[5]) << 6;
  /* operands[2] is reused to carry the immediate into the template.  */
13143  operands[2] = GEN_INT (mask);
13144
13145  return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
13146}
13147  [(set_attr "type" "sselog1")
13148   (set_attr "prefix" "evex")
13149   (set_attr "length_immediate" "1")
13150   (set_attr "mode" "XI")])
13151
;; Expander for the masked V8SI vpshufd (requires TARGET_AVX512VL).  The
;; four 2-bit fields of the immediate in operand 2 are emitted twice, with
;; offsets 0 and 4, to form the eight indices of avx2_pshufd_1_mask.
;; Operands 3 and 4 are forwarded unchanged.
13152(define_expand "avx512vl_pshufdv3_mask"
13153  [(match_operand:V8SI 0 "register_operand")
13154   (match_operand:V8SI 1 "nonimmediate_operand")
13155   (match_operand:SI 2 "const_0_to_255_operand")
13156   (match_operand:V8SI 3 "register_operand")
13157   (match_operand:QI 4 "register_operand")]
13158  "TARGET_AVX512VL"
13159{
13160  int mask = INTVAL (operands[2]);
13161  emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
13162				GEN_INT ((mask >> 0) & 3),
13163				GEN_INT ((mask >> 2) & 3),
13164				GEN_INT ((mask >> 4) & 3),
13165				GEN_INT ((mask >> 6) & 3),
13166				GEN_INT (((mask >> 0) & 3) + 4),
13167				GEN_INT (((mask >> 2) & 3) + 4),
13168				GEN_INT (((mask >> 4) & 3) + 4),
13169				GEN_INT (((mask >> 6) & 3) + 4),
13170                operands[3], operands[4]));
13171  DONE;
13172})
13173
;; Expander for the unmasked V8SI vpshufd.  The four 2-bit fields of the
;; immediate in operand 2 are emitted twice, with offsets 0 and 4, to form
;; the eight element indices taken by avx2_pshufd_1.
13174(define_expand "avx2_pshufdv3"
13175  [(match_operand:V8SI 0 "register_operand")
13176   (match_operand:V8SI 1 "nonimmediate_operand")
13177   (match_operand:SI 2 "const_0_to_255_operand")]
13178  "TARGET_AVX2"
13179{
13180  int mask = INTVAL (operands[2]);
13181  emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
13182				GEN_INT ((mask >> 0) & 3),
13183				GEN_INT ((mask >> 2) & 3),
13184				GEN_INT ((mask >> 4) & 3),
13185				GEN_INT ((mask >> 6) & 3),
13186				GEN_INT (((mask >> 0) & 3) + 4),
13187				GEN_INT (((mask >> 2) & 3) + 4),
13188				GEN_INT (((mask >> 4) & 3) + 4),
13189				GEN_INT (((mask >> 6) & 3) + 4)));
13190  DONE;
13191})
13192
;; V8SI vpshufd expressed as a vec_select with eight explicit indices.
;; The condition requires indices 6..9 to equal indices 2..5 plus 4, so
;; both groups of four elements use the same permutation and one 8-bit
;; immediate suffices.  <mask_avx512vl_condition> is supplied by the
;; <mask_name> subst (defined elsewhere).
13193(define_insn "avx2_pshufd_1<mask_name>"
13194  [(set (match_operand:V8SI 0 "register_operand" "=v")
13195	(vec_select:V8SI
13196	  (match_operand:V8SI 1 "nonimmediate_operand" "vm")
13197	  (parallel [(match_operand 2 "const_0_to_3_operand")
13198		     (match_operand 3 "const_0_to_3_operand")
13199		     (match_operand 4 "const_0_to_3_operand")
13200		     (match_operand 5 "const_0_to_3_operand")
13201		     (match_operand 6 "const_4_to_7_operand")
13202		     (match_operand 7 "const_4_to_7_operand")
13203		     (match_operand 8 "const_4_to_7_operand")
13204		     (match_operand 9 "const_4_to_7_operand")])))]
13205  "TARGET_AVX2
13206   && <mask_avx512vl_condition>
13207   && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
13208   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
13209   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
13210   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
13211{
  /* Pack the first group's four 2-bit indices into the immediate.  */
13212  int mask = 0;
13213  mask |= INTVAL (operands[2]) << 0;
13214  mask |= INTVAL (operands[3]) << 2;
13215  mask |= INTVAL (operands[4]) << 4;
13216  mask |= INTVAL (operands[5]) << 6;
  /* operands[2] is reused to carry the immediate into the template.  */
13217  operands[2] = GEN_INT (mask);
13218
13219  return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
13220}
13221  [(set_attr "type" "sselog1")
13222   (set_attr "prefix" "maybe_evex")
13223   (set_attr "length_immediate" "1")
13224   (set_attr "mode" "OI")])
13225
;; Expander for the masked V4SI pshufd (requires TARGET_AVX512VL).  Splits
;; the immediate in operand 2 into four 2-bit element indices and emits
;; sse2_pshufd_1_mask; operands 3 and 4 are forwarded unchanged.
13226(define_expand "avx512vl_pshufd_mask"
13227  [(match_operand:V4SI 0 "register_operand")
13228   (match_operand:V4SI 1 "nonimmediate_operand")
13229   (match_operand:SI 2 "const_0_to_255_operand")
13230   (match_operand:V4SI 3 "register_operand")
13231   (match_operand:QI 4 "register_operand")]
13232  "TARGET_AVX512VL"
13233{
13234  int mask = INTVAL (operands[2]);
13235  emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
13236				GEN_INT ((mask >> 0) & 3),
13237				GEN_INT ((mask >> 2) & 3),
13238				GEN_INT ((mask >> 4) & 3),
13239				GEN_INT ((mask >> 6) & 3),
13240                operands[3], operands[4]));
13241  DONE;
13242})
13243
;; Expander for the unmasked V4SI pshufd.  Operand 2 accepts any const_int;
;; only its low eight bits are used, as four 2-bit element indices passed
;; to sse2_pshufd_1.
13244(define_expand "sse2_pshufd"
13245  [(match_operand:V4SI 0 "register_operand")
13246   (match_operand:V4SI 1 "vector_operand")
13247   (match_operand:SI 2 "const_int_operand")]
13248  "TARGET_SSE2"
13249{
13250  int mask = INTVAL (operands[2]);
13251  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
13252				GEN_INT ((mask >> 0) & 3),
13253				GEN_INT ((mask >> 2) & 3),
13254				GEN_INT ((mask >> 4) & 3),
13255				GEN_INT ((mask >> 6) & 3)));
13256  DONE;
13257})
13258
;; V4SI pshufd expressed as a vec_select with four explicit indices, each
;; in 0..3.  The output code packs them, two bits apiece, into the
;; immediate.  The "%v" in the template selects between pshufd and vpshufd
;; (handled by the output machinery, not visible here).
13259(define_insn "sse2_pshufd_1<mask_name>"
13260  [(set (match_operand:V4SI 0 "register_operand" "=v")
13261	(vec_select:V4SI
13262	  (match_operand:V4SI 1 "vector_operand" "vBm")
13263	  (parallel [(match_operand 2 "const_0_to_3_operand")
13264		     (match_operand 3 "const_0_to_3_operand")
13265		     (match_operand 4 "const_0_to_3_operand")
13266		     (match_operand 5 "const_0_to_3_operand")])))]
13267  "TARGET_SSE2 && <mask_avx512vl_condition>"
13268{
13269  int mask = 0;
13270  mask |= INTVAL (operands[2]) << 0;
13271  mask |= INTVAL (operands[3]) << 2;
13272  mask |= INTVAL (operands[4]) << 4;
13273  mask |= INTVAL (operands[5]) << 6;
  /* operands[2] is reused to carry the immediate into the template.  */
13274  operands[2] = GEN_INT (mask);
13275
13276  return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13277}
13278  [(set_attr "type" "sselog1")
13279   (set_attr "prefix_data16" "1")
13280   (set_attr "prefix" "<mask_prefix2>")
13281   (set_attr "length_immediate" "1")
13282   (set_attr "mode" "TI")])
13283
;; V32HI vpshuflw.  Unlike the narrower variants below, this one is
;; modelled as an opaque UNSPEC_PSHUFLW rather than a vec_select, and the
;; 8-bit immediate in operand 2 is printed as given.
13284(define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
13285  [(set (match_operand:V32HI 0 "register_operand" "=v")
13286	(unspec:V32HI
13287	  [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
13288	   (match_operand:SI 2 "const_0_to_255_operand" "n")]
13289	  UNSPEC_PSHUFLW))]
13290  "TARGET_AVX512BW"
13291  "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13292  [(set_attr "type" "sselog")
13293   (set_attr "prefix" "evex")
13294   (set_attr "mode" "XI")])
13295
;; Expander for the masked V16HI vpshuflw (requires AVX512VL and AVX512BW).
;; The four 2-bit fields of the immediate in operand 2 are emitted twice,
;; with offsets 0 and 8, matching the two variable index groups of
;; avx2_pshuflw_1_mask.  Operands 3 and 4 are forwarded unchanged.
13296(define_expand "avx512vl_pshuflwv3_mask"
13297  [(match_operand:V16HI 0 "register_operand")
13298   (match_operand:V16HI 1 "nonimmediate_operand")
13299   (match_operand:SI 2 "const_0_to_255_operand")
13300   (match_operand:V16HI 3 "register_operand")
13301   (match_operand:HI 4 "register_operand")]
13302  "TARGET_AVX512VL && TARGET_AVX512BW"
13303{
13304  int mask = INTVAL (operands[2]);
13305  emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
13306				 GEN_INT ((mask >> 0) & 3),
13307				 GEN_INT ((mask >> 2) & 3),
13308				 GEN_INT ((mask >> 4) & 3),
13309				 GEN_INT ((mask >> 6) & 3),
13310				 GEN_INT (((mask >> 0) & 3) + 8),
13311				 GEN_INT (((mask >> 2) & 3) + 8),
13312				 GEN_INT (((mask >> 4) & 3) + 8),
13313				 GEN_INT (((mask >> 6) & 3) + 8),
13314                 operands[3], operands[4]));
13315  DONE;
13316})
13317
;; Expander for the unmasked V16HI vpshuflw.  The four 2-bit fields of the
;; immediate in operand 2 are emitted twice, with offsets 0 and 8, to form
;; the eight variable indices taken by avx2_pshuflw_1.
13318(define_expand "avx2_pshuflwv3"
13319  [(match_operand:V16HI 0 "register_operand")
13320   (match_operand:V16HI 1 "nonimmediate_operand")
13321   (match_operand:SI 2 "const_0_to_255_operand")]
13322  "TARGET_AVX2"
13323{
13324  int mask = INTVAL (operands[2]);
13325  emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
13326				 GEN_INT ((mask >> 0) & 3),
13327				 GEN_INT ((mask >> 2) & 3),
13328				 GEN_INT ((mask >> 4) & 3),
13329				 GEN_INT ((mask >> 6) & 3),
13330				 GEN_INT (((mask >> 0) & 3) + 8),
13331				 GEN_INT (((mask >> 2) & 3) + 8),
13332				 GEN_INT (((mask >> 4) & 3) + 8),
13333				 GEN_INT (((mask >> 6) & 3) + 8)));
13334  DONE;
13335})
13336
;; V16HI vpshuflw as a vec_select.  Within each group of eight elements the
;; first four indices are variable (operands 2..5 in 0..3, operands 6..9 in
;; 8..11) while the last four are the fixed identity indices 4..7 and
;; 12..15.  The condition requires operands 6..9 to equal operands 2..5
;; plus 8, so both groups share one 8-bit immediate.
13337(define_insn "avx2_pshuflw_1<mask_name>"
13338  [(set (match_operand:V16HI 0 "register_operand" "=v")
13339	(vec_select:V16HI
13340	  (match_operand:V16HI 1 "nonimmediate_operand" "vm")
13341	  (parallel [(match_operand 2 "const_0_to_3_operand")
13342		     (match_operand 3 "const_0_to_3_operand")
13343		     (match_operand 4 "const_0_to_3_operand")
13344		     (match_operand 5 "const_0_to_3_operand")
13345		     (const_int 4)
13346		     (const_int 5)
13347		     (const_int 6)
13348		     (const_int 7)
13349		     (match_operand 6 "const_8_to_11_operand")
13350		     (match_operand 7 "const_8_to_11_operand")
13351		     (match_operand 8 "const_8_to_11_operand")
13352		     (match_operand 9 "const_8_to_11_operand")
13353		     (const_int 12)
13354		     (const_int 13)
13355		     (const_int 14)
13356		     (const_int 15)])))]
13357  "TARGET_AVX2
13358   && <mask_avx512bw_condition> && <mask_avx512vl_condition>
13359   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
13360   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
13361   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
13362   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
13363{
  /* Pack the first group's four 2-bit indices into the immediate.  */
13364  int mask = 0;
13365  mask |= INTVAL (operands[2]) << 0;
13366  mask |= INTVAL (operands[3]) << 2;
13367  mask |= INTVAL (operands[4]) << 4;
13368  mask |= INTVAL (operands[5]) << 6;
  /* operands[2] is reused to carry the immediate into the template.  */
13369  operands[2] = GEN_INT (mask);
13370
13371  return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
13372}
13373  [(set_attr "type" "sselog")
13374   (set_attr "prefix" "maybe_evex")
13375   (set_attr "length_immediate" "1")
13376   (set_attr "mode" "OI")])
13377
;; Expander for the masked V8HI pshuflw (requires AVX512VL and AVX512BW).
;; Splits the immediate in operand 2 into four 2-bit indices and emits
;; sse2_pshuflw_1_mask; operands 3 and 4 are forwarded unchanged.
13378(define_expand "avx512vl_pshuflw_mask"
13379  [(match_operand:V8HI 0 "register_operand")
13380   (match_operand:V8HI 1 "nonimmediate_operand")
13381   (match_operand:SI 2 "const_0_to_255_operand")
13382   (match_operand:V8HI 3 "register_operand")
13383   (match_operand:QI 4 "register_operand")]
13384  "TARGET_AVX512VL && TARGET_AVX512BW"
13385{
13386  int mask = INTVAL (operands[2]);
13387  emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
13388				 GEN_INT ((mask >> 0) & 3),
13389				 GEN_INT ((mask >> 2) & 3),
13390				 GEN_INT ((mask >> 4) & 3),
13391				 GEN_INT ((mask >> 6) & 3),
13392                 operands[3], operands[4]));
13393  DONE;
13394})
13395
;; Expander for the unmasked V8HI pshuflw.  Operand 2 accepts any
;; const_int; only its low eight bits are used, as four 2-bit element
;; indices passed to sse2_pshuflw_1.
13396(define_expand "sse2_pshuflw"
13397  [(match_operand:V8HI 0 "register_operand")
13398   (match_operand:V8HI 1 "vector_operand")
13399   (match_operand:SI 2 "const_int_operand")]
13400  "TARGET_SSE2"
13401{
13402  int mask = INTVAL (operands[2]);
13403  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
13404				 GEN_INT ((mask >> 0) & 3),
13405				 GEN_INT ((mask >> 2) & 3),
13406				 GEN_INT ((mask >> 4) & 3),
13407				 GEN_INT ((mask >> 6) & 3)));
13408  DONE;
13409})
13410
;; V8HI pshuflw as a vec_select: the first four result elements use the
;; variable indices in operands 2..5 (each 0..3); the last four are the
;; fixed identity indices 4..7.  The output code packs operands 2..5 into
;; the 8-bit immediate.
13411(define_insn "sse2_pshuflw_1<mask_name>"
13412  [(set (match_operand:V8HI 0 "register_operand" "=v")
13413	(vec_select:V8HI
13414	  (match_operand:V8HI 1 "vector_operand" "vBm")
13415	  (parallel [(match_operand 2 "const_0_to_3_operand")
13416		     (match_operand 3 "const_0_to_3_operand")
13417		     (match_operand 4 "const_0_to_3_operand")
13418		     (match_operand 5 "const_0_to_3_operand")
13419		     (const_int 4)
13420		     (const_int 5)
13421		     (const_int 6)
13422		     (const_int 7)])))]
13423  "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
13424{
13425  int mask = 0;
13426  mask |= INTVAL (operands[2]) << 0;
13427  mask |= INTVAL (operands[3]) << 2;
13428  mask |= INTVAL (operands[4]) << 4;
13429  mask |= INTVAL (operands[5]) << 6;
  /* operands[2] is reused to carry the immediate into the template.  */
13430  operands[2] = GEN_INT (mask);
13431
13432  return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13433}
13434  [(set_attr "type" "sselog")
13435   (set_attr "prefix_data16" "0")
13436   (set_attr "prefix_rep" "1")
13437   (set_attr "prefix" "maybe_vex")
13438   (set_attr "length_immediate" "1")
13439   (set_attr "mode" "TI")])
13440
;; Expander for the unmasked V16HI vpshufhw.  The four 2-bit fields of the
;; immediate in operand 2 are biased by 4 and by 12, giving the eight
;; variable indices (ranges 4..7 and 12..15) taken by avx2_pshufhw_1.
13441(define_expand "avx2_pshufhwv3"
13442  [(match_operand:V16HI 0 "register_operand")
13443   (match_operand:V16HI 1 "nonimmediate_operand")
13444   (match_operand:SI 2 "const_0_to_255_operand")]
13445  "TARGET_AVX2"
13446{
13447  int mask = INTVAL (operands[2]);
13448  emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
13449				 GEN_INT (((mask >> 0) & 3) + 4),
13450				 GEN_INT (((mask >> 2) & 3) + 4),
13451				 GEN_INT (((mask >> 4) & 3) + 4),
13452				 GEN_INT (((mask >> 6) & 3) + 4),
13453				 GEN_INT (((mask >> 0) & 3) + 12),
13454				 GEN_INT (((mask >> 2) & 3) + 12),
13455				 GEN_INT (((mask >> 4) & 3) + 12),
13456				 GEN_INT (((mask >> 6) & 3) + 12)));
13457  DONE;
13458})
13459
;; V32HI vpshufhw.  Modelled as an opaque UNSPEC_PSHUFHW rather than a
;; vec_select; the 8-bit immediate in operand 2 is printed as given.
13460(define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
13461  [(set (match_operand:V32HI 0 "register_operand" "=v")
13462	(unspec:V32HI
13463	  [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
13464	   (match_operand:SI 2 "const_0_to_255_operand" "n")]
13465	  UNSPEC_PSHUFHW))]
13466  "TARGET_AVX512BW"
13467  "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13468  [(set_attr "type" "sselog")
13469   (set_attr "prefix" "evex")
13470   (set_attr "mode" "XI")])
13471
;; Expander for the masked V16HI vpshufhw (requires AVX512VL and AVX512BW).
;; The four 2-bit fields of the immediate in operand 2 are biased by 4 and
;; by 12 to form the eight variable indices of avx2_pshufhw_1_mask.
;; Operands 3 and 4 are forwarded unchanged.
13472(define_expand "avx512vl_pshufhwv3_mask"
13473  [(match_operand:V16HI 0 "register_operand")
13474   (match_operand:V16HI 1 "nonimmediate_operand")
13475   (match_operand:SI 2 "const_0_to_255_operand")
13476   (match_operand:V16HI 3 "register_operand")
13477   (match_operand:HI 4 "register_operand")]
13478  "TARGET_AVX512VL && TARGET_AVX512BW"
13479{
13480  int mask = INTVAL (operands[2]);
13481  emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
13482				 GEN_INT (((mask >> 0) & 3) + 4),
13483				 GEN_INT (((mask >> 2) & 3) + 4),
13484				 GEN_INT (((mask >> 4) & 3) + 4),
13485				 GEN_INT (((mask >> 6) & 3) + 4),
13486				 GEN_INT (((mask >> 0) & 3) + 12),
13487				 GEN_INT (((mask >> 2) & 3) + 12),
13488				 GEN_INT (((mask >> 4) & 3) + 12),
13489				 GEN_INT (((mask >> 6) & 3) + 12),
13490                 operands[3], operands[4]));
13491  DONE;
13492})
13493
;; V16HI vpshufhw as a vec_select.  Within each group of eight elements the
;; first four indices are the fixed identity (0..3 and 8..11) and the last
;; four are variable (operands 2..5 in 4..7, operands 6..9 in 12..15).
;; The condition requires operands 6..9 to equal operands 2..5 plus 8, so
;; both groups share one 8-bit immediate.
13494(define_insn "avx2_pshufhw_1<mask_name>"
13495  [(set (match_operand:V16HI 0 "register_operand" "=v")
13496	(vec_select:V16HI
13497	  (match_operand:V16HI 1 "nonimmediate_operand" "vm")
13498	  (parallel [(const_int 0)
13499		     (const_int 1)
13500		     (const_int 2)
13501		     (const_int 3)
13502		     (match_operand 2 "const_4_to_7_operand")
13503		     (match_operand 3 "const_4_to_7_operand")
13504		     (match_operand 4 "const_4_to_7_operand")
13505		     (match_operand 5 "const_4_to_7_operand")
13506		     (const_int 8)
13507		     (const_int 9)
13508		     (const_int 10)
13509		     (const_int 11)
13510		     (match_operand 6 "const_12_to_15_operand")
13511		     (match_operand 7 "const_12_to_15_operand")
13512		     (match_operand 8 "const_12_to_15_operand")
13513		     (match_operand 9 "const_12_to_15_operand")])))]
13514  "TARGET_AVX2
13515   && <mask_avx512bw_condition> && <mask_avx512vl_condition>
13516   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
13517   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
13518   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
13519   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
13520{
  /* Remove the bias of 4 from each index before packing it into its
     2-bit field of the immediate.  */
13521  int mask = 0;
13522  mask |= (INTVAL (operands[2]) - 4) << 0;
13523  mask |= (INTVAL (operands[3]) - 4) << 2;
13524  mask |= (INTVAL (operands[4]) - 4) << 4;
13525  mask |= (INTVAL (operands[5]) - 4) << 6;
  /* operands[2] is reused to carry the immediate into the template.  */
13526  operands[2] = GEN_INT (mask);
13527
13528  return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
13529}
13530  [(set_attr "type" "sselog")
13531   (set_attr "prefix" "maybe_evex")
13532   (set_attr "length_immediate" "1")
13533   (set_attr "mode" "OI")])
13534
;; Expander for the masked V8HI pshufhw (requires AVX512VL and AVX512BW).
;; Splits the immediate in operand 2 into four 2-bit fields, biases each by
;; 4, and emits sse2_pshufhw_1_mask; operands 3 and 4 are forwarded
;; unchanged.
13535(define_expand "avx512vl_pshufhw_mask"
13536  [(match_operand:V8HI 0 "register_operand")
13537   (match_operand:V8HI 1 "nonimmediate_operand")
13538   (match_operand:SI 2 "const_0_to_255_operand")
13539   (match_operand:V8HI 3 "register_operand")
13540   (match_operand:QI 4 "register_operand")]
13541  "TARGET_AVX512VL && TARGET_AVX512BW"
13542{
13543  int mask = INTVAL (operands[2]);
13544  emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
13545				 GEN_INT (((mask >> 0) & 3) + 4),
13546				 GEN_INT (((mask >> 2) & 3) + 4),
13547				 GEN_INT (((mask >> 4) & 3) + 4),
13548				 GEN_INT (((mask >> 6) & 3) + 4),
13549                 operands[3], operands[4]));
13550  DONE;
13551})
13552
;; Expander for the unmasked V8HI pshufhw.  Operand 2 accepts any
;; const_int; only its low eight bits are used, as four 2-bit fields each
;; biased by 4 before being passed to sse2_pshufhw_1.
13553(define_expand "sse2_pshufhw"
13554  [(match_operand:V8HI 0 "register_operand")
13555   (match_operand:V8HI 1 "vector_operand")
13556   (match_operand:SI 2 "const_int_operand")]
13557  "TARGET_SSE2"
13558{
13559  int mask = INTVAL (operands[2]);
13560  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
13561				 GEN_INT (((mask >> 0) & 3) + 4),
13562				 GEN_INT (((mask >> 2) & 3) + 4),
13563				 GEN_INT (((mask >> 4) & 3) + 4),
13564				 GEN_INT (((mask >> 6) & 3) + 4)));
13565  DONE;
13566})
13567
;; V8HI pshufhw as a vec_select: the first four result elements are the
;; fixed identity indices 0..3; the last four use the variable indices in
;; operands 2..5 (each 4..7).  The output code subtracts the bias of 4 and
;; packs the results into the 8-bit immediate.
13568(define_insn "sse2_pshufhw_1<mask_name>"
13569  [(set (match_operand:V8HI 0 "register_operand" "=v")
13570	(vec_select:V8HI
13571	  (match_operand:V8HI 1 "vector_operand" "vBm")
13572	  (parallel [(const_int 0)
13573		     (const_int 1)
13574		     (const_int 2)
13575		     (const_int 3)
13576		     (match_operand 2 "const_4_to_7_operand")
13577		     (match_operand 3 "const_4_to_7_operand")
13578		     (match_operand 4 "const_4_to_7_operand")
13579		     (match_operand 5 "const_4_to_7_operand")])))]
13580  "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
13581{
13582  int mask = 0;
13583  mask |= (INTVAL (operands[2]) - 4) << 0;
13584  mask |= (INTVAL (operands[3]) - 4) << 2;
13585  mask |= (INTVAL (operands[4]) - 4) << 4;
13586  mask |= (INTVAL (operands[5]) - 4) << 6;
  /* operands[2] is reused to carry the immediate into the template.  */
13587  operands[2] = GEN_INT (mask);
13588
13589  return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13590}
13591  [(set_attr "type" "sselog")
13592   (set_attr "prefix_rep" "1")
13593   (set_attr "prefix_data16" "0")
13594   (set_attr "prefix" "maybe_vex")
13595   (set_attr "length_immediate" "1")
13596   (set_attr "mode" "TI")])
13597
;; Expander that builds a V4SI from a scalar SI in operand 1.  The
;; preparation statement sets operand 2 to the V4SI zero vector, and the
;; vec_merge selector (const_int 1) takes element 0 from the duplicated
;; scalar and the remaining elements from operand 2.
13598(define_expand "sse2_loadd"
13599  [(set (match_operand:V4SI 0 "register_operand")
13600	(vec_merge:V4SI
13601	  (vec_duplicate:V4SI
13602	    (match_operand:SI 1 "nonimmediate_operand"))
13603	  (match_dup 2)
13604	  (const_int 1)))]
13605  "TARGET_SSE"
13606  "operands[2] = CONST0_RTX (V4SImode);")
13607
;; Insert the SI scalar in operand 2 into element 0 of a V4SI whose other
;; elements come from operand 1 (a register or zero, per reg_or_0_operand).
;; Five alternatives, gated by the "isa" attribute:
;;   0,1: movd from memory / general register with operand 1 constrained
;;        to "C" (sse2);
;;   2,3: movss for non-AVX targets, from memory or with operand 1 tied to
;;        the destination;
;;   4:   three-operand vmovss for AVX.
13608(define_insn "sse2_loadld"
13609  [(set (match_operand:V4SI 0 "register_operand"       "=v,Yi,x,x,v")
13610	(vec_merge:V4SI
13611	  (vec_duplicate:V4SI
13612	    (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,v"))
13613	  (match_operand:V4SI 1 "reg_or_0_operand"     "C ,C ,C,0,v")
13614	  (const_int 1)))]
13615  "TARGET_SSE"
13616  "@
13617   %vmovd\t{%2, %0|%0, %2}
13618   %vmovd\t{%2, %0|%0, %2}
13619   movss\t{%2, %0|%0, %2}
13620   movss\t{%2, %0|%0, %2}
13621   vmovss\t{%2, %1, %0|%0, %1, %2}"
13622  [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
13623   (set_attr "type" "ssemov")
13624   (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,maybe_evex")
13625   (set_attr "mode" "TI,TI,V4SF,SF,SF")])
13626
13627;; QI and HI modes handled by pextr patterns.
;; V16QI takes part only when TARGET_SSE4_1 holds; V8HI is unconditional.
13628(define_mode_iterator PEXTR_MODE12
13629  [(V16QI "TARGET_SSE4_1") V8HI])
13630
;; Extract one byte/word element (index in operand 2) from a PEXTR_MODE12
;; vector register with pextr<suffix>.  Alternatives 0/2 write a general
;; register (printed with %k0), alternatives 1/3 write memory; 2 and 3 are
;; the "v"-register forms gated on avx512bw, and the store form in
;; alternative 1 is gated on sse4.
;; "prefix_extra" is "*" only for register destinations in V8HImode and
;; "1" in every other case.
13631(define_insn "*vec_extract<mode>"
13632  [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m,r,m")
13633	(vec_select:<ssescalarmode>
13634	  (match_operand:PEXTR_MODE12 1 "register_operand" "x,x,v,v")
13635	  (parallel
13636	    [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
13637  "TARGET_SSE2"
13638  "@
13639   %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
13640   %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
13641   vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
13642   vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13643  [(set_attr "isa" "*,sse4,avx512bw,avx512bw")
13644   (set_attr "type" "sselog1")
13645   (set_attr "prefix_data16" "1")
13646   (set (attr "prefix_extra")
13647     (if_then_else
13648       (and (eq_attr "alternative" "0,2")
13649	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
13650       (const_string "*")
13651       (const_string "1")))
13652   (set_attr "length_immediate" "1")
13653   (set_attr "prefix" "maybe_vex,maybe_vex,evex,evex")
13654   (set_attr "mode" "TI")])
13655
;; Zero-extending variant of the byte/word element extract: the extracted
;; scalar is zero_extended to an SWI48 general register.  Both alternatives
;; print the destination with %k0; the second is the "v"-register form
;; gated on avx512bw.
13656(define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
13657  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
13658	(zero_extend:SWI48
13659	  (vec_select:<PEXTR_MODE12:ssescalarmode>
13660	    (match_operand:PEXTR_MODE12 1 "register_operand" "x,v")
13661	    (parallel
13662	      [(match_operand:SI 2
13663		"const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
13664  "TARGET_SSE2"
13665  "@
13666   %vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
13667   vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
13668  [(set_attr "isa" "*,avx512bw")
13669   (set_attr "type" "sselog1")
13670   (set_attr "prefix_data16" "1")
13671   (set (attr "prefix_extra")
13672     (if_then_else
13673       (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
13674       (const_string "*")
13675       (const_string "1")))
13676   (set_attr "length_immediate" "1")
13677   (set_attr "prefix" "maybe_vex")
13678   (set_attr "mode" "TI")])
13679
;; Element extract from a VI12_128 vector that lives in offsettable memory
;; ("o").  The output template is "#", so this insn is never printed
;; directly; it has to be split into other insns (the splitter is not in
;; this part of the file).
13680(define_insn "*vec_extract<mode>_mem"
13681  [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
13682	(vec_select:<ssescalarmode>
13683	  (match_operand:VI12_128 1 "memory_operand" "o")
13684	  (parallel
13685	    [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
13686  "TARGET_SSE"
13687  "#")
13688
;; Extract element 0 of a vector as an SWI48 scalar.  The condition rejects
;; the memory-to-memory case.  The template is "#": the insn is always
;; split rather than printed.
13689(define_insn "*vec_extract<ssevecmodelower>_0"
13690  [(set (match_operand:SWI48 0 "nonimmediate_operand"	       "=r ,v ,m")
13691	(vec_select:SWI48
13692	  (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,vm,v")
13693	  (parallel [(const_int 0)])))]
13694  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13695  "#")
13696
;; Extract element 0 of a V2DI as DI on non-64-bit targets, using only
;; vector registers and memory.  The condition rejects the
;; memory-to-memory case.  The template is "#": always split.
13697(define_insn "*vec_extractv2di_0_sse"
13698  [(set (match_operand:DI 0 "nonimmediate_operand"     "=v,m")
13699	(vec_select:DI
13700	  (match_operand:V2DI 1 "nonimmediate_operand" "vm,v")
13701	  (parallel [(const_int 0)])))]
13702  "TARGET_SSE && !TARGET_64BIT
13703   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13704  "#")
13705
;; After reload, extracting element 0 from a vector register becomes a
;; plain scalar move from the low part of that register: operand 1 is
;; replaced by its <MODE>mode lowpart.
13706(define_split
13707  [(set (match_operand:SWI48x 0 "nonimmediate_operand")
13708	(vec_select:SWI48x
13709	  (match_operand:<ssevecmode> 1 "register_operand")
13710	  (parallel [(const_int 0)])))]
13711  "TARGET_SSE && reload_completed"
13712  [(set (match_dup 0) (match_dup 1))]
13713  "operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
13714
;; Zero-extend element 0 of a V4SI register to DI when SSE4.1 is available.
;; Three alternatives (general register, "x" register, "v" register),
;; gated by isa x64 / any / avx512f.  The template is "#": always split.
13715(define_insn "*vec_extractv4si_0_zext_sse4"
13716  [(set (match_operand:DI 0 "register_operand" "=r,x,v")
13717	(zero_extend:DI
13718	  (vec_select:SI
13719	    (match_operand:V4SI 1 "register_operand" "Yj,x,v")
13720	    (parallel [(const_int 0)]))))]
13721  "TARGET_SSE4_1"
13722  "#"
13723  [(set_attr "isa" "x64,*,avx512f")])
13724
;; Zero-extend element 0 of a V4SI register into a 64-bit general register.
;; Only enabled on 64-bit SSE2 targets where
;; TARGET_INTER_UNIT_MOVES_FROM_VEC holds.  The template is "#": always
;; split.
13725(define_insn "*vec_extractv4si_0_zext"
13726  [(set (match_operand:DI 0 "register_operand" "=r")
13727	(zero_extend:DI
13728	  (vec_select:SI
13729	    (match_operand:V4SI 1 "register_operand" "x")
13730	    (parallel [(const_int 0)]))))]
13731  "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
13732  "#")
13733
;; After reload, a zero-extended extract of V4SI element 0 becomes a
;; zero_extend of the SImode low part of the vector register.
13734(define_split
13735  [(set (match_operand:DI 0 "register_operand")
13736	(zero_extend:DI
13737	  (vec_select:SI
13738	    (match_operand:V4SI 1 "register_operand")
13739	    (parallel [(const_int 0)]))))]
13740  "TARGET_SSE2 && reload_completed"
13741  [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
13742  "operands[1] = gen_lowpart (SImode, operands[1]);")
13743
;; Extract SI element operand 2 (0..3) from a V4SI register (SSE4.1).
;; Alternatives:
;;   0,1: pextrd into a general register or memory;
;;   2,3: in-place psrldq (operand 1 tied to the destination), non-AVX;
;;   4,5: three-operand vpsrldq.
;; For the shift forms the element index is converted to a byte count by
;; multiplying by 4, and operands[2] is overwritten with that count.
13744(define_insn "*vec_extractv4si"
13745  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,x,Yv")
13746	(vec_select:SI
13747	  (match_operand:V4SI 1 "register_operand" "x,v,0,0,x,v")
13748	  (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
13749  "TARGET_SSE4_1"
13750{
13751  switch (which_alternative)
13752    {
13753    case 0:
13754    case 1:
13755      return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
13756
13757    case 2:
13758    case 3:
      /* Element index -> byte shift count.  */
13759      operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
13760      return "psrldq\t{%2, %0|%0, %2}";
13761
13762    case 4:
13763    case 5:
      /* Element index -> byte shift count.  */
13764      operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
13765      return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
13766
13767    default:
13768      gcc_unreachable ();
13769    }
13770}
13771  [(set_attr "isa" "*,avx512dq,noavx,noavx,avx,avx512bw")
13772   (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1,sseishft1")
13773   (set (attr "prefix_extra")
13774     (if_then_else (eq_attr "alternative" "0,1")
13775		   (const_string "1")
13776		   (const_string "*")))
13777   (set_attr "length_immediate" "1")
13778   (set_attr "prefix" "maybe_vex,evex,orig,orig,vex,evex")
13779   (set_attr "mode" "TI")])
13780
;; Extract SI element operand 2 (0..3) from a V4SI register and
;; zero-extend it to a 64-bit general register with pextrd; the
;; destination is printed with %k0.  The second alternative is the
;; "v"-register form gated on avx512dq.
13781(define_insn "*vec_extractv4si_zext"
13782  [(set (match_operand:DI 0 "register_operand" "=r,r")
13783	(zero_extend:DI
13784	  (vec_select:SI
13785	    (match_operand:V4SI 1 "register_operand" "x,v")
13786	    (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
13787  "TARGET_64BIT && TARGET_SSE4_1"
13788  "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
13789  [(set_attr "isa" "*,avx512dq")
13790   (set_attr "type" "sselog1")
13791   (set_attr "prefix_extra" "1")
13792   (set_attr "length_immediate" "1")
13793   (set_attr "prefix" "maybe_vex")
13794   (set_attr "mode" "TI")])
13795
;; Extract element %2 of a V4SI that lives in offsettable memory ("o")
;; into an SSE or general register.  The output template is "#", so the
;; insn is never printed directly and has to be split first; the
;; post-reload define_split for VI_128 memory operands in this file has a
;; matching shape and turns it into a scalar load.
13796(define_insn "*vec_extractv4si_mem"
13797  [(set (match_operand:SI 0 "register_operand" "=x,r")
13798	(vec_select:SI
13799	  (match_operand:V4SI 1 "memory_operand" "o,o")
13800	  (parallel [(match_operand 2 "const_0_to_3_operand")])))]
13801  "TARGET_SSE"
13802  "#")
13803
;; 64-bit only: zero-extended extraction of element %2 from a V4SI in
;; offsettable memory.  Always split after reload into a zero_extend of
;; an SImode memory reference at byte offset 4 * %2 from the original
;; address (adjust_address).
13804(define_insn_and_split "*vec_extractv4si_zext_mem"
13805  [(set (match_operand:DI 0 "register_operand" "=x,r")
13806	(zero_extend:DI
13807	  (vec_select:SI
13808	    (match_operand:V4SI 1 "memory_operand" "o,o")
13809	    (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
13810  "TARGET_64BIT && TARGET_SSE"
13811  "#"
13812  "&& reload_completed"
13813  [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
13814{
13815  operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
13816})
13817
;; Extract element 1 (the high quadword) of a V2DI operand.
;; The insn condition rejects the memory-to-memory combination.
;; Alternatives, in constraint order:
;;   0,1  pextrq/vpextrq $1 into a general register or memory
;;        (isa x64_sse4 / x64_avx512dq)
;;   2    movhps store of the high half to memory
;;   3    psrldq by 8 bytes in place (operand 1 tied to operand 0)
;;   4,5  vpsrldq by 8 bytes from operand 1 into operand 0
;;   6    movhlps
;;   7,8  source in offsettable memory, template "#": must be split
;;        before final output (alternative 8 is restricted to x64)
13818(define_insn "*vec_extractv2di_1"
13819  [(set (match_operand:DI 0 "nonimmediate_operand"     "=rm,rm,m,x,x,Yv,x,v,r")
13820	(vec_select:DI
13821	  (match_operand:V2DI 1 "nonimmediate_operand"  "x ,v ,v,0,x, v,x,o,o")
13822	  (parallel [(const_int 1)])))]
13823  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13824  "@
13825   %vpextrq\t{$1, %1, %0|%0, %1, 1}
13826   vpextrq\t{$1, %1, %0|%0, %1, 1}
13827   %vmovhps\t{%1, %0|%0, %1}
13828   psrldq\t{$8, %0|%0, 8}
13829   vpsrldq\t{$8, %1, %0|%0, %1, 8}
13830   vpsrldq\t{$8, %1, %0|%0, %1, 8}
13831   movhlps\t{%1, %0|%0, %1}
13832   #
13833   #"
;; Alternatives 2 and 7 are not listed below and get the default isa "*".
13834  [(set (attr "isa")
13835     (cond [(eq_attr "alternative" "0")
13836	      (const_string "x64_sse4")
13837	    (eq_attr "alternative" "1")
13838	      (const_string "x64_avx512dq")
13839	    (eq_attr "alternative" "3")
13840	      (const_string "sse2_noavx")
13841	    (eq_attr "alternative" "4")
13842	      (const_string "avx")
13843	    (eq_attr "alternative" "5")
13844	      (const_string "avx512bw")
13845	    (eq_attr "alternative" "6")
13846	      (const_string "noavx")
13847	    (eq_attr "alternative" "8")
13848	      (const_string "x64")
13849	   ]
13850	   (const_string "*")))
13851   (set (attr "type")
13852     (cond [(eq_attr "alternative" "2,6,7")
13853	      (const_string "ssemov")
13854	    (eq_attr "alternative" "3,4,5")
13855	      (const_string "sseishft1")
13856	    (eq_attr "alternative" "8")
13857	      (const_string "imov")
13858	   ]
13859	   (const_string "sselog1")))
;; Only the alternatives whose template carries an immediate get a
;; one-byte length_immediate.
13860   (set (attr "length_immediate")
13861     (if_then_else (eq_attr "alternative" "0,1,3,4,5")
13862		   (const_string "1")
13863		   (const_string "*")))
13864   (set (attr "prefix_rex")
13865     (if_then_else (eq_attr "alternative" "0,1")
13866		   (const_string "1")
13867		   (const_string "*")))
13868   (set (attr "prefix_extra")
13869     (if_then_else (eq_attr "alternative" "0,1")
13870		   (const_string "1")
13871		   (const_string "*")))
13872   (set_attr "prefix" "maybe_vex,evex,maybe_vex,orig,vex,evex,orig,*,*")
13873   (set_attr "mode" "TI,TI,V2SF,TI,TI,TI,V4SF,DI,DI")])
13874
;; After reload, extracting element %2 from a VI_128 vector that is in
;; memory is just a scalar load: operand 1 is re-addressed in the element
;; mode at byte offset %2 * GET_MODE_SIZE (element mode) and the
;; vec_select is replaced by a plain move.
13875(define_split
13876  [(set (match_operand:<ssescalarmode> 0 "register_operand")
13877	(vec_select:<ssescalarmode>
13878	  (match_operand:VI_128 1 "memory_operand")
13879	  (parallel
13880	    [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
13881  "TARGET_SSE && reload_completed"
13882  [(set (match_dup 0) (match_dup 1))]
13883{
13884  int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
13885
13886  operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
13887})
13888
;; Extract TImode element %2 (0 or 1) of a V2TI register into a vector
;; register or memory.  Alternative 0 emits vextract%~128 on operand 1;
;; alternative 1 emits vextracti32x4 and prints operand 1 through %g1.
;; NOTE(review): %~ and %g are operand-printing codes defined in the i386
;; back end, not in this chunk; presumably %~ selects the i/f mnemonic
;; letter and %g prints the 512-bit register name -- confirm.
13889(define_insn "*vec_extractv2ti"
13890  [(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
13891	(vec_select:TI
13892	  (match_operand:V2TI 1 "register_operand" "x,v")
13893	  (parallel
13894	    [(match_operand:SI 2 "const_0_to_1_operand")])))]
13895  "TARGET_AVX"
13896  "@
13897   vextract%~128\t{%2, %1, %0|%0, %1, %2}
13898   vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
13899  [(set_attr "type" "sselog")
13900   (set_attr "prefix_extra" "1")
13901   (set_attr "length_immediate" "1")
13902   (set_attr "prefix" "vex,evex")
13903   (set_attr "mode" "OI")])
13904
;; AVX512F: extract TImode element %2 (0..3) of a V4TI register into a
;; vector register or memory with vextracti32x4.
13905(define_insn "*vec_extractv4ti"
13906  [(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
13907	(vec_select:TI
13908	  (match_operand:V4TI 1 "register_operand" "v")
13909	  (parallel
13910	    [(match_operand:SI 2 "const_0_to_3_operand")])))]
13911  "TARGET_AVX512F"
13912  "vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
13913  [(set_attr "type" "sselog")
13914   (set_attr "prefix_extra" "1")
13915   (set_attr "length_immediate" "1")
13916   (set_attr "prefix" "evex")
13917   (set_attr "mode" "XI")])
13918
;; Vector-of-TImode modes used by the lane-0 extraction split that
;; follows: V2TI unconditionally, V4TI only when AVX512F is enabled.
13919(define_mode_iterator VEXTRACTI128_MODE
13920  [(V4TI "TARGET_AVX512F") V2TI])
13921
;; After reload, extracting TImode element 0 is a plain TImode move of
;; the low part of the source register (gen_lowpart).  The split is not
;; applied when the source is an extended SSE register
;; (EXT_REX_SSE_REG_P) unless AVX512VL is enabled.
;; NOTE(review): presumably because a 128-bit move from such a register
;; cannot be encoded without AVX512VL -- confirm.
13922(define_split
13923  [(set (match_operand:TI 0 "nonimmediate_operand")
13924	(vec_select:TI
13925	  (match_operand:VEXTRACTI128_MODE 1 "register_operand")
13926	  (parallel [(const_int 0)])))]
13927  "TARGET_AVX
13928   && reload_completed
13929   && (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
13930  [(set (match_dup 0) (match_dup 1))]
13931  "operands[1] = gen_lowpart (TImode, operands[1]);")
13932
13933;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
13934;; vector modes into vec_extract*.
;; Applies only while new pseudos may still be created and only to a
;; lowpart (offset 0) subreg of a REG in a 16-, 32- or 64-byte vector
;; mode that the enabled ISA supports.  DImode results are handled only
;; on 64-bit targets or when the destination is memory.
;;
;; The source is narrowed step by step to a 128-bit <ssevecmode> value:
;;   64 bytes: take the low 256 bits (vec_extract_lo_v16si / _v8di),
;;             then fall through to the 32-byte case;
;;   32 bytes: take the low 128 bits (vec_extract_lo_v8si / _v4di);
;;   16 bytes: just re-view the register in <ssevecmode>.
;; The replacement pattern then selects element 0 of that value.
13935(define_split
13936  [(set (match_operand:SWI48x 0 "nonimmediate_operand")
13937	(subreg:SWI48x (match_operand 1 "register_operand") 0))]
13938  "can_create_pseudo_p ()
13939   && REG_P (operands[1])
13940   && VECTOR_MODE_P (GET_MODE (operands[1]))
13941   && ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
13942       || (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
13943       || (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
13944   && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
13945  [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
13946					 (parallel [(const_int 0)])))]
13947{
13948  rtx tmp;
13949
13950  switch (GET_MODE_SIZE (GET_MODE (operands[1])))
13951    {
13952    case 64:
13953      if (<MODE>mode == SImode)
13954	{
13955	  tmp = gen_reg_rtx (V8SImode);
13956	  emit_insn (gen_vec_extract_lo_v16si (tmp,
13957					       gen_lowpart (V16SImode,
13958							    operands[1])));
13959	}
13960      else
13961	{
13962	  tmp = gen_reg_rtx (V4DImode);
13963	  emit_insn (gen_vec_extract_lo_v8di (tmp,
13964					      gen_lowpart (V8DImode,
13965							   operands[1])));
13966	}
13967      operands[1] = tmp;
13968      /* FALLTHRU */
13969    case 32:
13970      tmp = gen_reg_rtx (<ssevecmode>mode);
13971      if (<MODE>mode == SImode)
13972	emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
13973							      operands[1])));
13974      else
13975	emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
13976							      operands[1])));
13977      operands[1] = tmp;
13978      break;
13979    case 16:
13980      operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
13981      break;
13982    }
13983})
13984
;; Build a V2SI from two SImode values (operand 1 = element 0,
;; operand 2 = element 1) when SSE4.1 is available.  The two inputs may
;; not both be memory.  Alternatives, in constraint order:
;;   0,1  pinsrd $1: insert operand 2 into the destination, which is
;;        tied to operand 1
;;   2,3  vpinsrd $1 (three-operand form)
;;   4,5  punpckldq with operand 2 in a vector register
;;   6    vpunpckldq
;;   7    %vmovd of operand 1 when operand 2 satisfies "C"
;;   8    punpckldq on "y" registers (type mmxcvt)
;;   9    movd into a "y" register when operand 2 satisfies "C"
;; NOTE(review): "C" is presumably the constant-zero constraint and "y"
;; the MMX register class; both are defined outside this chunk -- confirm.
13985(define_insn "*vec_concatv2si_sse4_1"
13986  [(set (match_operand:V2SI 0 "register_operand"
13987	  "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
13988	(vec_concat:V2SI
13989	  (match_operand:SI 1 "nonimmediate_operand"
13990	  "  0, 0, x,Yv, 0, 0,Yv,rm,  0,rm")
13991	  (match_operand:SI 2 "vector_move_operand"
13992	  " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
13993  "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13994  "@
13995   pinsrd\t{$1, %2, %0|%0, %2, 1}
13996   pinsrd\t{$1, %2, %0|%0, %2, 1}
13997   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
13998   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
13999   punpckldq\t{%2, %0|%0, %2}
14000   punpckldq\t{%2, %0|%0, %2}
14001   vpunpckldq\t{%2, %1, %0|%0, %1, %2}
14002   %vmovd\t{%1, %0|%0, %1}
14003   punpckldq\t{%2, %0|%0, %2}
14004   movd\t{%1, %0|%0, %1}"
14005  [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
14006   (set (attr "type")
14007     (cond [(eq_attr "alternative" "7")
14008	      (const_string "ssemov")
14009	    (eq_attr "alternative" "8")
14010	      (const_string "mmxcvt")
14011	    (eq_attr "alternative" "9")
14012	      (const_string "mmxmov")
14013	   ]
14014	   (const_string "sselog")))
;; Only the pinsrd/vpinsrd alternatives (0-3) carry an immediate and set
;; prefix_extra.
14015   (set (attr "prefix_extra")
14016     (if_then_else (eq_attr "alternative" "0,1,2,3")
14017		   (const_string "1")
14018		   (const_string "*")))
14019   (set (attr "length_immediate")
14020     (if_then_else (eq_attr "alternative" "0,1,2,3")
14021		   (const_string "1")
14022		   (const_string "*")))
14023   (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
14024   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
14025
14026;; ??? In theory we can match memory for the MMX alternative, but allowing
14027;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
14028;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI from two SImode values when SSE is available but SSE4.1
;; is not (the SSE4.1 case is covered by *vec_concatv2si_sse4_1).
;; Alternatives, in constraint order:
;;   0  punpckldq, destination tied to operand 1        (isa sse2)
;;   1  movd of operand 1, operand 2 satisfies "C"      (isa sse2)
;;   2  movd into a "y" register, operand 2 is "C"      (isa sse2)
;;   3  unpcklps, destination tied to operand 1
;;   4  movss load of operand 1 from memory, operand 2 is "C"
;;   5  punpckldq on "y" registers
;;   6  movd into a "y" register, operand 2 is "C"
14029(define_insn "*vec_concatv2si"
14030  [(set (match_operand:V2SI 0 "register_operand"     "=x,x ,*y,x,x,*y,*y")
14031	(vec_concat:V2SI
14032	  (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
14033	  (match_operand:SI 2 "reg_or_0_operand"     " x,C ,C, x,C,*y,C")))]
14034  "TARGET_SSE && !TARGET_SSE4_1"
14035  "@
14036   punpckldq\t{%2, %0|%0, %2}
14037   movd\t{%1, %0|%0, %1}
14038   movd\t{%1, %0|%0, %1}
14039   unpcklps\t{%2, %0|%0, %2}
14040   movss\t{%1, %0|%0, %1}
14041   punpckldq\t{%2, %0|%0, %2}
14042   movd\t{%1, %0|%0, %1}"
14043  [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
14044   (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
14045   (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
14046
;; Build a V4SI from two V2SI halves (operand 1 = low half, operand 2 =
;; high half).  Operand 1 is always a register.  Alternatives:
;;   0  punpcklqdq, destination tied to operand 1   (isa sse2_noavx)
;;   1  vpunpcklqdq                                 (isa avx)
;;   2  movlhps, destination tied to operand 1      (isa noavx)
;;   3  movhps with operand 2 in memory             (isa noavx)
;;   4  vmovhps with operand 2 in memory            (isa avx)
14047(define_insn "*vec_concatv4si"
14048  [(set (match_operand:V4SI 0 "register_operand"       "=x,v,x,x,v")
14049	(vec_concat:V4SI
14050	  (match_operand:V2SI 1 "register_operand"     " 0,v,0,0,v")
14051	  (match_operand:V2SI 2 "nonimmediate_operand" " x,v,x,m,m")))]
14052  "TARGET_SSE"
14053  "@
14054   punpcklqdq\t{%2, %0|%0, %2}
14055   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
14056   movlhps\t{%2, %0|%0, %2}
14057   movhps\t{%2, %0|%0, %q2}
14058   vmovhps\t{%2, %1, %0|%0, %1, %q2}"
14059  [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
14060   (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
14061   (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
14062   (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
14063
14064;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI from two DImode values (operand 1 = element 0,
;; operand 2 = element 1).  Alternatives, in constraint order:
;;   0,1    pinsrq $1, destination tied to operand 1  (x64_sse4_noavx)
;;   2,3    vpinsrq $1                                (x64_avx / x64_avx512dq)
;;   4      move from a general register with operand 2 "C"; the mnemonic
;;          is %vmovq or %vmovd depending on HAVE_AS_IX86_INTERUNIT_MOVQ
;;   5      %vmovq of operand 1 with operand 2 "C"
;;   6      movq2dq with operand 2 "C"
;;   7,8    punpcklqdq / vpunpcklqdq
;;   9      movlhps
;;   10,11  movhps / vmovhps with operand 2 in memory
;; Alternatives 9 and 10 are not named in the isa cond and get its
;; default, "noavx".
14065(define_insn "vec_concatv2di"
14066  [(set (match_operand:V2DI 0 "register_operand"
14067	  "=Yr,*x,x ,v ,Yi,v ,x    ,x,v ,x,x,v")
14068	(vec_concat:V2DI
14069	  (match_operand:DI 1 "nonimmediate_operand"
14070	  "  0, 0,x ,Yv,r ,vm,?!*Yn,0,Yv,0,0,v")
14071	  (match_operand:DI 2 "vector_move_operand"
14072	  " rm,rm,rm,rm,C ,C ,C ,x,Yv,x,m,m")))]
14073  "TARGET_SSE"
14074  "@
14075   pinsrq\t{$1, %2, %0|%0, %2, 1}
14076   pinsrq\t{$1, %2, %0|%0, %2, 1}
14077   vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
14078   vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
14079   * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
14080   %vmovq\t{%1, %0|%0, %1}
14081   movq2dq\t{%1, %0|%0, %1}
14082   punpcklqdq\t{%2, %0|%0, %2}
14083   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
14084   movlhps\t{%2, %0|%0, %2}
14085   movhps\t{%2, %0|%0, %2}
14086   vmovhps\t{%2, %1, %0|%0, %1, %2}"
14087  [(set (attr "isa")
14088     (cond [(eq_attr "alternative" "0,1")
14089	      (const_string "x64_sse4_noavx")
14090	    (eq_attr "alternative" "2")
14091	      (const_string "x64_avx")
14092	    (eq_attr "alternative" "3")
14093	      (const_string "x64_avx512dq")
14094	    (eq_attr "alternative" "4")
14095	      (const_string "x64")
14096	    (eq_attr "alternative" "5,6")
14097	      (const_string "sse2")
14098	    (eq_attr "alternative" "7")
14099	      (const_string "sse2_noavx")
14100	    (eq_attr "alternative" "8,11")
14101	      (const_string "avx")
14102	   ]
14103	   (const_string "noavx")))
14104   (set (attr "type")
14105     (if_then_else
14106       (eq_attr "alternative" "0,1,2,3,7,8")
14107       (const_string "sselog")
14108       (const_string "ssemov")))
14109   (set (attr "prefix_rex")
14110     (if_then_else (eq_attr "alternative" "0,1,2,3,4")
14111		   (const_string "1")
14112		   (const_string "*")))
14113   (set (attr "prefix_extra")
14114     (if_then_else (eq_attr "alternative" "0,1,2,3")
14115		   (const_string "1")
14116		   (const_string "*")))
14117   (set (attr "length_immediate")
14118     (if_then_else (eq_attr "alternative" "0,1,2,3")
14119		   (const_string "1")
14120		   (const_string "*")))
14121   (set (attr "prefix")
14122     (cond [(eq_attr "alternative" "2")
14123	      (const_string "vex")
14124	    (eq_attr "alternative" "3")
14125	      (const_string "evex")
14126	    (eq_attr "alternative" "4,5")
14127	      (const_string "maybe_vex")
14128	    (eq_attr "alternative" "8,11")
14129	      (const_string "maybe_evex")
14130	   ]
14131	   (const_string "orig")))
14132   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
14133
14134;; vmovq also clears the higher bits.
;; AVX: set element 0 of a VI8_AVX_AVX512F vector from scalar operand 2
;; (vec_merge mask 1) while every other element comes from operand 1,
;; which must satisfy const0_operand.  Emitted as a single vmovq whose
;; destination is printed through %x0.  Alternative 0 takes the scalar
;; from a general register and is 64-bit only (isa x64); alternative 1
;; takes it from a vector register or memory.
;; NOTE(review): %x0 presumably prints the 128-bit (xmm) name of the
;; destination register -- confirm against the operand-print codes.
14135(define_insn "vec_set<mode>_0"
14136  [(set (match_operand:VI8_AVX_AVX512F 0 "register_operand" "=Yi,v")
14137	(vec_merge:VI8_AVX_AVX512F
14138	  (vec_duplicate:VI8_AVX_AVX512F
14139	    (match_operand:<ssescalarmode> 2 "general_operand" "r,vm"))
14140	  (match_operand:VI8_AVX_AVX512F 1 "const0_operand" "C,C")
14141	  (const_int 1)))]
14142  "TARGET_AVX"
14143  "vmovq\t{%2, %x0|%x0, %2}"
14144  [(set_attr "isa" "x64,*")
14145   (set_attr "type" "ssemov")
14146   (set_attr "prefix_rex" "1,*")
14147   (set_attr "prefix" "maybe_evex")
14148   (set_attr "mode" "TI")])
14149
;; Expander for the vec_unpacks_lo_<mode> pattern name: all work is
;; delegated to ix86_expand_sse_unpack (operands 0 and 1, flags
;; false/false) and the expander finishes with DONE.
;; NOTE(review): comparing the four vec_unpack{s,u}_{lo,hi} expanders in
;; this file, the third argument appears to select unsigned extension and
;; the fourth the high half -- confirm against ix86_expand_sse_unpack.
14150(define_expand "vec_unpacks_lo_<mode>"
14151  [(match_operand:<sseunpackmode> 0 "register_operand")
14152   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
14153  "TARGET_SSE2"
14154  "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
14155
;; Expander for the vec_unpacks_hi_<mode> pattern name on integer vector
;; modes: delegates to ix86_expand_sse_unpack with flags false/true.
;; NOTE(review): the flags presumably mean (unsigned_p, high_p) --
;; confirm against ix86_expand_sse_unpack.
14156(define_expand "vec_unpacks_hi_<mode>"
14157  [(match_operand:<sseunpackmode> 0 "register_operand")
14158   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
14159  "TARGET_SSE2"
14160  "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
14161
;; Expander for the vec_unpacku_lo_<mode> pattern name: delegates to
;; ix86_expand_sse_unpack with flags true/false.
;; NOTE(review): the flags presumably mean (unsigned_p, high_p) --
;; confirm against ix86_expand_sse_unpack.
14162(define_expand "vec_unpacku_lo_<mode>"
14163  [(match_operand:<sseunpackmode> 0 "register_operand")
14164   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
14165  "TARGET_SSE2"
14166  "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
14167
;; AVX512F scalar-mode variant: operand 0 is a QImode register, operand 1
;; an HImode register.  The expansion is a single move that writes
;; operand 1 into the HImode subreg (byte 0) of operand 0.
;; NOTE(review): the scalar modes presumably hold AVX-512 mask values --
;; confirm with the callers of this pattern.
14168(define_expand "vec_unpacks_lo_hi"
14169  [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
14170        (match_operand:HI 1 "register_operand"))]
14171  "TARGET_AVX512F")
14172
;; AVX512F scalar-mode variant: operand 0 (HImode) receives the low
;; HImode subreg (byte 0) of SImode operand 1.
;; NOTE(review): the scalar modes presumably hold AVX-512 mask values --
;; confirm with the callers of this pattern.
14173(define_expand "vec_unpacks_lo_si"
14174  [(set (match_operand:HI 0 "register_operand")
14175        (subreg:HI (match_operand:SI 1 "register_operand") 0))]
14176  "TARGET_AVX512F")
14177
;; AVX512BW scalar-mode variant: operand 0 (SImode) receives the low
;; SImode subreg (byte 0) of DImode operand 1.
;; NOTE(review): the scalar modes presumably hold AVX-512 mask values --
;; confirm with the callers of this pattern.
14178(define_expand "vec_unpacks_lo_di"
14179  [(set (match_operand:SI 0 "register_operand")
14180        (subreg:SI (match_operand:DI 1 "register_operand") 0))]
14181  "TARGET_AVX512BW")
14182
;; Expander for the vec_unpacku_hi_<mode> pattern name: delegates to
;; ix86_expand_sse_unpack with flags true/true.
;; NOTE(review): the flags presumably mean (unsigned_p, high_p) --
;; confirm against ix86_expand_sse_unpack.
14183(define_expand "vec_unpacku_hi_<mode>"
14184  [(match_operand:<sseunpackmode> 0 "register_operand")
14185   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
14186  "TARGET_SSE2"
14187  "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
14188
;; AVX512F scalar-mode variant for the high half: the HImode subreg of
;; QImode operand 0 is set to HImode operand 1 logically shifted right by
;; 8 bits.  The set is wrapped in a parallel together with an
;; UNSPEC_MASKOP marker.
;; NOTE(review): the marker presumably steers matching toward the mask
;; register form of the shift -- confirm with the matching define_insn.
14189(define_expand "vec_unpacks_hi_hi"
14190  [(parallel
14191     [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
14192	   (lshiftrt:HI (match_operand:HI 1 "register_operand")
14193			(const_int 8)))
14194      (unspec [(const_int 0)] UNSPEC_MASKOP)])]
14195  "TARGET_AVX512F")
14196
;; AVX512BW scalar-mode variant for the high half, iterated over SWI48x:
;; the SWI48x subreg of operand 0 (mode <HALFMASKMODE>) is set to
;; operand 1 logically shifted right by operand 2.  Operand 2 is not
;; supplied by the caller; the preparation statement sets it to the bit
;; size of <HALFMASKMODE>, i.e. the shift moves the upper half down.
;; The set is paired with an UNSPEC_MASKOP marker in a parallel.
14197(define_expand "vec_unpacks_hi_<mode>"
14198  [(parallel
14199     [(set (subreg:SWI48x
14200	     (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
14201	   (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
14202			    (match_dup 2)))
14203      (unspec [(const_int 0)] UNSPEC_MASKOP)])]
14204  "TARGET_AVX512BW"
14205  "operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));")
14206
14207;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14208;;
14209;; Miscellaneous
14210;;
14211;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14212
;; Unsigned rounding average of two byte/word vectors, expressed as
;;   truncate ((zext (op1) + zext (op2) + 1) >> 1)
;; with the arithmetic done in the double-width mode <ssedoublemode>.
;;
;; The preparation code builds the constant-1 vector with
;; CONST1_RTX (<MODE>mode) and runs ix86_fixup_binary_operands_no_copy on
;; the operand array.  In the masked variant (<mask_applied>) the
;; caller's operand 3 is saved in tmp, the constant is parked in
;; operand 3 for the fixup call, and afterwards the constant is moved to
;; operand 5 and operand 3 is restored.
;; NOTE(review): <mask_expand_op3> presumably expands to 3 without
;; masking and to 5 with masking -- confirm in the subst definitions.
;; NOTE(review): the constant is created in <MODE>mode although it is an
;; operand of plus:<ssedoublemode>; confirm whether this mode mismatch
;; is intended.
14213(define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
14214  [(set (match_operand:VI12_AVX2 0 "register_operand")
14215	(truncate:VI12_AVX2
14216	  (lshiftrt:<ssedoublemode>
14217	    (plus:<ssedoublemode>
14218	      (plus:<ssedoublemode>
14219		(zero_extend:<ssedoublemode>
14220		  (match_operand:VI12_AVX2 1 "vector_operand"))
14221		(zero_extend:<ssedoublemode>
14222		  (match_operand:VI12_AVX2 2 "vector_operand")))
14223	      (match_dup <mask_expand_op3>))
14224	    (const_int 1))))]
14225  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14226{
14227  rtx tmp;
14228  if (<mask_applied>)
14229    tmp = operands[3];
14230  operands[3] = CONST1_RTX(<MODE>mode);
14231  ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
14232
14233  if (<mask_applied>)
14234    {
14235      operands[5] = operands[3];
14236      operands[3] = tmp;
14237    }
14238})
14239
;; Matcher for the unsigned rounding average
;;   truncate ((zext (op1) + zext (op2) + 1) >> 1).
;; Operand 1 is marked commutative ("%"); the two inputs may not both be
;; memory.  Alternative 0 is the legacy two-operand pavg form with the
;; destination tied to operand 1 (isa noavx); alternative 1 is the
;; three-operand vpavg form, optionally with a mask operand (isa avx).
;; NOTE(review): the const1_operand is matched in mode VI12_AVX2 although
;; it sits inside plus:<ssedoublemode>; confirm whether this mode
;; mismatch is intended.
14240(define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
14241  [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
14242	(truncate:VI12_AVX2
14243	  (lshiftrt:<ssedoublemode>
14244	    (plus:<ssedoublemode>
14245	      (plus:<ssedoublemode>
14246		(zero_extend:<ssedoublemode>
14247		  (match_operand:VI12_AVX2 1 "vector_operand" "%0,v"))
14248		(zero_extend:<ssedoublemode>
14249		  (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))
14250	      (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
14251	    (const_int 1))))]
14252  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
14253   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14254  "@
14255   pavg<ssemodesuffix>\t{%2, %0|%0, %2}
14256   vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14257  [(set_attr "isa" "noavx,avx")
14258   (set_attr "type" "sseiadd")
14259   (set_attr "prefix_data16" "1,*")
14260   (set_attr "prefix" "orig,<mask_prefix>")
14261   (set_attr "mode" "<sseinsnmode>")])
14262
14263;; The correct representation for this is absolutely enormous, and
14264;; surely not generally useful.
;; psadbw/vpsadbw, modelled as the opaque unspec UNSPEC_PSADBW of two
;; <ssebytemode> inputs that yields a VI8_AVX2_AVX512BW result.
;; Alternative 0 is the legacy two-operand form (destination tied to
;; operand 1, isa noavx); alternative 1 is the three-operand VEX/EVEX
;; form (isa avx).
14265(define_insn "<sse2_avx2>_psadbw"
14266  [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
14267	(unspec:VI8_AVX2_AVX512BW
14268	  [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
14269	   (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")]
14270	  UNSPEC_PSADBW))]
14271  "TARGET_SSE2"
14272  "@
14273   psadbw\t{%2, %0|%0, %2}
14274   vpsadbw\t{%2, %1, %0|%0, %1, %2}"
14275  [(set_attr "isa" "noavx,avx")
14276   (set_attr "type" "sseiadd")
14277   (set_attr "atom_unit" "simul")
14278   (set_attr "prefix_data16" "1,*")
14279   (set_attr "prefix" "orig,maybe_evex")
14280   (set_attr "mode" "<sseinsnmode>")])
14281
;; movmsk<ssemodesuffix> from a VF_128_256 vector register into a
;; general SImode register, modelled as the unspec UNSPEC_MOVMSK.
14282(define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
14283  [(set (match_operand:SI 0 "register_operand" "=r")
14284	(unspec:SI
14285	  [(match_operand:VF_128_256 1 "register_operand" "x")]
14286	  UNSPEC_MOVMSK))]
14287  "TARGET_SSE"
14288  "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
14289  [(set_attr "type" "ssemov")
14290   (set_attr "prefix" "maybe_vex")
14291   (set_attr "mode" "<MODE>")])
14292
;; 64-bit only: the same movmsk instruction, matched when its SImode
;; result is zero-extended to DImode.  The destination is printed through
;; %k0 and no separate extension instruction is emitted.
14293(define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext"
14294  [(set (match_operand:DI 0 "register_operand" "=r")
14295	(zero_extend:DI
14296	  (unspec:SI
14297	    [(match_operand:VF_128_256 1 "register_operand" "x")]
14298	    UNSPEC_MOVMSK)))]
14299  "TARGET_64BIT && TARGET_SSE"
14300  "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
14301  [(set_attr "type" "ssemov")
14302   (set_attr "prefix" "maybe_vex")
14303   (set_attr "mode" "<MODE>")])
14304
;; pmovmskb from a VI1_AVX2 vector register into a general SImode
;; register, modelled as the unspec UNSPEC_MOVMSK.
14305(define_insn "<sse2_avx2>_pmovmskb"
14306  [(set (match_operand:SI 0 "register_operand" "=r")
14307	(unspec:SI
14308	  [(match_operand:VI1_AVX2 1 "register_operand" "x")]
14309	  UNSPEC_MOVMSK))]
14310  "TARGET_SSE2"
14311  "%vpmovmskb\t{%1, %0|%0, %1}"
14312  [(set_attr "type" "ssemov")
;; prefix_data16 is "1" only when AVX is not enabled.
14313   (set (attr "prefix_data16")
14314     (if_then_else
14315       (match_test "TARGET_AVX")
14316     (const_string "*")
14317     (const_string "1")))
14318   (set_attr "prefix" "maybe_vex")
14319   (set_attr "mode" "SI")])
14320
;; 64-bit only: the same pmovmskb instruction, matched when its SImode
;; result is zero-extended to DImode.  The destination is printed through
;; %k0 and no separate extension instruction is emitted.
14321(define_insn "*<sse2_avx2>_pmovmskb_zext"
14322  [(set (match_operand:DI 0 "register_operand" "=r")
14323	(zero_extend:DI
14324	  (unspec:SI
14325	    [(match_operand:VI1_AVX2 1 "register_operand" "x")]
14326	    UNSPEC_MOVMSK)))]
14327  "TARGET_64BIT && TARGET_SSE2"
14328  "%vpmovmskb\t{%1, %k0|%k0, %1}"
14329  [(set_attr "type" "ssemov")
;; prefix_data16 is "1" only when AVX is not enabled.
14330   (set (attr "prefix_data16")
14331     (if_then_else
14332       (match_test "TARGET_AVX")
14333     (const_string "*")
14334     (const_string "1")))
14335   (set_attr "prefix" "maybe_vex")
14336   (set_attr "mode" "SI")])
14337
;; Named expander for maskmovdqu.  The store is modelled as an
;; UNSPEC_MASKMOV of operands 1 and 2 and of the previous contents of the
;; destination memory (match_dup 0).  There is no preparation code; the
;; generated RTL is expected to match *sse2_maskmovdqu.
14338(define_expand "sse2_maskmovdqu"
14339  [(set (match_operand:V16QI 0 "memory_operand")
14340	(unspec:V16QI [(match_operand:V16QI 1 "register_operand")
14341		       (match_operand:V16QI 2 "register_operand")
14342		       (match_dup 0)]
14343		      UNSPEC_MASKMOV))]
14344  "TARGET_SSE2")
14345
;; maskmovdqu proper.  The destination address must be in the register
;; selected by constraint "D" and is not printed in the template; only
;; the two vector operands appear there.  When Pmode differs from
;; word_mode an "addr32" prefix is written to the assembly output
;; directly with fputs before the mnemonic, and length_address accounts
;; for it.
14346(define_insn "*sse2_maskmovdqu"
14347  [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
14348	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
14349		       (match_operand:V16QI 2 "register_operand" "x")
14350		       (mem:V16QI (match_dup 0))]
14351		      UNSPEC_MASKMOV))]
14352  "TARGET_SSE2"
14353{
14354  /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
14355     that requires %v to be at the beginning of the opcode name.  */
14356  if (Pmode != word_mode)
14357    fputs ("\taddr32", asm_out_file);
14358  return "%vmaskmovdqu\t{%2, %1|%1, %2}";
14359}
14360  [(set_attr "type" "ssemov")
14361   (set_attr "prefix_data16" "1")
14362   (set (attr "length_address")
14363     (symbol_ref ("Pmode != word_mode")))
14364   ;; The implicit %rdi operand confuses default length_vex computation.
14365   (set (attr "length_vex")
14366     (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
14367   (set_attr "prefix" "maybe_vex")
14368   (set_attr "znver1_decode" "vector")
14369   (set_attr "mode" "TI")])
14370
;; ldmxcsr from an SImode memory operand.  Modelled as an
;; unspec_volatile (UNSPECV_LDMXCSR) with no visible destination; the
;; "memory" attribute is "load".
14371(define_insn "sse_ldmxcsr"
14372  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
14373		    UNSPECV_LDMXCSR)]
14374  "TARGET_SSE"
14375  "%vldmxcsr\t%0"
14376  [(set_attr "type" "sse")
14377   (set_attr "atom_sse_attr" "mxcsr")
14378   (set_attr "prefix" "maybe_vex")
14379   (set_attr "memory" "load")])
14380
;; stmxcsr into an SImode memory operand.  The stored value is an
;; unspec_volatile (UNSPECV_STMXCSR) with a dummy input; the "memory"
;; attribute is "store".
14381(define_insn "sse_stmxcsr"
14382  [(set (match_operand:SI 0 "memory_operand" "=m")
14383	(unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
14384  "TARGET_SSE"
14385  "%vstmxcsr\t%0"
14386  [(set_attr "type" "sse")
14387   (set_attr "atom_sse_attr" "mxcsr")
14388   (set_attr "prefix" "maybe_vex")
14389   (set_attr "memory" "store")])
14390
;; clflush of the address given by operand 0.  The operand is an address
;; (predicate address_operand, constraint "p") rather than a MEM and is
;; printed with %a0.  Modelled as an unspec_volatile with the "memory"
;; attribute set to "unknown".
14391(define_insn "sse2_clflush"
14392  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
14393		    UNSPECV_CLFLUSH)]
14394  "TARGET_SSE2"
14395  "clflush\t%a0"
14396  [(set_attr "type" "sse")
14397   (set_attr "atom_sse_attr" "fence")
14398   (set_attr "memory" "unknown")])
14399
14400;; As per AMD and Intel ISA manuals, the first operand is extensions
14401;; and it goes to %ecx. The second operand received is hints and it goes
14402;; to %eax.
;; mwait.  Both operands are SImode registers pinned by constraint
;; (operand 0 to "c", operand 1 to "a"); the template is a bare "mwait"
;; because the instruction names no explicit operands.  The length is
;; fixed at 3 bytes.
14403(define_insn "sse3_mwait"
14404  [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
14405		     (match_operand:SI 1 "register_operand" "a")]
14406		    UNSPECV_MWAIT)]
14407  "TARGET_SSE3"
14408;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
14409;; Since 32bit register operands are implicitly zero extended to 64bit,
14410;; we only need to set up 32bit registers.
14411  "mwait"
14412  [(set_attr "length" "3")])
14413
;; monitor, iterated over the pointer modes P.  Operand 0 is a pointer
;; pinned to constraint "a"; operands 1 and 2 are SImode registers pinned
;; to "c" and "d".  The template prints no explicit operands.  The length
;; is 3 bytes plus 1 when Pmode differs from word_mode.
;; NOTE(review): the extra byte presumably covers an address-size prefix
;; emitted by %^ -- confirm against the operand-print codes.
14414(define_insn "sse3_monitor_<mode>"
14415  [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
14416		     (match_operand:SI 1 "register_operand" "c")
14417		     (match_operand:SI 2 "register_operand" "d")]
14418		    UNSPECV_MONITOR)]
14419  "TARGET_SSE3"
14420;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
14421;; RCX and RDX are used.  Since 32bit register operands are implicitly
14422;; zero extended to 64bit, we only need to set up 32bit registers.
14423  "%^monitor"
14424  [(set (attr "length")
14425     (symbol_ref ("(Pmode != word_mode) + 3")))])
14426
14427;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14428;;
14429;; SSSE3 instructions
14430;;
14431;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14432
;; RTL codes iterated by the 16-bit horizontal add/subtract patterns
;; below: plain and signed-saturating addition and subtraction.
14433(define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
14434
;; AVX2 256-bit horizontal add/subtract of adjacent 16-bit pairs
;; (one insn per code in ssse3_plusminus).
;;
;; The VEX.256 forms work independently on each 128-bit lane, so the
;; result is laid out as
;;   elements  0-3   pairs from operand 1, elements 0-7
;;   elements  4-7   pairs from operand 2, elements 0-7
;;   elements  8-11  pairs from operand 1, elements 8-15
;;   elements 12-15  pairs from operand 2, elements 8-15
;; The RTL below follows that layout.  It must not simply concatenate all
;; eight results of operand 1 followed by all eight of operand 2: that
;; would describe a different value than the instruction computes.
;; Operand 1 must be a register; operand 2 may be a register or memory.
14435(define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
14436  [(set (match_operand:V16HI 0 "register_operand" "=x")
14437	(vec_concat:V16HI
14438	  (vec_concat:V8HI
14439	    (vec_concat:V4HI
14440	      (vec_concat:V2HI
14441		(ssse3_plusminus:HI
14442		  (vec_select:HI
14443		    (match_operand:V16HI 1 "register_operand" "x")
14444		    (parallel [(const_int 0)]))
14445		  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
14446		(ssse3_plusminus:HI
14447		  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
14448		  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
14449	      (vec_concat:V2HI
14450		(ssse3_plusminus:HI
14451		  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
14452		  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
14453		(ssse3_plusminus:HI
14454		  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
14455		  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
14456	    (vec_concat:V4HI
14457	      (vec_concat:V2HI
14458		(ssse3_plusminus:HI
14459		  (vec_select:HI
14460		    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
14461		    (parallel [(const_int 0)]))
14462		  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
14463		(ssse3_plusminus:HI
14464		  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
14465		  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
14466	      (vec_concat:V2HI
14467		(ssse3_plusminus:HI
14468		  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
14469		  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
14470		(ssse3_plusminus:HI
14471		  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
14472		  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
14473	  (vec_concat:V8HI
14474	    (vec_concat:V4HI
14475	      (vec_concat:V2HI
14476		(ssse3_plusminus:HI
14477		  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
14478		  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
14479		(ssse3_plusminus:HI
14480		  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
14481		  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
14482	      (vec_concat:V2HI
14483		(ssse3_plusminus:HI
14484		  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
14485		  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
14486		(ssse3_plusminus:HI
14487		  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
14488		  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
14489	    (vec_concat:V4HI
14490	      (vec_concat:V2HI
14491		(ssse3_plusminus:HI
14492		  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
14493		  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
14494		(ssse3_plusminus:HI
14495		  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
14496		  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
14497	      (vec_concat:V2HI
14498		(ssse3_plusminus:HI
14499		  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
14500		  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
14501		(ssse3_plusminus:HI
14502		  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
14503		  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
14504  "TARGET_AVX2"
14505  "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
14506  [(set_attr "type" "sseiadd")
14507   (set_attr "prefix_extra" "1")
14508   (set_attr "prefix" "vex")
14509   (set_attr "mode" "OI")])
14510
;; SSSE3 128-bit horizontal add/subtract of adjacent 16-bit pairs (one
;; insn per code in ssse3_plusminus).  Result elements 0-3 are the four
;; pair results of operand 1 and elements 4-7 the four pair results of
;; operand 2.  Alternative 0 is the legacy two-operand form with the
;; destination tied to operand 1 (isa noavx); alternative 1 is the
;; three-operand VEX form (isa avx).
14511(define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
14512  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
14513	(vec_concat:V8HI
14514	  (vec_concat:V4HI
14515	    (vec_concat:V2HI
14516	      (ssse3_plusminus:HI
14517		(vec_select:HI
14518		  (match_operand:V8HI 1 "register_operand" "0,x")
14519		  (parallel [(const_int 0)]))
14520		(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
14521	      (ssse3_plusminus:HI
14522		(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
14523		(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
14524	    (vec_concat:V2HI
14525	      (ssse3_plusminus:HI
14526		(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
14527		(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
14528	      (ssse3_plusminus:HI
14529		(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
14530		(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
14531	  (vec_concat:V4HI
14532	    (vec_concat:V2HI
14533	      (ssse3_plusminus:HI
14534		(vec_select:HI
14535		  (match_operand:V8HI 2 "vector_operand" "xBm,xm")
14536		  (parallel [(const_int 0)]))
14537		(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
14538	      (ssse3_plusminus:HI
14539		(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
14540		(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
14541	    (vec_concat:V2HI
14542	      (ssse3_plusminus:HI
14543		(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
14544		(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
14545	      (ssse3_plusminus:HI
14546		(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
14547		(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
14548  "TARGET_SSSE3"
14549  "@
14550   ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
14551   vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
14552  [(set_attr "isa" "noavx,avx")
14553   (set_attr "type" "sseiadd")
14554   (set_attr "atom_unit" "complex")
14555   (set_attr "prefix_data16" "1,*")
14556   (set_attr "prefix_extra" "1")
14557   (set_attr "prefix" "orig,vex")
14558   (set_attr "mode" "TI")])
14559
;; SSSE3 64-bit horizontal add/subtract of adjacent 16-bit pairs (one
;; insn per code in ssse3_plusminus).  Result elements 0-1 are the two
;; pair results of operand 1 and elements 2-3 the two pair results of
;; operand 2.  Two-operand form only: the destination is tied to
;; operand 1, and operand 2 may be a "y" register or memory.
;; NOTE(review): "y" is presumably the MMX register class (the insn mode
;; attribute is DI) -- confirm in constraints.md.
14560(define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
14561  [(set (match_operand:V4HI 0 "register_operand" "=y")
14562	(vec_concat:V4HI
14563	  (vec_concat:V2HI
14564	    (ssse3_plusminus:HI
14565	      (vec_select:HI
14566		(match_operand:V4HI 1 "register_operand" "0")
14567		(parallel [(const_int 0)]))
14568	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
14569	    (ssse3_plusminus:HI
14570	      (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
14571	      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
14572	  (vec_concat:V2HI
14573	    (ssse3_plusminus:HI
14574	      (vec_select:HI
14575		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
14576		(parallel [(const_int 0)]))
14577	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
14578	    (ssse3_plusminus:HI
14579	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
14580	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
14581  "TARGET_SSSE3"
14582  "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
14583  [(set_attr "type" "sseiadd")
14584   (set_attr "atom_unit" "complex")
14585   (set_attr "prefix_extra" "1")
14586   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14587   (set_attr "mode" "DI")])
14588
;; 256-bit horizontal add/subtract of adjacent 32-bit elements
;; (vphaddd / vphsubd, selected by the plusminus code iterator).
;;
;; The 256-bit instruction works independently on each 128-bit lane, so
;; the result layout is:
;;   elements 0-1: pair results (0,1) and (2,3) of operand 1
;;   elements 2-3: pair results (0,1) and (2,3) of operand 2
;;   elements 4-5: pair results (4,5) and (6,7) of operand 1
;;   elements 6-7: pair results (4,5) and (6,7) of operand 2
;; The RTL below follows that per-lane order.  Describing it as "all of
;; operand 1, then all of operand 2" would not match what the hardware
;; computes and could mislead any RTL-level simplification of this insn.
;;
;; Operand 1 must be a register; operand 2 may be a register or memory.
14589(define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
14590  [(set (match_operand:V8SI 0 "register_operand" "=x")
14591	(vec_concat:V8SI
14592	  (vec_concat:V4SI
14593	    (vec_concat:V2SI
14594	      (plusminus:SI
14595		(vec_select:SI
14596		  (match_operand:V8SI 1 "register_operand" "x")
14597		  (parallel [(const_int 0)]))
14598		(vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
14599	      (plusminus:SI
14600		(vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
14601		(vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
14602	    (vec_concat:V2SI
14603	      (plusminus:SI
14604		(vec_select:SI
14605		  (match_operand:V8SI 2 "nonimmediate_operand" "xm")
14606		  (parallel [(const_int 0)]))
14607		(vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
14608	      (plusminus:SI
14609		(vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
14610		(vec_select:SI (match_dup 2) (parallel [(const_int 3)])))))
14611	  (vec_concat:V4SI
14612	    (vec_concat:V2SI
14613	      (plusminus:SI
14614		(vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
14615		(vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
14616	      (plusminus:SI
14617		(vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
14618		(vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))
14619	    (vec_concat:V2SI
14620	      (plusminus:SI
14621		(vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
14622		(vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
14623	      (plusminus:SI
14624		(vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
14625		(vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
14626  "TARGET_AVX2"
14627  "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
14628  [(set_attr "type" "sseiadd")
14629   (set_attr "prefix_extra" "1")
14630   (set_attr "prefix" "vex")
14631   (set_attr "mode" "OI")])
14632
;; 128-bit horizontal add/subtract of adjacent 32-bit elements.
;; Result elements 0-1 are the pair results (0,1) and (2,3) of operand 1;
;; result elements 2-3 are the same pairs of operand 2.
;; Two alternatives, selected through the "isa" attribute:
;;   0 (noavx): two-operand form, operand 1 tied to the destination.
;;   1 (avx):   three-operand "v"-prefixed form.
14633(define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
14634  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
14635	(vec_concat:V4SI
14636	  (vec_concat:V2SI
14637	    (plusminus:SI
14638	      (vec_select:SI
14639		(match_operand:V4SI 1 "register_operand" "0,x")
14640		(parallel [(const_int 0)]))
14641	      (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
14642	    (plusminus:SI
14643	      (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
14644	      (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
14645	  (vec_concat:V2SI
14646	    (plusminus:SI
14647	      (vec_select:SI
14648		(match_operand:V4SI 2 "vector_operand" "xBm,xm")
14649		(parallel [(const_int 0)]))
14650	      (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
14651	    (plusminus:SI
14652	      (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
14653	      (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
14654  "TARGET_SSSE3"
14655  "@
14656   ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
14657   vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
14658  [(set_attr "isa" "noavx,avx")
14659   (set_attr "type" "sseiadd")
14660   (set_attr "atom_unit" "complex")
14661   (set_attr "prefix_data16" "1,*")
14662   (set_attr "prefix_extra" "1")
14663   (set_attr "prefix" "orig,vex")
14664   (set_attr "mode" "TI")])
14665
;; Horizontal add/subtract on V2SI operands: result element 0 combines
;; the two elements of operand 1, result element 1 combines the two
;; elements of operand 2.  Operand 1 is tied to the destination ("0");
;; operand 2 may be a "y" register or memory.
14666(define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
14667  [(set (match_operand:V2SI 0 "register_operand" "=y")
14668	(vec_concat:V2SI
14669	  (plusminus:SI
14670	    (vec_select:SI
14671	      (match_operand:V2SI 1 "register_operand" "0")
14672	      (parallel [(const_int 0)]))
14673	    (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
14674	  (plusminus:SI
14675	    (vec_select:SI
14676	      (match_operand:V2SI 2 "nonimmediate_operand" "ym")
14677	      (parallel [(const_int 0)]))
14678	    (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
14679  "TARGET_SSSE3"
14680  "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
14681  [(set_attr "type" "sseiadd")
14682   (set_attr "atom_unit" "complex")
14683   (set_attr "prefix_extra" "1")
14684   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14685   (set_attr "mode" "DI")])
14686
;; 256-bit vpmaddubsw.  As written in the RTL:
;;   - the even-indexed bytes of operand 1 are zero-extended and
;;     multiplied by the sign-extended even-indexed bytes of operand 2;
;;   - the same is done for the odd-indexed bytes;
;;   - the two V16HI products are added with signed saturation (ss_plus).
;; Alternative 0 uses "x" registers with a VEX prefix; alternative 1
;; uses "v" registers with an EVEX prefix and is only enabled for the
;; avx512bw isa.
14687(define_insn "avx2_pmaddubsw256"
14688  [(set (match_operand:V16HI 0 "register_operand" "=x,v")
14689	(ss_plus:V16HI
14690	  (mult:V16HI
14691	    (zero_extend:V16HI
14692	      (vec_select:V16QI
14693		(match_operand:V32QI 1 "register_operand" "x,v")
14694		(parallel [(const_int 0) (const_int 2)
14695			   (const_int 4) (const_int 6)
14696			   (const_int 8) (const_int 10)
14697			   (const_int 12) (const_int 14)
14698			   (const_int 16) (const_int 18)
14699			   (const_int 20) (const_int 22)
14700			   (const_int 24) (const_int 26)
14701			   (const_int 28) (const_int 30)])))
14702	    (sign_extend:V16HI
14703	      (vec_select:V16QI
14704		(match_operand:V32QI 2 "nonimmediate_operand" "xm,vm")
14705		(parallel [(const_int 0) (const_int 2)
14706			   (const_int 4) (const_int 6)
14707			   (const_int 8) (const_int 10)
14708			   (const_int 12) (const_int 14)
14709			   (const_int 16) (const_int 18)
14710			   (const_int 20) (const_int 22)
14711			   (const_int 24) (const_int 26)
14712			   (const_int 28) (const_int 30)]))))
14713	  (mult:V16HI
14714	    (zero_extend:V16HI
14715	      (vec_select:V16QI (match_dup 1)
14716		(parallel [(const_int 1) (const_int 3)
14717			   (const_int 5) (const_int 7)
14718			   (const_int 9) (const_int 11)
14719			   (const_int 13) (const_int 15)
14720			   (const_int 17) (const_int 19)
14721			   (const_int 21) (const_int 23)
14722			   (const_int 25) (const_int 27)
14723			   (const_int 29) (const_int 31)])))
14724	    (sign_extend:V16HI
14725	      (vec_select:V16QI (match_dup 2)
14726		(parallel [(const_int 1) (const_int 3)
14727			   (const_int 5) (const_int 7)
14728			   (const_int 9) (const_int 11)
14729			   (const_int 13) (const_int 15)
14730			   (const_int 17) (const_int 19)
14731			   (const_int 21) (const_int 23)
14732			   (const_int 25) (const_int 27)
14733			   (const_int 29) (const_int 31)]))))))]
14734  "TARGET_AVX2"
14735  "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
14736  [(set_attr "isa" "*,avx512bw")
14737   (set_attr "type" "sseiadd")
14738   (set_attr "prefix_extra" "1")
14739   (set_attr "prefix" "vex,evex")
14740   (set_attr "mode" "OI")])
14741
14742;; The correct representation for this is absolutely enormous, and
14743;; surely not generally useful.
;; The operation is therefore kept opaque as UNSPEC_PMADDUBSW512 rather
;; than spelled out element by element as in the narrower vpmaddubsw
;; patterns.  The result mode iterates over VI2_AVX512VL; both inputs use
;; the corresponding <dbpsadbwmode> mode.  <mask_name>/<mask_operand3>
;; come from a subst defined elsewhere in the file (presumably adding an
;; optional write-mask variant -- confirm against the subst definition).
;; The ';' after the output template starts a comment and has no effect.
14744(define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
14745  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
14746          (unspec:VI2_AVX512VL
14747            [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
14748             (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
14749             UNSPEC_PMADDUBSW512))]
14750   "TARGET_AVX512BW"
14751   "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
14752  [(set_attr "type" "sseiadd")
14753   (set_attr "prefix" "evex")
14754   (set_attr "mode" "XI")])
14755
;; 512-bit vpmulhrsw on V32HI.  Per element the RTL computes
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1)
;; with logical right shifts, i.e. a rounded high-half multiply.
;; Operand 1 carries the "%" commutativity marker, so it may be swapped
;; with operand 2 during constraint matching.
;; NOTE(review): the rounding constant is written as const_vector:V32HI
;; inside a plus:V32SI -- confirm this mode mix is intentional.
14756(define_insn "avx512bw_umulhrswv32hi3<mask_name>"
14757  [(set (match_operand:V32HI 0 "register_operand" "=v")
14758	(truncate:V32HI
14759	  (lshiftrt:V32SI
14760	    (plus:V32SI
14761	      (lshiftrt:V32SI
14762		(mult:V32SI
14763		  (sign_extend:V32SI
14764		    (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
14765		  (sign_extend:V32SI
14766		    (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
14767		(const_int 14))
14768	      (const_vector:V32HI [(const_int 1) (const_int 1)
14769				   (const_int 1) (const_int 1)
14770				   (const_int 1) (const_int 1)
14771				   (const_int 1) (const_int 1)
14772				   (const_int 1) (const_int 1)
14773				   (const_int 1) (const_int 1)
14774				   (const_int 1) (const_int 1)
14775				   (const_int 1) (const_int 1)
14776				   (const_int 1) (const_int 1)
14777				   (const_int 1) (const_int 1)
14778				   (const_int 1) (const_int 1)
14779				   (const_int 1) (const_int 1)
14780				   (const_int 1) (const_int 1)
14781				   (const_int 1) (const_int 1)
14782				   (const_int 1) (const_int 1)
14783				   (const_int 1) (const_int 1)]))
14784	    (const_int 1))))]
14785  "TARGET_AVX512BW"
14786  "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14787  [(set_attr "type" "sseimul")
14788   (set_attr "prefix" "evex")
14789   (set_attr "mode" "XI")])
14790
;; 128-bit pmaddubsw.  Even-indexed bytes of operand 1 (zero-extended)
;; are multiplied by even-indexed bytes of operand 2 (sign-extended),
;; likewise for the odd-indexed bytes, and the two V8HI products are
;; added with signed saturation (ss_plus).
;; Three alternatives, selected through the "isa" attribute:
;;   0 (noavx):    two-operand form, operand 1 tied to the destination.
;;   1 (avx):      three-operand VEX form on "x" registers.
;;   2 (avx512bw): three-operand EVEX form on "v" registers.
14791(define_insn "ssse3_pmaddubsw128"
14792  [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
14793	(ss_plus:V8HI
14794	  (mult:V8HI
14795	    (zero_extend:V8HI
14796	      (vec_select:V8QI
14797		(match_operand:V16QI 1 "register_operand" "0,x,v")
14798		(parallel [(const_int 0) (const_int 2)
14799			   (const_int 4) (const_int 6)
14800			   (const_int 8) (const_int 10)
14801			   (const_int 12) (const_int 14)])))
14802	    (sign_extend:V8HI
14803	      (vec_select:V8QI
14804		(match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")
14805		(parallel [(const_int 0) (const_int 2)
14806			   (const_int 4) (const_int 6)
14807			   (const_int 8) (const_int 10)
14808			   (const_int 12) (const_int 14)]))))
14809	  (mult:V8HI
14810	    (zero_extend:V8HI
14811	      (vec_select:V8QI (match_dup 1)
14812		(parallel [(const_int 1) (const_int 3)
14813			   (const_int 5) (const_int 7)
14814			   (const_int 9) (const_int 11)
14815			   (const_int 13) (const_int 15)])))
14816	    (sign_extend:V8HI
14817	      (vec_select:V8QI (match_dup 2)
14818		(parallel [(const_int 1) (const_int 3)
14819			   (const_int 5) (const_int 7)
14820			   (const_int 9) (const_int 11)
14821			   (const_int 13) (const_int 15)]))))))]
14822  "TARGET_SSSE3"
14823  "@
14824   pmaddubsw\t{%2, %0|%0, %2}
14825   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}
14826   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
14827  [(set_attr "isa" "noavx,avx,avx512bw")
14828   (set_attr "type" "sseiadd")
14829   (set_attr "atom_unit" "simul")
14830   (set_attr "prefix_data16" "1,*,*")
14831   (set_attr "prefix_extra" "1")
14832   (set_attr "prefix" "orig,vex,evex")
14833   (set_attr "mode" "TI")])
14834
;; 64-bit pmaddubsw on V8QI inputs producing V4HI.  Same computation as
;; the wider variants: zero-extended bytes of operand 1 times
;; sign-extended bytes of operand 2, even and odd positions multiplied
;; separately and then added with signed saturation.  Operand 1 is tied
;; to the destination ("0"); operand 2 may be a "y" register or memory.
14835(define_insn "ssse3_pmaddubsw"
14836  [(set (match_operand:V4HI 0 "register_operand" "=y")
14837	(ss_plus:V4HI
14838	  (mult:V4HI
14839	    (zero_extend:V4HI
14840	      (vec_select:V4QI
14841		(match_operand:V8QI 1 "register_operand" "0")
14842		(parallel [(const_int 0) (const_int 2)
14843			   (const_int 4) (const_int 6)])))
14844	    (sign_extend:V4HI
14845	      (vec_select:V4QI
14846		(match_operand:V8QI 2 "nonimmediate_operand" "ym")
14847		(parallel [(const_int 0) (const_int 2)
14848			   (const_int 4) (const_int 6)]))))
14849	  (mult:V4HI
14850	    (zero_extend:V4HI
14851	      (vec_select:V4QI (match_dup 1)
14852		(parallel [(const_int 1) (const_int 3)
14853			   (const_int 5) (const_int 7)])))
14854	    (sign_extend:V4HI
14855	      (vec_select:V4QI (match_dup 2)
14856		(parallel [(const_int 1) (const_int 3)
14857			   (const_int 5) (const_int 7)]))))))]
14858  "TARGET_SSSE3"
14859  "pmaddubsw\t{%2, %0|%0, %2}"
14860  [(set_attr "type" "sseiadd")
14861   (set_attr "atom_unit" "simul")
14862   (set_attr "prefix_extra" "1")
14863   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14864   (set_attr "mode" "DI")])
14865
;; Modes handled by the pmulhrsw expanders below: V4HI and V8HI
;; unconditionally, V16HI only when TARGET_AVX2 holds.
14866(define_mode_iterator PMULHRSW
14867  [V4HI V8HI (V16HI "TARGET_AVX2")])
14868
;; Masked pmulhrsw expander.  The computed value
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + op5) >> 1)
;; is merged (vec_merge) with operand 3 under the mask in operand 4.
;; Operand 5 is not supplied by the caller: the preparation code sets it
;; to CONST1_RTX (<MODE>mode), then passes the operands through
;; ix86_fixup_binary_operands_no_copy for MULT.
14869(define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
14870  [(set (match_operand:PMULHRSW 0 "register_operand")
14871	(vec_merge:PMULHRSW
14872	  (truncate:PMULHRSW
14873	    (lshiftrt:<ssedoublemode>
14874	      (plus:<ssedoublemode>
14875	        (lshiftrt:<ssedoublemode>
14876		  (mult:<ssedoublemode>
14877		    (sign_extend:<ssedoublemode>
14878		      (match_operand:PMULHRSW 1 "nonimmediate_operand"))
14879		    (sign_extend:<ssedoublemode>
14880		      (match_operand:PMULHRSW 2 "nonimmediate_operand")))
14881		  (const_int 14))
14882	        (match_dup 5))
14883	      (const_int 1)))
14884	  (match_operand:PMULHRSW 3 "register_operand")
14885	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
14886  "TARGET_AVX512BW && TARGET_AVX512VL"
14887{
14888  operands[5] = CONST1_RTX(<MODE>mode);
14889  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
14890})
14891
;; Unmasked pmulhrsw expander:
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + op3) >> 1)
;; Operand 3 is filled in by the preparation code with
;; CONST1_RTX (<MODE>mode); the operands then go through
;; ix86_fixup_binary_operands_no_copy for MULT.
;; NOTE(review): the condition is TARGET_AVX2 although the PMULHRSW
;; iterator already gates only V16HI on AVX2 and the matching insns use
;; TARGET_SSSE3 -- confirm whether the stricter condition is intended.
14892(define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
14893  [(set (match_operand:PMULHRSW 0 "register_operand")
14894	(truncate:PMULHRSW
14895	  (lshiftrt:<ssedoublemode>
14896	    (plus:<ssedoublemode>
14897	      (lshiftrt:<ssedoublemode>
14898		(mult:<ssedoublemode>
14899		  (sign_extend:<ssedoublemode>
14900		    (match_operand:PMULHRSW 1 "nonimmediate_operand"))
14901		  (sign_extend:<ssedoublemode>
14902		    (match_operand:PMULHRSW 2 "nonimmediate_operand")))
14903		(const_int 14))
14904	      (match_dup 3))
14905	    (const_int 1))))]
14906  "TARGET_AVX2"
14907{
14908  operands[3] = CONST1_RTX(<MODE>mode);
14909  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
14910})
14911
;; Insn matching the RTL produced by the pmulhrsw expanders for the
;; VI2_AVX2 modes.  Operand 3 must satisfy const1_operand (the rounding
;; constant).  The condition rejects the case where operands 1 and 2 are
;; both memory.  Operand 1 is marked commutative ("%").
;; Alternatives, selected through the "isa" attribute:
;;   0 (noavx):    two-operand form, operand 1 tied to the destination.
;;   1 (avx):      three-operand form on "x" registers.
;;   2 (avx512bw): three-operand form on "v" registers, EVEX prefix.
14912(define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
14913  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
14914	(truncate:VI2_AVX2
14915	  (lshiftrt:<ssedoublemode>
14916	    (plus:<ssedoublemode>
14917	      (lshiftrt:<ssedoublemode>
14918		(mult:<ssedoublemode>
14919		  (sign_extend:<ssedoublemode>
14920		    (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v"))
14921		  (sign_extend:<ssedoublemode>
14922		    (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm")))
14923		(const_int 14))
14924	      (match_operand:VI2_AVX2 3 "const1_operand"))
14925	    (const_int 1))))]
14926  "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
14927   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14928  "@
14929   pmulhrsw\t{%2, %0|%0, %2}
14930   vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}
14931   vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
14932  [(set_attr "isa" "noavx,avx,avx512bw")
14933   (set_attr "type" "sseimul")
14934   (set_attr "prefix_data16" "1,*,*")
14935   (set_attr "prefix_extra" "1")
14936   (set_attr "prefix" "orig,maybe_evex,evex")
14937   (set_attr "mode" "<sseinsnmode>")])
14938
;; V4HI pmulhrsw insn on "y" registers.  Same rounded high-half multiply
;; as the wider pattern: ((sext (op1) * sext (op2)) >> 14) + op3) >> 1,
;; truncated back to V4HI, with operand 3 required to be const1_operand.
;; Operand 1 is commutative ("%") and tied to the destination; the
;; condition rejects operands 1 and 2 both being memory.
14939(define_insn "*ssse3_pmulhrswv4hi3"
14940  [(set (match_operand:V4HI 0 "register_operand" "=y")
14941	(truncate:V4HI
14942	  (lshiftrt:V4SI
14943	    (plus:V4SI
14944	      (lshiftrt:V4SI
14945		(mult:V4SI
14946		  (sign_extend:V4SI
14947		    (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
14948		  (sign_extend:V4SI
14949		    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
14950		(const_int 14))
14951	      (match_operand:V4HI 3 "const1_operand"))
14952	    (const_int 1))))]
14953  "TARGET_SSSE3 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14954  "pmulhrsw\t{%2, %0|%0, %2}"
14955  [(set_attr "type" "sseimul")
14956   (set_attr "prefix_extra" "1")
14957   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14958   (set_attr "mode" "DI")])
14959
;; pshufb / vpshufb for the VI1_AVX512 modes, kept opaque as
;; UNSPEC_PSHUFB over operands 1 and 2.
;; Alternatives, selected through the "isa" attribute:
;;   0 (noavx):    two-operand form, operand 1 tied to the destination.
;;   1 (avx):      three-operand form on "x" registers.
;;   2 (avx512bw): three-operand form on "v" registers, EVEX prefix.
14960(define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
14961  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
14962	(unspec:VI1_AVX512
14963	  [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v")
14964	   (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")]
14965	  UNSPEC_PSHUFB))]
14966  "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14967  "@
14968   pshufb\t{%2, %0|%0, %2}
14969   vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
14970   vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14971  [(set_attr "isa" "noavx,avx,avx512bw")
14972   (set_attr "type" "sselog1")
14973   (set_attr "prefix_data16" "1,*,*")
14974   (set_attr "prefix_extra" "1")
14975   (set_attr "prefix" "orig,maybe_evex,evex")
14976   (set_attr "btver2_decode" "vector")
14977   (set_attr "mode" "<sseinsnmode>")])
14978
;; V8QI pshufb on "y" registers, kept opaque as UNSPEC_PSHUFB.
;; Operand 1 is tied to the destination; operand 2 may be a "y"
;; register or memory.  The ';' after the output template starts a
;; comment and has no effect.
14979(define_insn "ssse3_pshufbv8qi3"
14980  [(set (match_operand:V8QI 0 "register_operand" "=y")
14981	(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
14982		      (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
14983		     UNSPEC_PSHUFB))]
14984  "TARGET_SSSE3"
14985  "pshufb\t{%2, %0|%0, %2}";
14986  [(set_attr "type" "sselog1")
14987   (set_attr "prefix_extra" "1")
14988   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14989   (set_attr "mode" "DI")])
14990
;; psign{b,w,d} / vpsign{b,w,d} for the VI124_AVX2 modes, kept opaque as
;; UNSPEC_PSIGN over operands 1 and 2.  The mnemonic suffix comes from
;; <ssemodesuffix>.
;; Alternative 0 (noavx) ties operand 1 to the destination; alternative
;; 1 (avx) is the three-operand VEX form.
14991(define_insn "<ssse3_avx2>_psign<mode>3"
14992  [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
14993	(unspec:VI124_AVX2
14994	  [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
14995	   (match_operand:VI124_AVX2 2 "vector_operand" "xBm,xm")]
14996	  UNSPEC_PSIGN))]
14997  "TARGET_SSSE3"
14998  "@
14999   psign<ssemodesuffix>\t{%2, %0|%0, %2}
15000   vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15001  [(set_attr "isa" "noavx,avx")
15002   (set_attr "type" "sselog1")
15003   (set_attr "prefix_data16" "1,*")
15004   (set_attr "prefix_extra" "1")
15005   (set_attr "prefix" "orig,vex")
15006   (set_attr "mode" "<sseinsnmode>")])
15007
;; psign for the MMXMODEI modes on "y" registers, kept opaque as
;; UNSPEC_PSIGN.  Operand 1 is tied to the destination; the mnemonic
;; suffix comes from <mmxvecsize>.  The ';' after the output template
;; starts a comment and has no effect.
15008(define_insn "ssse3_psign<mode>3"
15009  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
15010	(unspec:MMXMODEI
15011	  [(match_operand:MMXMODEI 1 "register_operand" "0")
15012	   (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
15013	  UNSPEC_PSIGN))]
15014  "TARGET_SSSE3"
15015  "psign<mmxvecsize>\t{%2, %0|%0, %2}";
15016  [(set_attr "type" "sselog1")
15017   (set_attr "prefix_extra" "1")
15018   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
15019   (set_attr "mode" "DI")])
15020
;; Masked vpalignr for the VI1_AVX512 modes.  The UNSPEC_PALIGNR result
;; is merged (vec_merge) with operand 4 under the mask register in
;; operand 5 ("Yk").  Operand 4 is either tied to the destination or
;; matches the "C" constraint.
;; Operand 3 is divided by 8 before printing, so the RTL immediate is
;; eight times the value that appears in the assembly (presumably a bit
;; count converted to a byte count, as the predicate name suggests).
;; 512-bit modes need only AVX512BW; narrower modes also need AVX512VL.
15021(define_insn "<ssse3_avx2>_palignr<mode>_mask"
15022  [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
15023        (vec_merge:VI1_AVX512
15024	  (unspec:VI1_AVX512
15025	    [(match_operand:VI1_AVX512 1 "register_operand" "v")
15026	     (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
15027	     (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
15028	    UNSPEC_PALIGNR)
15029	(match_operand:VI1_AVX512 4 "vector_move_operand" "0C")
15030	(match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
15031  "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
15032{
15033  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
15034  return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
15035}
15036  [(set_attr "type" "sseishft")
15037   (set_attr "atom_unit" "sishuf")
15038   (set_attr "prefix_extra" "1")
15039   (set_attr "length_immediate" "1")
15040   (set_attr "prefix" "evex")
15041   (set_attr "mode" "<sseinsnmode>")])
15042
;; Unmasked palignr / vpalignr for the SSESCALARMODE modes, kept opaque
;; as UNSPEC_PALIGNR.  Operand 3 is divided by 8 before printing, so the
;; RTL immediate is eight times the value that appears in the assembly.
;; Alternative 0 (noavx) prints the two-operand form with operand 1 tied
;; to the destination; alternatives 1 (avx) and 2 (avx512bw) share the
;; three-operand "v"-prefixed template.
15043(define_insn "<ssse3_avx2>_palignr<mode>"
15044  [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v")
15045	(unspec:SSESCALARMODE
15046	  [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v")
15047	   (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm")
15048	   (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
15049	  UNSPEC_PALIGNR))]
15050  "TARGET_SSSE3"
15051{
15052  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
15053
15054  switch (which_alternative)
15055    {
15056    case 0:
15057      return "palignr\t{%3, %2, %0|%0, %2, %3}";
15058    case 1:
15059    case 2:
15060      return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
15061    default:
15062      gcc_unreachable ();
15063    }
15064}
15065  [(set_attr "isa" "noavx,avx,avx512bw")
15066   (set_attr "type" "sseishft")
15067   (set_attr "atom_unit" "sishuf")
15068   (set_attr "prefix_data16" "1,*,*")
15069   (set_attr "prefix_extra" "1")
15070   (set_attr "length_immediate" "1")
15071   (set_attr "prefix" "orig,vex,evex")
15072   (set_attr "mode" "<sseinsnmode>")])
15073
;; DImode palignr on "y" registers, kept opaque as UNSPEC_PALIGNR.
;; Operand 1 is tied to the destination.  Operand 3 is divided by 8
;; before printing, so the RTL immediate is eight times the value that
;; appears in the assembly.
15074(define_insn "ssse3_palignrdi"
15075  [(set (match_operand:DI 0 "register_operand" "=y")
15076	(unspec:DI [(match_operand:DI 1 "register_operand" "0")
15077		    (match_operand:DI 2 "nonimmediate_operand" "ym")
15078		    (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
15079		   UNSPEC_PALIGNR))]
15080  "TARGET_SSSE3"
15081{
15082  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
15083  return "palignr\t{%3, %2, %0|%0, %2, %3}";
15084}
15085  [(set_attr "type" "sseishft")
15086   (set_attr "atom_unit" "sishuf")
15087   (set_attr "prefix_extra" "1")
15088   (set_attr "length_immediate" "1")
15089   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
15090   (set_attr "mode" "DI")])
15091
15092;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
15093;; modes for abs instruction on pre AVX-512 targets.
;; Each entry pairs a mode with the target condition under which it is
;; enabled; unconditioned entries (V16QI, V8HI, V4SI) are always
;; included.  V4DI and V2DI are enabled only with TARGET_AVX512VL.
15094(define_mode_iterator VI1248_AVX512VL_AVX512BW
15095  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
15096   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
15097   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
15098   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
15099
;; Vector absolute value (pabs*) for every mode of
;; VI1248_AVX512VL_AVX512BW; the mnemonic suffix comes from
;; <ssemodesuffix>.  The source may be a register or memory.
;; The "%v" at the start of the template presumably emits the "v"
;; mnemonic prefix when AVX encoding is in use (prefix attribute is
;; maybe_vex) -- confirm against the operand printer.
15100(define_insn "*abs<mode>2"
15101  [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
15102	(abs:VI1248_AVX512VL_AVX512BW
15103	  (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))]
15104  "TARGET_SSSE3"
15105  "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
15106  [(set_attr "type" "sselog1")
15107   (set_attr "prefix_data16" "1")
15108   (set_attr "prefix_extra" "1")
15109   (set_attr "prefix" "maybe_vex")
15110   (set_attr "mode" "<sseinsnmode>")])
15111
;; Masked vector absolute value for the VI48_AVX512VL modes (requires
;; TARGET_AVX512F).  The abs result is merged (vec_merge) with operand 2
;; under the mask register in operand 3 ("Yk").  Operand 2 is either
;; tied to the destination or matches the "C" constraint.
15112(define_insn "abs<mode>2_mask"
15113  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
15114	(vec_merge:VI48_AVX512VL
15115	  (abs:VI48_AVX512VL
15116	    (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
15117	  (match_operand:VI48_AVX512VL 2 "vector_move_operand" "0C")
15118	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
15119  "TARGET_AVX512F"
15120  "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
15121  [(set_attr "type" "sselog1")
15122   (set_attr "prefix" "evex")
15123   (set_attr "mode" "<sseinsnmode>")])
15124
;; Masked vector absolute value for the VI12_AVX512VL modes (requires
;; TARGET_AVX512BW).  Same shape as the VI48 variant: the abs result is
;; merged (vec_merge) with operand 2 under the mask register in
;; operand 3 ("Yk").
15125(define_insn "abs<mode>2_mask"
15126  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
15127	(vec_merge:VI12_AVX512VL
15128	  (abs:VI12_AVX512VL
15129	    (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
15130	  (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C")
15131	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
15132  "TARGET_AVX512BW"
15133  "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
15134  [(set_attr "type" "sselog1")
15135   (set_attr "prefix" "evex")
15136   (set_attr "mode" "<sseinsnmode>")])
15137
;; Expander for vector absolute value, available from TARGET_SSE2.
;; Without SSSE3 the work is handed to ix86_expand_sse2_abs and the
;; expansion ends there (DONE).  With SSSE3 the preparation code does
;; nothing and the abs RTL in the template is emitted as is.
15138(define_expand "abs<mode>2"
15139  [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand")
15140	(abs:VI1248_AVX512VL_AVX512BW
15141	  (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand")))]
15142  "TARGET_SSE2"
15143{
15144  if (!TARGET_SSSE3)
15145    {
15146      ix86_expand_sse2_abs (operands[0], operands[1]);
15147      DONE;
15148    }
15149})
15150
;; Absolute value for the MMXMODEI modes on "y" registers; the source
;; may be a "y" register or memory.  The mnemonic suffix comes from
;; <mmxvecsize>.  The ';' after the output template starts a comment and
;; has no effect.
15151(define_insn "abs<mode>2"
15152  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
15153	(abs:MMXMODEI
15154	  (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
15155  "TARGET_SSSE3"
15156  "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
15157  [(set_attr "type" "sselog1")
15158   (set_attr "prefix_rep" "0")
15159   (set_attr "prefix_extra" "1")
15160   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
15161   (set_attr "mode" "DI")])
15162
15163;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15164;;
15165;; AMD SSE4A instructions
15166;;
15167;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15168
;; SSE4A movnt store for the MODEF modes: operand 1 (an "x" register)
;; is written to the memory operand 0.  Kept opaque as UNSPEC_MOVNT so
;; it is not treated as an ordinary move.
15169(define_insn "sse4a_movnt<mode>"
15170  [(set (match_operand:MODEF 0 "memory_operand" "=m")
15171	(unspec:MODEF
15172	  [(match_operand:MODEF 1 "register_operand" "x")]
15173	  UNSPEC_MOVNT))]
15174  "TARGET_SSE4A"
15175  "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
15176  [(set_attr "type" "ssemov")
15177   (set_attr "mode" "<MODE>")])
15178
;; SSE4A movnt store taking a VF_128 vector register: only element 0 of
;; operand 1 is selected and written to the scalar memory operand 0.
;; Kept opaque as UNSPEC_MOVNT.
15179(define_insn "sse4a_vmmovnt<mode>"
15180  [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
15181	(unspec:<ssescalarmode>
15182	  [(vec_select:<ssescalarmode>
15183	     (match_operand:VF_128 1 "register_operand" "x")
15184	     (parallel [(const_int 0)]))]
15185	  UNSPEC_MOVNT))]
15186  "TARGET_SSE4A"
15187  "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
15188  [(set_attr "type" "ssemov")
15189   (set_attr "mode" "<ssescalarmode>")])
15190
;; SSE4A extrq, immediate form.  Operand 1 is tied to the destination;
;; operands 2 and 3 are two 0..255 immediates (hence length_immediate
;; "2").  Kept opaque as UNSPEC_EXTRQI.
15191(define_insn "sse4a_extrqi"
15192  [(set (match_operand:V2DI 0 "register_operand" "=x")
15193	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15194		      (match_operand 2 "const_0_to_255_operand")
15195		      (match_operand 3 "const_0_to_255_operand")]
15196		     UNSPEC_EXTRQI))]
15197  "TARGET_SSE4A"
15198  "extrq\t{%3, %2, %0|%0, %2, %3}"
15199  [(set_attr "type" "sse")
15200   (set_attr "prefix_data16" "1")
15201   (set_attr "length_immediate" "2")
15202   (set_attr "mode" "TI")])
15203
;; SSE4A extrq, register form.  Operand 1 is tied to the destination;
;; operand 2 is a V16QI register.  Kept opaque as UNSPEC_EXTRQ.
15204(define_insn "sse4a_extrq"
15205  [(set (match_operand:V2DI 0 "register_operand" "=x")
15206	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15207		      (match_operand:V16QI 2 "register_operand" "x")]
15208		     UNSPEC_EXTRQ))]
15209  "TARGET_SSE4A"
15210  "extrq\t{%2, %0|%0, %2}"
15211  [(set_attr "type" "sse")
15212   (set_attr "prefix_data16" "1")
15213   (set_attr "mode" "TI")])
15214
;; SSE4A insertq, immediate form.  Operand 1 is tied to the destination,
;; operand 2 is a second V2DI register, and operands 3 and 4 are two
;; 0..255 immediates (hence length_immediate "2").  Kept opaque as
;; UNSPEC_INSERTQI.
15215(define_insn "sse4a_insertqi"
15216  [(set (match_operand:V2DI 0 "register_operand" "=x")
15217	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15218		      (match_operand:V2DI 2 "register_operand" "x")
15219		      (match_operand 3 "const_0_to_255_operand")
15220		      (match_operand 4 "const_0_to_255_operand")]
15221		     UNSPEC_INSERTQI))]
15222  "TARGET_SSE4A"
15223  "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
15224  [(set_attr "type" "sseins")
15225   (set_attr "prefix_data16" "0")
15226   (set_attr "prefix_rep" "1")
15227   (set_attr "length_immediate" "2")
15228   (set_attr "mode" "TI")])
15229
;; SSE4A insertq, register form.  Operand 1 is tied to the destination;
;; operand 2 is a second V2DI register.  Kept opaque as UNSPEC_INSERTQ.
15230(define_insn "sse4a_insertq"
15231  [(set (match_operand:V2DI 0 "register_operand" "=x")
15232	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15233		      (match_operand:V2DI 2 "register_operand" "x")]
15234		     UNSPEC_INSERTQ))]
15235  "TARGET_SSE4A"
15236  "insertq\t{%2, %0|%0, %2}"
15237  [(set_attr "type" "sseins")
15238   (set_attr "prefix_data16" "0")
15239   (set_attr "prefix_rep" "1")
15240   (set_attr "mode" "TI")])
15241
15242;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15243;;
15244;; Intel SSE4.1 instructions
15245;;
15246;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15247
15248;; Mapping of immediate bits for blend instructions
;; For each mode this gives the largest allowed blend immediate, i.e.
;; one bit per vector element (8 elements -> 255, 4 -> 15, 2 -> 3).
;; It is spliced into the predicate name const_0_to_<blendbits>_operand.
15249(define_mode_attr blendbits
15250  [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
15251
;; blendps/blendpd and their VEX forms for the VF_128_256 modes,
;; expressed as a vec_merge controlled by the immediate in operand 3.
;; Note the operand order in the RTL: operand 2 is the first vec_merge
;; input and operand 1 the second.
;; Alternatives 0 and 1 (noavx) tie operand 1 to the destination and
;; differ only in register class ("Yr" vs "*x"); alternative 2 (avx) is
;; the three-operand VEX form.
15252(define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
15253  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15254	(vec_merge:VF_128_256
15255	  (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
15256	  (match_operand:VF_128_256 1 "register_operand" "0,0,x")
15257	  (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
15258  "TARGET_SSE4_1"
15259  "@
15260   blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15261   blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15262   vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15263  [(set_attr "isa" "noavx,noavx,avx")
15264   (set_attr "type" "ssemov")
15265   (set_attr "length_immediate" "1")
15266   (set_attr "prefix_data16" "1,1,*")
15267   (set_attr "prefix_extra" "1")
15268   (set_attr "prefix" "orig,orig,vex")
15269   (set_attr "mode" "<MODE>")])
15270
;; blendvps/blendvpd and their VEX forms for the VF_128_256 modes, kept
;; opaque as UNSPEC_BLENDV.  Operand 3 is the selector register: the
;; non-AVX alternatives require the "Yz" constraint for it, the AVX
;; alternative accepts any "x" register.
;; Alternatives 0 and 1 (noavx) tie operand 1 to the destination;
;; alternative 2 (avx) is the four-operand VEX form.
15271(define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
15272  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15273	(unspec:VF_128_256
15274	  [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
15275	   (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
15276	   (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
15277	  UNSPEC_BLENDV))]
15278  "TARGET_SSE4_1"
15279  "@
15280   blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15281   blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15282   vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15283  [(set_attr "isa" "noavx,noavx,avx")
15284   (set_attr "type" "ssemov")
15285   (set_attr "length_immediate" "1")
15286   (set_attr "prefix_data16" "1,1,*")
15287   (set_attr "prefix_extra" "1")
15288   (set_attr "prefix" "orig,orig,vex")
15289   (set_attr "btver2_decode" "vector,vector,vector")
15290   (set_attr "mode" "<MODE>")])
15291
;; dpps/dppd and their VEX forms for the VF_128_256 modes, kept opaque
;; as UNSPEC_DP with an 8-bit immediate in operand 3.  Operand 1 is
;; marked commutative ("%") in the first alternative.
;; Alternatives 0 and 1 (noavx) tie operand 1 to the destination;
;; alternative 2 (avx) is the three-operand VEX form.
15292(define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
15293  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15294	(unspec:VF_128_256
15295	  [(match_operand:VF_128_256 1 "vector_operand" "%0,0,x")
15296	   (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
15297	   (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
15298	  UNSPEC_DP))]
15299  "TARGET_SSE4_1"
15300  "@
15301   dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15302   dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15303   vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15304  [(set_attr "isa" "noavx,noavx,avx")
15305   (set_attr "type" "ssemul")
15306   (set_attr "length_immediate" "1")
15307   (set_attr "prefix_data16" "1,1,*")
15308   (set_attr "prefix_extra" "1")
15309   (set_attr "prefix" "orig,orig,vex")
15310   (set_attr "btver2_decode" "vector,vector,vector")
15311   (set_attr "znver1_decode" "vector,vector,vector")
15312   (set_attr "mode" "<MODE>")])
15313
15314;; Mode attribute used by `vmovntdqa' pattern
;; Maps each vector mode to the ISA name used as the pattern-name
;; prefix: V2DI -> sse4_1, V4DI -> avx2, V8DI -> avx512f.
15315(define_mode_attr vi8_sse4_1_avx2_avx512
15316   [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
15317
;; movntdqa load for the VI8_AVX2_AVX512F modes: operand 1 must be
;; memory and operand 0 a register.  Kept opaque as UNSPEC_MOVNTDQA.
;; Alternatives 0 and 1 are the non-AVX forms ("Yr" / "*x" registers);
;; alternative 2 is the AVX form on "v" registers.
;; The "%v" at the start of the template presumably emits the "v"
;; mnemonic prefix when AVX encoding is in use -- confirm against the
;; operand printer.
15318(define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
15319  [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x,v")
15320	(unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m,m,m")]
15321		     UNSPEC_MOVNTDQA))]
15322  "TARGET_SSE4_1"
15323  "%vmovntdqa\t{%1, %0|%0, %1}"
15324  [(set_attr "isa" "noavx,noavx,avx")
15325   (set_attr "type" "ssemov")
15326   (set_attr "prefix_extra" "1,1,*")
15327   (set_attr "prefix" "orig,orig,maybe_evex")
15328   (set_attr "mode" "<sseinsnmode>")])
15329
;; mpsadbw / vmpsadbw for the VI1_AVX2 modes, kept opaque as
;; UNSPEC_MPSADBW with an 8-bit immediate in operand 3.
;; Alternatives 0 and 1 (noavx) tie operand 1 to the destination;
;; alternative 2 (avx) is the three-operand VEX form.
15330(define_insn "<sse4_1_avx2>_mpsadbw"
15331  [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
15332	(unspec:VI1_AVX2
15333	  [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
15334	   (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
15335	   (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
15336	  UNSPEC_MPSADBW))]
15337  "TARGET_SSE4_1"
15338  "@
15339   mpsadbw\t{%3, %2, %0|%0, %2, %3}
15340   mpsadbw\t{%3, %2, %0|%0, %2, %3}
15341   vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15342  [(set_attr "isa" "noavx,noavx,avx")
15343   (set_attr "type" "sselog1")
15344   (set_attr "length_immediate" "1")
15345   (set_attr "prefix_extra" "1")
15346   (set_attr "prefix" "orig,orig,vex")
15347   (set_attr "btver2_decode" "vector,vector,vector")
15348   (set_attr "znver1_decode" "vector,vector,vector")
15349   (set_attr "mode" "<sseinsnmode>")])
15350
;; packusdw / vpackusdw for the VI2_AVX2 modes.  Each input (mode
;; <sseunpackmode>) is narrowed with unsigned saturation (us_truncate)
;; to <ssehalfvecmode>, and the two halves are concatenated with the
;; operand-1 half first.
;; Alternatives 0 and 1 (noavx) tie operand 1 to the destination;
;; alternative 2 (avx) and alternative 3 (avx512bw, "v" registers) are
;; the three-operand forms.
15351(define_insn "<sse4_1_avx2>_packusdw<mask_name>"
15352  [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v")
15353	(vec_concat:VI2_AVX2
15354	  (us_truncate:<ssehalfvecmode>
15355	    (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v"))
15356	  (us_truncate:<ssehalfvecmode>
15357	    (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,xm,vm"))))]
15358  "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
15359  "@
15360   packusdw\t{%2, %0|%0, %2}
15361   packusdw\t{%2, %0|%0, %2}
15362   vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
15363   vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15364  [(set_attr "isa" "noavx,noavx,avx,avx512bw")
15365   (set_attr "type" "sselog")
15366   (set_attr "prefix_extra" "1")
15367   (set_attr "prefix" "orig,orig,<mask_prefix>,evex")
15368   (set_attr "mode" "<sseinsnmode>")])
15369
;; PBLENDVB, expressed as UNSPEC_BLENDV of operands 1 and 2 with a vector
;; selector in operand 3.  In the non-AVX alternatives operand 1 is tied to
;; the destination and the selector is restricted to the "Yz" constraint;
;; the AVX alternative takes any "x" register for both and prints the
;; four-operand vpblendvb.  length_immediate is 1 only for that alternative.
15370(define_insn "<sse4_1_avx2>_pblendvb"
15371  [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
15372	(unspec:VI1_AVX2
15373	  [(match_operand:VI1_AVX2 1 "register_operand"  "0,0,x")
15374	   (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
15375	   (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
15376	  UNSPEC_BLENDV))]
15377  "TARGET_SSE4_1"
15378  "@
15379   pblendvb\t{%3, %2, %0|%0, %2, %3}
15380   pblendvb\t{%3, %2, %0|%0, %2, %3}
15381   vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15382  [(set_attr "isa" "noavx,noavx,avx")
15383   (set_attr "type" "ssemov")
15384   (set_attr "prefix_extra" "1")
15385   (set_attr "length_immediate" "*,*,1")
15386   (set_attr "prefix" "orig,orig,vex")
15387   (set_attr "btver2_decode" "vector,vector,vector")
15388   (set_attr "mode" "<sseinsnmode>")])
15389
;; 128-bit PBLENDW: a vec_merge whose first arm is operand 2 and whose
;; second arm is operand 1, controlled by the immediate in operand 3
;; (0..255).  Non-AVX alternatives tie operand 1 to the destination; the AVX
;; alternative prints the separate-destination vpblendw form.
15390(define_insn "sse4_1_pblendw"
15391  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
15392	(vec_merge:V8HI
15393	  (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm")
15394	  (match_operand:V8HI 1 "register_operand" "0,0,x")
15395	  (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
15396  "TARGET_SSE4_1"
15397  "@
15398   pblendw\t{%3, %2, %0|%0, %2, %3}
15399   pblendw\t{%3, %2, %0|%0, %2, %3}
15400   vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15401  [(set_attr "isa" "noavx,noavx,avx")
15402   (set_attr "type" "ssemov")
15403   (set_attr "prefix_extra" "1")
15404   (set_attr "length_immediate" "1")
15405   (set_attr "prefix" "orig,orig,vex")
15406   (set_attr "mode" "TI")])
15407
15408;; The builtin uses an 8-bit immediate.  Expand that.
;; The preparation code masks operand 3 to its low 8 bits and replicates
;; that byte into bits 8..15 (val << 8 | val), so the V16HI vec_merge gets a
;; 16-bit selector with the same pattern in both halves.
;; NOTE(review): presumably this is the form avx2_pblendw_operand accepts in
;; the matching insn; that predicate is defined outside this chunk.
15409(define_expand "avx2_pblendw"
15410  [(set (match_operand:V16HI 0 "register_operand")
15411	(vec_merge:V16HI
15412	  (match_operand:V16HI 2 "nonimmediate_operand")
15413	  (match_operand:V16HI 1 "register_operand")
15414	  (match_operand:SI 3 "const_0_to_255_operand")))]
15415  "TARGET_AVX2"
15416{
15417  HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
15418  operands[3] = GEN_INT (val << 8 | val);
15419})
15420
;; Matcher for the 256-bit VPBLENDW.  Operand 3 must satisfy
;; avx2_pblendw_operand (defined outside this chunk).  The output code
;; rewrites operand 3 to its low 8 bits before returning the template, so
;; only a single immediate byte is printed (length_immediate is 1).
15421(define_insn "*avx2_pblendw"
15422  [(set (match_operand:V16HI 0 "register_operand" "=x")
15423	(vec_merge:V16HI
15424	  (match_operand:V16HI 2 "nonimmediate_operand" "xm")
15425	  (match_operand:V16HI 1 "register_operand" "x")
15426	  (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
15427  "TARGET_AVX2"
15428{
15429  operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
15430  return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
15431}
15432  [(set_attr "type" "ssemov")
15433   (set_attr "prefix_extra" "1")
15434   (set_attr "length_immediate" "1")
15435   (set_attr "prefix" "vex")
15436   (set_attr "mode" "OI")])
15437
;; VPBLENDD for the VI4_AVX2 modes: vec_merge with operand 2 as the first
;; arm and operand 1 as the second, selected by the immediate in operand 3
;; (0..255).  AVX2 only, single VEX-encoded alternative.
15438(define_insn "avx2_pblendd<mode>"
15439  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
15440	(vec_merge:VI4_AVX2
15441	  (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
15442	  (match_operand:VI4_AVX2 1 "register_operand" "x")
15443	  (match_operand:SI 3 "const_0_to_255_operand" "n")))]
15444  "TARGET_AVX2"
15445  "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15446  [(set_attr "type" "ssemov")
15447   (set_attr "prefix_extra" "1")
15448   (set_attr "length_immediate" "1")
15449   (set_attr "prefix" "vex")
15450   (set_attr "mode" "<sseinsnmode>")])
15451
;; PHMINPOSUW on a V8HI source, kept as an opaque UNSPEC_PHMINPOSUW.
;; One template with the "%v" marker serves all three alternatives
;; (two non-AVX, one AVX per the "isa" attribute).
15452(define_insn "sse4_1_phminposuw"
15453  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
15454	(unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")]
15455		     UNSPEC_PHMINPOSUW))]
15456  "TARGET_SSE4_1"
15457  "%vphminposuw\t{%1, %0|%0, %1}"
15458  [(set_attr "isa" "noavx,noavx,avx")
15459   (set_attr "type" "sselog1")
15460   (set_attr "prefix_extra" "1")
15461   (set_attr "prefix" "orig,orig,vex")
15462   (set_attr "mode" "TI")])
15463
;; Extend (any_extend iterator) every QImode element of a V16QI source to
;; HImode, producing V16HI.  The whole source vector is consumed, so
;; operand 1 is printed without a size modifier.  Needs AVX2 plus the
;; <mask_avx512bw_condition>/<mask_avx512vl_condition> substitutions.
15464(define_insn "avx2_<code>v16qiv16hi2<mask_name>"
15465  [(set (match_operand:V16HI 0 "register_operand" "=v")
15466	(any_extend:V16HI
15467	  (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
15468  "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
15469  "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15470  [(set_attr "type" "ssemov")
15471   (set_attr "prefix_extra" "1")
15472   (set_attr "prefix" "maybe_evex")
15473   (set_attr "mode" "OI")])
15474
;; Extend (any_extend iterator) every QImode element of a V32QI source to
;; HImode, producing V32HI.  Requires AVX512BW; always EVEX-encoded.
;; The whole source vector is consumed, so operand 1 is printed as-is.
15475(define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
15476  [(set (match_operand:V32HI 0 "register_operand" "=v")
15477	(any_extend:V32HI
15478	  (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
15479  "TARGET_AVX512BW"
15480  "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15481  [(set_attr "type" "ssemov")
15482   (set_attr "prefix_extra" "1")
15483   (set_attr "prefix" "evex")
15484   (set_attr "mode" "XI")])
15485
;; Extend (any_extend iterator) the low eight QImode elements (indices
;; 0..7) of a V16QI source to V8HI.  Only 8 bytes of the source are used,
;; so the Intel-syntax side prints operand 1 with the %q modifier.
;; Alternatives 0/1 are non-AVX, alternative 2 is AVX ("v" registers).
15486(define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
15487  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
15488	(any_extend:V8HI
15489	  (vec_select:V8QI
15490	    (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15491	    (parallel [(const_int 0) (const_int 1)
15492		       (const_int 2) (const_int 3)
15493		       (const_int 4) (const_int 5)
15494		       (const_int 6) (const_int 7)]))))]
15495  "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
15496  "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15497  [(set_attr "isa" "noavx,noavx,avx")
15498   (set_attr "type" "ssemov")
15499   (set_attr "prefix_extra" "1")
15500   (set_attr "prefix" "orig,orig,maybe_evex")
15501   (set_attr "mode" "TI")])
15502
;; Extend (any_extend iterator) every QImode element of a V16QI source to
;; SImode, producing V16SI.  Requires AVX512F; always EVEX-encoded.
;; All 16 bytes of operand 1 are consumed (there is no vec_select), so the
;; Intel-syntax side must print the operand at its full size with plain %1,
;; as the other whole-vector extension patterns in this file do.  A %q
;; modifier here would describe a memory source as only 8 bytes wide.
15503(define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
15504  [(set (match_operand:V16SI 0 "register_operand" "=v")
15505	(any_extend:V16SI
15506	  (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
15507  "TARGET_AVX512F"
15508  "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15509  [(set_attr "type" "ssemov")
15510   (set_attr "prefix" "evex")
15511   (set_attr "mode" "XI")])
15512
;; Extend (any_extend iterator) the low eight QImode elements (indices
;; 0..7) of a V16QI source to V8SI.  Only 8 bytes of the source are used,
;; so the Intel-syntax side prints operand 1 with the %q modifier.
;; Requires AVX2 (plus <mask_avx512vl_condition> for the masked variant).
15513(define_insn "avx2_<code>v8qiv8si2<mask_name>"
15514  [(set (match_operand:V8SI 0 "register_operand" "=v")
15515	(any_extend:V8SI
15516	  (vec_select:V8QI
15517	    (match_operand:V16QI 1 "nonimmediate_operand" "vm")
15518	    (parallel [(const_int 0) (const_int 1)
15519		       (const_int 2) (const_int 3)
15520		       (const_int 4) (const_int 5)
15521		       (const_int 6) (const_int 7)]))))]
15522  "TARGET_AVX2 && <mask_avx512vl_condition>"
15523  "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15524  [(set_attr "type" "ssemov")
15525   (set_attr "prefix_extra" "1")
15526   (set_attr "prefix" "maybe_evex")
15527   (set_attr "mode" "OI")])
15528
;; Extend (any_extend iterator) the low four QImode elements (indices 0..3)
;; of a V16QI source to V4SI.  Only 4 bytes of the source are used, so the
;; Intel-syntax side prints operand 1 with the %k modifier.
;; Alternatives 0/1 are non-AVX, alternative 2 is AVX ("v" registers).
15529(define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
15530  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
15531	(any_extend:V4SI
15532	  (vec_select:V4QI
15533	    (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15534	    (parallel [(const_int 0) (const_int 1)
15535		       (const_int 2) (const_int 3)]))))]
15536  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15537  "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15538  [(set_attr "isa" "noavx,noavx,avx")
15539   (set_attr "type" "ssemov")
15540   (set_attr "prefix_extra" "1")
15541   (set_attr "prefix" "orig,orig,maybe_evex")
15542   (set_attr "mode" "TI")])
15543
;; Extend (any_extend iterator) every HImode element of a V16HI source to
;; SImode, producing V16SI.  Requires AVX512F; always EVEX-encoded.
;; The whole source vector is consumed, so operand 1 is printed as-is.
15544(define_insn "avx512f_<code>v16hiv16si2<mask_name>"
15545  [(set (match_operand:V16SI 0 "register_operand" "=v")
15546	(any_extend:V16SI
15547	  (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
15548  "TARGET_AVX512F"
15549  "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15550  [(set_attr "type" "ssemov")
15551   (set_attr "prefix" "evex")
15552   (set_attr "mode" "XI")])
15553
;; Extend (any_extend iterator) every HImode element of a V8HI source to
;; SImode, producing V8SI.  Requires AVX2 (plus <mask_avx512vl_condition>
;; for the masked variant).  The whole source vector is consumed, so
;; operand 1 is printed as-is.
15554(define_insn "avx2_<code>v8hiv8si2<mask_name>"
15555  [(set (match_operand:V8SI 0 "register_operand" "=v")
15556	(any_extend:V8SI
15557	    (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
15558  "TARGET_AVX2 && <mask_avx512vl_condition>"
15559  "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15560  [(set_attr "type" "ssemov")
15561   (set_attr "prefix_extra" "1")
15562   (set_attr "prefix" "maybe_evex")
15563   (set_attr "mode" "OI")])
15564
;; Extend (any_extend iterator) the low four HImode elements (indices 0..3)
;; of a V8HI source to V4SI.  Only 8 bytes of the source are used, so the
;; Intel-syntax side prints operand 1 with the %q modifier.
;; Alternatives 0/1 are non-AVX, alternative 2 is AVX ("v" registers).
15565(define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
15566  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
15567	(any_extend:V4SI
15568	  (vec_select:V4HI
15569	    (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15570	    (parallel [(const_int 0) (const_int 1)
15571		       (const_int 2) (const_int 3)]))))]
15572  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15573  "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15574  [(set_attr "isa" "noavx,noavx,avx")
15575   (set_attr "type" "ssemov")
15576   (set_attr "prefix_extra" "1")
15577   (set_attr "prefix" "orig,orig,maybe_evex")
15578   (set_attr "mode" "TI")])
15579
;; Extend (any_extend iterator) the low eight QImode elements (indices
;; 0..7) of a V16QI source to V8DI.  Requires AVX512F; always EVEX-encoded.
;; Eight bytes of operand 1 are consumed, so the Intel-syntax side prints
;; the operand with the %q (8-byte) modifier, the same modifier the other
;; patterns that select eight QImode elements use.  A %k modifier would
;; describe a memory source as only 4 bytes wide.
15580(define_insn "avx512f_<code>v8qiv8di2<mask_name>"
15581  [(set (match_operand:V8DI 0 "register_operand" "=v")
15582	(any_extend:V8DI
15583	  (vec_select:V8QI
15584	    (match_operand:V16QI 1 "nonimmediate_operand" "vm")
15585	    (parallel [(const_int 0) (const_int 1)
15586		       (const_int 2) (const_int 3)
15587		       (const_int 4) (const_int 5)
15588		       (const_int 6) (const_int 7)]))))]
15589  "TARGET_AVX512F"
15590  "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15591  [(set_attr "type" "ssemov")
15592   (set_attr "prefix" "evex")
15593   (set_attr "mode" "XI")])
15594
;; Extend (any_extend iterator) the low four QImode elements (indices 0..3)
;; of a V16QI source to V4DI.  Only 4 bytes of the source are used, so the
;; Intel-syntax side prints operand 1 with the %k modifier.
;; Requires AVX2 (plus <mask_avx512vl_condition> for the masked variant).
15595(define_insn "avx2_<code>v4qiv4di2<mask_name>"
15596  [(set (match_operand:V4DI 0 "register_operand" "=v")
15597	(any_extend:V4DI
15598	  (vec_select:V4QI
15599	    (match_operand:V16QI 1 "nonimmediate_operand" "vm")
15600	    (parallel [(const_int 0) (const_int 1)
15601		       (const_int 2) (const_int 3)]))))]
15602  "TARGET_AVX2 && <mask_avx512vl_condition>"
15603  "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15604  [(set_attr "type" "ssemov")
15605   (set_attr "prefix_extra" "1")
15606   (set_attr "prefix" "maybe_evex")
15607   (set_attr "mode" "OI")])
15608
;; Extend (any_extend iterator) the low two QImode elements (indices 0..1)
;; of a V16QI source to V2DI.  Only 2 bytes of the source are used, so the
;; Intel-syntax side prints operand 1 with the %w modifier.
;; Alternatives 0/1 are non-AVX, alternative 2 is AVX ("v" registers).
15609(define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
15610  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
15611	(any_extend:V2DI
15612	  (vec_select:V2QI
15613	    (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15614	    (parallel [(const_int 0) (const_int 1)]))))]
15615  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15616  "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
15617  [(set_attr "isa" "noavx,noavx,avx")
15618   (set_attr "type" "ssemov")
15619   (set_attr "prefix_extra" "1")
15620   (set_attr "prefix" "orig,orig,maybe_evex")
15621   (set_attr "mode" "TI")])
15622
;; Extend (any_extend iterator) every HImode element of a V8HI source to
;; DImode, producing V8DI.  Requires AVX512F; always EVEX-encoded.
;; All 16 bytes of operand 1 are consumed (there is no vec_select), so the
;; Intel-syntax side must print the operand at its full size with plain %1,
;; as the other whole-vector extension patterns in this file do.  A %q
;; modifier here would describe a memory source as only 8 bytes wide.
15623(define_insn "avx512f_<code>v8hiv8di2<mask_name>"
15624  [(set (match_operand:V8DI 0 "register_operand" "=v")
15625	(any_extend:V8DI
15626	  (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
15627  "TARGET_AVX512F"
15628  "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15629  [(set_attr "type" "ssemov")
15630   (set_attr "prefix" "evex")
15631   (set_attr "mode" "XI")])
15632
;; Extend (any_extend iterator) the low four HImode elements (indices 0..3)
;; of a V8HI source to V4DI.  Only 8 bytes of the source are used, so the
;; Intel-syntax side prints operand 1 with the %q modifier.
;; Requires AVX2 (plus <mask_avx512vl_condition> for the masked variant).
15633(define_insn "avx2_<code>v4hiv4di2<mask_name>"
15634  [(set (match_operand:V4DI 0 "register_operand" "=v")
15635	(any_extend:V4DI
15636	  (vec_select:V4HI
15637	    (match_operand:V8HI 1 "nonimmediate_operand" "vm")
15638	    (parallel [(const_int 0) (const_int 1)
15639		       (const_int 2) (const_int 3)]))))]
15640  "TARGET_AVX2 && <mask_avx512vl_condition>"
15641  "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15642  [(set_attr "type" "ssemov")
15643   (set_attr "prefix_extra" "1")
15644   (set_attr "prefix" "maybe_evex")
15645   (set_attr "mode" "OI")])
15646
;; Extend (any_extend iterator) the low two HImode elements (indices 0..1)
;; of a V8HI source to V2DI.  Only 4 bytes of the source are used, so the
;; Intel-syntax side prints operand 1 with the %k modifier.
;; Alternatives 0/1 are non-AVX, alternative 2 is AVX ("v" registers).
15647(define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
15648  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
15649	(any_extend:V2DI
15650	  (vec_select:V2HI
15651	    (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15652	    (parallel [(const_int 0) (const_int 1)]))))]
15653  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15654  "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15655  [(set_attr "isa" "noavx,noavx,avx")
15656   (set_attr "type" "ssemov")
15657   (set_attr "prefix_extra" "1")
15658   (set_attr "prefix" "orig,orig,maybe_evex")
15659   (set_attr "mode" "TI")])
15660
;; Extend (any_extend iterator) every SImode element of a V8SI source to
;; DImode, producing V8DI.  Requires AVX512F; always EVEX-encoded.
;; The whole source vector is consumed, so operand 1 is printed as-is.
15661(define_insn "avx512f_<code>v8siv8di2<mask_name>"
15662  [(set (match_operand:V8DI 0 "register_operand" "=v")
15663	(any_extend:V8DI
15664	  (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
15665  "TARGET_AVX512F"
15666  "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15667  [(set_attr "type" "ssemov")
15668   (set_attr "prefix" "evex")
15669   (set_attr "mode" "XI")])
15670
;; Extend (any_extend iterator) every SImode element of a V4SI source to
;; DImode, producing V4DI.  Requires AVX2 (plus <mask_avx512vl_condition>
;; for the masked variant).  The whole source vector is consumed, so
;; operand 1 is printed as-is.
15671(define_insn "avx2_<code>v4siv4di2<mask_name>"
15672  [(set (match_operand:V4DI 0 "register_operand" "=v")
15673	(any_extend:V4DI
15674	    (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
15675  "TARGET_AVX2 && <mask_avx512vl_condition>"
15676  "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15677  [(set_attr "type" "ssemov")
15678   (set_attr "prefix" "maybe_evex")
15679   (set_attr "prefix_extra" "1")
15680   (set_attr "mode" "OI")])
15681
;; Extend (any_extend iterator) the low two SImode elements (indices 0..1)
;; of a V4SI source to V2DI.  Only 8 bytes of the source are used, so the
;; Intel-syntax side prints operand 1 with the %q modifier.
;; Alternatives 0/1 are non-AVX, alternative 2 is AVX ("v" registers).
15682(define_insn "sse4_1_<code>v2siv2di2<mask_name>"
15683  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
15684	(any_extend:V2DI
15685	  (vec_select:V2SI
15686	    (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15687	    (parallel [(const_int 0) (const_int 1)]))))]
15688  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15689  "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15690  [(set_attr "isa" "noavx,noavx,avx")
15691   (set_attr "type" "ssemov")
15692   (set_attr "prefix_extra" "1")
15693   (set_attr "prefix" "orig,orig,maybe_evex")
15694   (set_attr "mode" "TI")])
15695
15696;; vtestps/vtestpd are very similar to comiss and ucomiss when
15697;; setting FLAGS_REG, but they are not really compare instructions.
;; VTESTPS/VTESTPD for the VF_128_256 modes.  The only result is
;; FLAGS_REG, set in CCmode from an opaque UNSPEC_VTESTP of the two
;; inputs; no vector register is written.  AVX only, VEX-encoded.
15698(define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
15699  [(set (reg:CC FLAGS_REG)
15700	(unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
15701		    (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
15702		   UNSPEC_VTESTP))]
15703  "TARGET_AVX"
15704  "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
15705  [(set_attr "type" "ssecomi")
15706   (set_attr "prefix_extra" "1")
15707   (set_attr "prefix" "vex")
15708   (set_attr "mode" "<MODE>")])
15709
15710;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
15711;; but it is not really a compare instruction.
;; PTEST/VPTEST for the V_AVX modes.  The only result is FLAGS_REG, set in
;; CCmode from an opaque UNSPEC_PTEST of the two inputs.
;; btver2_decode is "vector" only when the insn mode is OImode, and "*"
;; otherwise.
15712(define_insn "<sse4_1>_ptest<mode>"
15713  [(set (reg:CC FLAGS_REG)
15714	(unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
15715		    (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
15716		   UNSPEC_PTEST))]
15717  "TARGET_SSE4_1"
15718  "%vptest\t{%1, %0|%0, %1}"
15719  [(set_attr "isa" "noavx,noavx,avx")
15720   (set_attr "type" "ssecomi")
15721   (set_attr "prefix_extra" "1")
15722   (set_attr "prefix" "orig,orig,vex")
15723   (set (attr "btver2_decode")
15724     (if_then_else
15725       (match_test "<sseinsnmode>mode==OImode")
15726     (const_string "vector")
15727     (const_string "*")))
15728   (set_attr "mode" "<sseinsnmode>")])
15729
;; PTEST/VPTEST applied to two TFmode operands.  Same shape as the vector
;; ptest pattern: FLAGS_REG is set in CCmode from UNSPEC_PTEST, and the
;; insn is treated as TImode for attribute purposes.
15730(define_insn "ptesttf2"
15731  [(set (reg:CC FLAGS_REG)
15732	(unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
15733		    (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
15734		   UNSPEC_PTEST))]
15735  "TARGET_SSE4_1"
15736  "%vptest\t{%1, %0|%0, %1}"
15737  [(set_attr "isa" "noavx,noavx,avx")
15738   (set_attr "type" "ssecomi")
15739   (set_attr "prefix_extra" "1")
15740   (set_attr "prefix" "orig,orig,vex")
15741   (set_attr "mode" "TI")])
15742
;; Packed ROUNDPS/ROUNDPD (and their v-prefixed forms) for the VF_128_256
;; modes: UNSPEC_ROUND of a vector source and an immediate in operand 2
;; (0..15).  prefix_data16 is set only for the two non-AVX alternatives.
15743(define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
15744  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15745	(unspec:VF_128_256
15746	  [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm")
15747	   (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")]
15748	  UNSPEC_ROUND))]
15749  "TARGET_SSE4_1"
15750  "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15751  [(set_attr "isa" "noavx,noavx,avx")
15752   (set_attr "type" "ssecvt")
15753   (set_attr "prefix_data16" "1,1,*")
15754   (set_attr "prefix_extra" "1")
15755   (set_attr "length_immediate" "1")
15756   (set_attr "prefix" "orig,orig,vex")
15757   (set_attr "mode" "<MODE>")])
15758
;; Round-then-convert expander for the VF1_128_256 modes.  Operand 1 is
;; rounded with the immediate in operand 2 into a fresh pseudo, and that
;; temporary is then converted to the integer vector in operand 0 with
;; fix_trunc.  The expander emits both insns itself and finishes with DONE.
15759(define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
15760  [(match_operand:<sseintvecmode> 0 "register_operand")
15761   (match_operand:VF1_128_256 1 "vector_operand")
15762   (match_operand:SI 2 "const_0_to_15_operand")]
15763  "TARGET_SSE4_1"
15764{
15765  rtx tmp = gen_reg_rtx (<MODE>mode);
15766
15767  emit_insn
15768    (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
15769						       operands[2]));
15770  emit_insn
15771    (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
15772  DONE;
15773})
15774
;; 512-bit "round" entry point for the VF_512 modes.  It simply forwards
;; all three operands to the avx512f_rndscale<mode> generator.
15775(define_expand "avx512f_round<castmode>512"
15776  [(match_operand:VF_512 0 "register_operand")
15777   (match_operand:VF_512 1 "nonimmediate_operand")
15778   (match_operand:SI 2 "const_0_to_15_operand")]
15779  "TARGET_AVX512F"
15780{
15781  emit_insn (gen_avx512f_rndscale<mode> (operands[0], operands[1], operands[2]));
15782  DONE;
15783})
15784
;; Round-then-convert expander for V16SF -> V16SI.  Operand 1 is rounded
;; via avx512f_rndscalev16sf (immediate in operand 2) into a fresh V16SF
;; pseudo, which is then converted to operand 0 with fix_truncv16sfv16si2.
15785(define_expand "avx512f_roundps512_sfix"
15786  [(match_operand:V16SI 0 "register_operand")
15787   (match_operand:V16SF 1 "nonimmediate_operand")
15788   (match_operand:SI 2 "const_0_to_15_operand")]
15789  "TARGET_AVX512F"
15790{
15791  rtx tmp = gen_reg_rtx (V16SFmode);
15792  emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
15793  emit_insn (gen_fix_truncv16sfv16si2 (operands[0], tmp));
15794  DONE;
15795})
15796
;; Round two VF2 vectors (operands 1 and 2) with the immediate in operand 3
;; and pack the converted integers into operand 0.
;; Fast path: for V2DF when AVX is available, 128-bit AVX is not preferred
;; and the insn is optimized for speed, both inputs are concatenated into
;; one V4DF, rounded once with avx_roundpd256 and converted with
;; fix_truncv4dfv4si2.
;; Otherwise each input is rounded separately and the two results are
;; combined by vec_pack_sfix_trunc_<mode>.
15797(define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
15798  [(match_operand:<ssepackfltmode> 0 "register_operand")
15799   (match_operand:VF2 1 "vector_operand")
15800   (match_operand:VF2 2 "vector_operand")
15801   (match_operand:SI 3 "const_0_to_15_operand")]
15802  "TARGET_SSE4_1"
15803{
15804  rtx tmp0, tmp1;
15805
15806  if (<MODE>mode == V2DFmode
15807      && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
15808    {
15809      rtx tmp2 = gen_reg_rtx (V4DFmode);
15810
15811      tmp0 = gen_reg_rtx (V4DFmode);
15812      tmp1 = force_reg (V2DFmode, operands[1]);
15813
15814      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
15815      emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
15816      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
15817    }
15818  else
15819    {
15820      tmp0 = gen_reg_rtx (<MODE>mode);
15821      tmp1 = gen_reg_rtx (<MODE>mode);
15822
15823      emit_insn
15824       (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
15825							  operands[3]));
15826      emit_insn
15827       (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
15828							  operands[3]));
15829      emit_insn
15830       (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
15831    }
15832  DONE;
15833})
15834
;; Scalar ROUNDSS/ROUNDSD.  The result is a vec_merge with mask
;; (const_int 1): element 0 comes from UNSPEC_ROUND of operand 2 (immediate
;; in operand 3, 0..15) and the remaining elements come from operand 1.
;; Alternatives: two non-AVX forms (operand 1 tied to the destination), an
;; AVX "vround" form, and an AVX512F form that prints "vrndscale" instead.
15835(define_insn "sse4_1_round<ssescalarmodesuffix>"
15836  [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
15837	(vec_merge:VF_128
15838	  (unspec:VF_128
15839	    [(match_operand:VF_128 2 "register_operand" "Yr,*x,x,v")
15840	     (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
15841	    UNSPEC_ROUND)
15842	  (match_operand:VF_128 1 "register_operand" "0,0,x,v")
15843	  (const_int 1)))]
15844  "TARGET_SSE4_1"
15845  "@
15846   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
15847   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
15848   vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
15849   vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15850  [(set_attr "isa" "noavx,noavx,avx,avx512f")
15851   (set_attr "type" "ssecvt")
15852   (set_attr "length_immediate" "1")
15853   (set_attr "prefix_data16" "1,1,*,*")
15854   (set_attr "prefix_extra" "1")
15855   (set_attr "prefix" "orig,orig,vex,evex")
15856   (set_attr "mode" "<MODE>")])
15857
;; Vector round<mode>2 expander; only enabled with SSE4.1 and
;; !flag_trapping_math.
;; Emitted sequence: operand 3 = operand 1 + operand 2, then operand 0 =
;; UNSPEC_ROUND (operand 3, ROUND_TRUNC).
;; Operand 2 is built by the preparation code.  It takes the constant
;; 0.5 - 2**(-p-1) (p = precision of the scalar format, i.e. the
;; "nextafter (0.5, 0.0)" mentioned in the body), broadcasts it to a vector,
;; and passes it through copysign<mode>3 together with operand 1.
15858(define_expand "round<mode>2"
15859  [(set (match_dup 3)
15860	(plus:VF
15861	  (match_operand:VF 1 "register_operand")
15862	  (match_dup 2)))
15863   (set (match_operand:VF 0 "register_operand")
15864	(unspec:VF
15865	  [(match_dup 3) (match_dup 4)]
15866	  UNSPEC_ROUND))]
15867  "TARGET_SSE4_1 && !flag_trapping_math"
15868{
15869  machine_mode scalar_mode;
15870  const struct real_format *fmt;
15871  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
15872  rtx half, vec_half;
15873
15874  scalar_mode = GET_MODE_INNER (<MODE>mode);
15875
15876  /* load nextafter (0.5, 0.0) */
15877  fmt = REAL_MODE_FORMAT (scalar_mode);
15878  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
15879  real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
15880  half = const_double_from_real_value (pred_half, scalar_mode);
15881
15882  vec_half = ix86_build_const_vector (<MODE>mode, true, half);
15883  vec_half = force_reg (<MODE>mode, vec_half);
15884
15885  operands[2] = gen_reg_rtx (<MODE>mode);
15886  emit_insn (gen_copysign<mode>3 (operands[2], vec_half, operands[1]));
15887
15888  operands[3] = gen_reg_rtx (<MODE>mode);
15889  operands[4] = GEN_INT (ROUND_TRUNC);
15890})
15891
;; round<mode>2 followed by conversion to an integer vector (VF1 modes).
;; The rounded value goes into a fresh pseudo via gen_round<mode>2 and is
;; then converted into operand 0 with fix_trunc.  Same enabling condition
;; as round<mode>2 (SSE4.1 and !flag_trapping_math).
15892(define_expand "round<mode>2_sfix"
15893  [(match_operand:<sseintvecmode> 0 "register_operand")
15894   (match_operand:VF1 1 "register_operand")]
15895  "TARGET_SSE4_1 && !flag_trapping_math"
15896{
15897  rtx tmp = gen_reg_rtx (<MODE>mode);
15898
15899  emit_insn (gen_round<mode>2 (tmp, operands[1]));
15900
15901  emit_insn
15902    (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
15903  DONE;
15904})
15905
;; Apply round<mode>2 to two VF2 vectors (operands 1 and 2) and pack the
;; converted integers into operand 0.
;; Fast path: for V2DF when AVX is available, 128-bit AVX is not preferred
;; and the insn is optimized for speed, both inputs are concatenated into
;; one V4DF, rounded once with roundv4df2 and converted with
;; fix_truncv4dfv4si2.
;; Otherwise each input is rounded separately and the two results are
;; combined by vec_pack_sfix_trunc_<mode>.
15906(define_expand "round<mode>2_vec_pack_sfix"
15907  [(match_operand:<ssepackfltmode> 0 "register_operand")
15908   (match_operand:VF2 1 "register_operand")
15909   (match_operand:VF2 2 "register_operand")]
15910  "TARGET_SSE4_1 && !flag_trapping_math"
15911{
15912  rtx tmp0, tmp1;
15913
15914  if (<MODE>mode == V2DFmode
15915      && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
15916    {
15917      rtx tmp2 = gen_reg_rtx (V4DFmode);
15918
15919      tmp0 = gen_reg_rtx (V4DFmode);
15920      tmp1 = force_reg (V2DFmode, operands[1]);
15921
15922      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
15923      emit_insn (gen_roundv4df2 (tmp2, tmp0));
15924      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
15925    }
15926  else
15927    {
15928      tmp0 = gen_reg_rtx (<MODE>mode);
15929      tmp1 = gen_reg_rtx (<MODE>mode);
15930
15931      emit_insn (gen_round<mode>2 (tmp0, operands[1]));
15932      emit_insn (gen_round<mode>2 (tmp1, operands[2]));
15933
15934      emit_insn
15935       (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
15936    }
15937  DONE;
15938})
15939
15940;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15941;;
15942;; Intel SSE4.2 string/text processing instructions
15943;;
15944;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15945
;; Combined PCMPESTR pattern with three results: an SImode value in
;; operand 0 ("c" constraint), a V16QI value in operand 1 ("Yz"
;; constraint) and FLAGS_REG.
;; It only matches while pseudos can still be created, never emits assembly
;; itself ("#") and is always split ("&& 1").
;; The splitter checks the REG_UNUSED notes on curr_insn and emits only
;; what is live:
;;   - pcmpestri if operand 0 is used,
;;   - pcmpestrm if operand 1 is used,
;;   - the flags-only pattern if just FLAGS_REG is used,
;;   - a NOTE_INSN_DELETED note if nothing is used.
15946(define_insn_and_split "sse4_2_pcmpestr"
15947  [(set (match_operand:SI 0 "register_operand" "=c,c")
15948	(unspec:SI
15949	  [(match_operand:V16QI 2 "register_operand" "x,x")
15950	   (match_operand:SI 3 "register_operand" "a,a")
15951	   (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
15952	   (match_operand:SI 5 "register_operand" "d,d")
15953	   (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
15954	  UNSPEC_PCMPESTR))
15955   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
15956	(unspec:V16QI
15957	  [(match_dup 2)
15958	   (match_dup 3)
15959	   (match_dup 4)
15960	   (match_dup 5)
15961	   (match_dup 6)]
15962	  UNSPEC_PCMPESTR))
15963   (set (reg:CC FLAGS_REG)
15964	(unspec:CC
15965	  [(match_dup 2)
15966	   (match_dup 3)
15967	   (match_dup 4)
15968	   (match_dup 5)
15969	   (match_dup 6)]
15970	  UNSPEC_PCMPESTR))]
15971  "TARGET_SSE4_2
15972   && can_create_pseudo_p ()"
15973  "#"
15974  "&& 1"
15975  [(const_int 0)]
15976{
15977  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15978  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15979  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15980
15981  if (ecx)
15982    emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
15983				     operands[3], operands[4],
15984				     operands[5], operands[6]));
15985  if (xmm0)
15986    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
15987				     operands[3], operands[4],
15988				     operands[5], operands[6]));
15989  if (flags && !(ecx || xmm0))
15990    emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
15991					   operands[2], operands[3],
15992					   operands[4], operands[5],
15993					   operands[6]));
15994  if (!(flags || ecx || xmm0))
15995    emit_note (NOTE_INSN_DELETED);
15996
15997  DONE;
15998}
15999  [(set_attr "type" "sselog")
16000   (set_attr "prefix_data16" "1")
16001   (set_attr "prefix_extra" "1")
16002   (set_attr "length_immediate" "1")
16003   (set_attr "memory" "none,load")
16004   (set_attr "mode" "TI")])
16005
;; PCMPESTRI: produces the SImode result in operand 0 ("c" constraint) and
;; sets FLAGS_REG.  Inputs are two V16QI vectors (operands 1 and 3), two
;; SImode registers constrained to "a" and "d" (operands 2 and 4) and an
;; immediate (operand 5, 0..255).  Only operands 1, 3 and 5 appear in the
;; assembly template.  Alternative 1 takes operand 3 from memory.
16006(define_insn "sse4_2_pcmpestri"
16007  [(set (match_operand:SI 0 "register_operand" "=c,c")
16008	(unspec:SI
16009	  [(match_operand:V16QI 1 "register_operand" "x,x")
16010	   (match_operand:SI 2 "register_operand" "a,a")
16011	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
16012	   (match_operand:SI 4 "register_operand" "d,d")
16013	   (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
16014	  UNSPEC_PCMPESTR))
16015   (set (reg:CC FLAGS_REG)
16016	(unspec:CC
16017	  [(match_dup 1)
16018	   (match_dup 2)
16019	   (match_dup 3)
16020	   (match_dup 4)
16021	   (match_dup 5)]
16022	  UNSPEC_PCMPESTR))]
16023  "TARGET_SSE4_2"
16024  "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
16025  [(set_attr "type" "sselog")
16026   (set_attr "prefix_data16" "1")
16027   (set_attr "prefix_extra" "1")
16028   (set_attr "prefix" "maybe_vex")
16029   (set_attr "length_immediate" "1")
16030   (set_attr "btver2_decode" "vector")
16031   (set_attr "memory" "none,load")
16032   (set_attr "mode" "TI")])
16033
;; PCMPESTRM: produces the V16QI result in operand 0 ("Yz" constraint) and
;; sets FLAGS_REG.  Inputs are two V16QI vectors (operands 1 and 3), two
;; SImode registers constrained to "a" and "d" (operands 2 and 4) and an
;; immediate (operand 5, 0..255).  Only operands 1, 3 and 5 appear in the
;; assembly template.  Alternative 1 takes operand 3 from memory.
16034(define_insn "sse4_2_pcmpestrm"
16035  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
16036	(unspec:V16QI
16037	  [(match_operand:V16QI 1 "register_operand" "x,x")
16038	   (match_operand:SI 2 "register_operand" "a,a")
16039	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
16040	   (match_operand:SI 4 "register_operand" "d,d")
16041	   (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
16042	  UNSPEC_PCMPESTR))
16043   (set (reg:CC FLAGS_REG)
16044	(unspec:CC
16045	  [(match_dup 1)
16046	   (match_dup 2)
16047	   (match_dup 3)
16048	   (match_dup 4)
16049	   (match_dup 5)]
16050	  UNSPEC_PCMPESTR))]
16051  "TARGET_SSE4_2"
16052  "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
16053  [(set_attr "type" "sselog")
16054   (set_attr "prefix_data16" "1")
16055   (set_attr "prefix_extra" "1")
16056   (set_attr "length_immediate" "1")
16057   (set_attr "prefix" "maybe_vex")
16058   (set_attr "btver2_decode" "vector")
16059   (set_attr "memory" "none,load")
16060   (set_attr "mode" "TI")])
16061
;; Flags-only PCMPESTR: the sole result is FLAGS_REG; operands 0 and 1 are
;; scratch clobbers.  Four alternatives:
;;   0/1 print pcmpestrm and clobber a V16QI scratch ("Yz"), with operand 4
;;       in a register / in memory;
;;   2/3 print pcmpestri and clobber an SImode scratch ("c"), with operand
;;       4 in a register / in memory.
;; The scratch not needed by an alternative uses the "X" constraint.
16062(define_insn "sse4_2_pcmpestr_cconly"
16063  [(set (reg:CC FLAGS_REG)
16064	(unspec:CC
16065	  [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
16066	   (match_operand:SI 3 "register_operand" "a,a,a,a")
16067	   (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
16068	   (match_operand:SI 5 "register_operand" "d,d,d,d")
16069	   (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
16070	  UNSPEC_PCMPESTR))
16071   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
16072   (clobber (match_scratch:SI    1 "= X, X,c,c"))]
16073  "TARGET_SSE4_2"
16074  "@
16075   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
16076   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
16077   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
16078   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
16079  [(set_attr "type" "sselog")
16080   (set_attr "prefix_data16" "1")
16081   (set_attr "prefix_extra" "1")
16082   (set_attr "length_immediate" "1")
16083   (set_attr "memory" "none,load,none,load")
16084   (set_attr "btver2_decode" "vector,vector,vector,vector")
16085   (set_attr "prefix" "maybe_vex")
16086   (set_attr "mode" "TI")])
16087
;; Combined PCMPISTR pattern with three results: an SImode value in
;; operand 0 ("c" constraint), a V16QI value in operand 1 ("Yz"
;; constraint) and FLAGS_REG.
;; It only matches while pseudos can still be created, never emits assembly
;; itself ("#") and is always split ("&& 1").
;; The splitter checks the REG_UNUSED notes on curr_insn and emits only
;; what is live:
;;   - pcmpistri if operand 0 is used,
;;   - pcmpistrm if operand 1 is used,
;;   - the flags-only pattern if just FLAGS_REG is used,
;;   - a NOTE_INSN_DELETED note if nothing is used.
16088(define_insn_and_split "sse4_2_pcmpistr"
16089  [(set (match_operand:SI 0 "register_operand" "=c,c")
16090	(unspec:SI
16091	  [(match_operand:V16QI 2 "register_operand" "x,x")
16092	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
16093	   (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
16094	  UNSPEC_PCMPISTR))
16095   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
16096	(unspec:V16QI
16097	  [(match_dup 2)
16098	   (match_dup 3)
16099	   (match_dup 4)]
16100	  UNSPEC_PCMPISTR))
16101   (set (reg:CC FLAGS_REG)
16102	(unspec:CC
16103	  [(match_dup 2)
16104	   (match_dup 3)
16105	   (match_dup 4)]
16106	  UNSPEC_PCMPISTR))]
16107  "TARGET_SSE4_2
16108   && can_create_pseudo_p ()"
16109  "#"
16110  "&& 1"
16111  [(const_int 0)]
16112{
16113  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
16114  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
16115  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
16116
16117  if (ecx)
16118    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
16119				     operands[3], operands[4]));
16120  if (xmm0)
16121    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
16122				     operands[3], operands[4]));
16123  if (flags && !(ecx || xmm0))
16124    emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
16125					   operands[2], operands[3],
16126					   operands[4]));
16127  if (!(flags || ecx || xmm0))
16128    emit_note (NOTE_INSN_DELETED);
16129
16130  DONE;
16131}
16132  [(set_attr "type" "sselog")
16133   (set_attr "prefix_data16" "1")
16134   (set_attr "prefix_extra" "1")
16135   (set_attr "length_immediate" "1")
16136   (set_attr "memory" "none,load")
16137   (set_attr "mode" "TI")])
16138
;; PCMPISTRI: produces the SImode result in operand 0 ("c" constraint) and
;; sets FLAGS_REG.  Inputs are two V16QI vectors (operands 1 and 2) and an
;; immediate (operand 3, 0..255).  Alternative 1 takes operand 2 from
;; memory.
16139(define_insn "sse4_2_pcmpistri"
16140  [(set (match_operand:SI 0 "register_operand" "=c,c")
16141	(unspec:SI
16142	  [(match_operand:V16QI 1 "register_operand" "x,x")
16143	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
16144	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
16145	  UNSPEC_PCMPISTR))
16146   (set (reg:CC FLAGS_REG)
16147	(unspec:CC
16148	  [(match_dup 1)
16149	   (match_dup 2)
16150	   (match_dup 3)]
16151	  UNSPEC_PCMPISTR))]
16152  "TARGET_SSE4_2"
16153  "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
16154  [(set_attr "type" "sselog")
16155   (set_attr "prefix_data16" "1")
16156   (set_attr "prefix_extra" "1")
16157   (set_attr "length_immediate" "1")
16158   (set_attr "prefix" "maybe_vex")
16159   (set_attr "memory" "none,load")
16160   (set_attr "btver2_decode" "vector")
16161   (set_attr "mode" "TI")])
16162
;; PCMPISTRM: produces the V16QI result in operand 0 ("Yz" constraint) and
;; sets FLAGS_REG.  Inputs are two V16QI vectors (operands 1 and 2) and an
;; immediate (operand 3, 0..255).  Alternative 1 takes operand 2 from
;; memory.
16163(define_insn "sse4_2_pcmpistrm"
16164  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
16165	(unspec:V16QI
16166	  [(match_operand:V16QI 1 "register_operand" "x,x")
16167	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
16168	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
16169	  UNSPEC_PCMPISTR))
16170   (set (reg:CC FLAGS_REG)
16171	(unspec:CC
16172	  [(match_dup 1)
16173	   (match_dup 2)
16174	   (match_dup 3)]
16175	  UNSPEC_PCMPISTR))]
16176  "TARGET_SSE4_2"
16177  "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
16178  [(set_attr "type" "sselog")
16179   (set_attr "prefix_data16" "1")
16180   (set_attr "prefix_extra" "1")
16181   (set_attr "length_immediate" "1")
16182   (set_attr "prefix" "maybe_vex")
16183   (set_attr "memory" "none,load")
16184   (set_attr "btver2_decode" "vector")
16185   (set_attr "mode" "TI")])
16186
;; Flags-only PCMPISTR: the sole result is FLAGS_REG; operands 0 and 1 are
;; scratch clobbers.  Four alternatives:
;;   0/1 print pcmpistrm and clobber a V16QI scratch ("Yz"), with operand 3
;;       in a register / in memory;
;;   2/3 print pcmpistri and clobber an SImode scratch ("c"), with operand
;;       3 in a register / in memory.
;; The scratch not needed by an alternative uses the "X" constraint.
16187(define_insn "sse4_2_pcmpistr_cconly"
16188  [(set (reg:CC FLAGS_REG)
16189	(unspec:CC
16190	  [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
16191	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
16192	   (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
16193	  UNSPEC_PCMPISTR))
16194   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
16195   (clobber (match_scratch:SI    1 "= X, X,c,c"))]
16196  "TARGET_SSE4_2"
16197  "@
16198   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
16199   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
16200   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
16201   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
16202  [(set_attr "type" "sselog")
16203   (set_attr "prefix_data16" "1")
16204   (set_attr "prefix_extra" "1")
16205   (set_attr "length_immediate" "1")
16206   (set_attr "memory" "none,load,none,load")
16207   (set_attr "prefix" "maybe_vex")
16208   (set_attr "btver2_decode" "vector,vector,vector,vector")
16209   (set_attr "mode" "TI")])
16210
;; Packed float variants
;; Memory mode used by the single-precision prefetch patterns, keyed by
;; the mode of the index vector.
(define_mode_attr GATHER_SCATTER_SF_MEM_MODE
  [(V8DI "V8SF")
   (V16SI "V16SF")])
16214
;; Expander for the single-precision gather prefetch.  Operand 0 is the
;; mask register, operands 2/1/3 are the pieces of the VSIB address and
;; operand 4 is the hint (2 or 3).  Operand 5 is built here.
(define_expand "avx512pf_gatherpf<mode>sf"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand")
      (mem:<GATHER_SCATTER_SF_MEM_MODE>
        (match_par_dup 5
          [(match_operand 2 "vsib_address_operand")
           (match_operand:VI48_512 1 "register_operand")
           (match_operand:SI 3 "const1248_operand")]))
      (match_operand:SI 4 "const_2_to_3_operand")]
     UNSPEC_GATHER_PREFETCH)]
  "TARGET_AVX512PF"
{
  /* Wrap operands 2, 1 and 3 (in that order) in an UNSPEC_VSIBADDR of
     mode Pmode; the insn pattern matches this as the memory address.  */
  rtvec vsib_parts = gen_rtvec (3, operands[2], operands[1], operands[3]);
  operands[5] = gen_rtx_UNSPEC (Pmode, vsib_parts, UNSPEC_VSIBADDR);
})
16231
;; Masked single-precision gather prefetch.  The hint in operand 4
;; selects the mnemonic: 3 emits vgatherpf0*ps, 2 emits vgatherpf1*ps.
(define_insn "*avx512pf_gatherpf<mode>sf_mask"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
      (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
        [(unspec:P
           [(match_operand:P 2 "vsib_address_operand" "Tv")
            (match_operand:VI48_512 1 "register_operand" "v")
            (match_operand:SI 3 "const1248_operand" "n")]
           UNSPEC_VSIBADDR)])
      (match_operand:SI 4 "const_2_to_3_operand" "n")]
     UNSPEC_GATHER_PREFETCH)]
  "TARGET_AVX512PF"
{
  const HOST_WIDE_INT hint = INTVAL (operands[4]);

  if (hint == 3)
    return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
  if (hint == 2)
    return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";

  /* The operand predicate only admits 2 and 3.  */
  gcc_unreachable ();
}
  [(set_attr "mode" "XI")
   (set_attr "prefix" "evex")
   (set_attr "type" "sse")])
16258
;; Packed double variants
;; Expander for the double-precision gather prefetch; the memory mode is
;; always V8DF.  Operand 0 is the mask register, operands 2/1/3 are the
;; pieces of the VSIB address and operand 4 is the hint (2 or 3).
(define_expand "avx512pf_gatherpf<mode>df"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand")
      (mem:V8DF
        (match_par_dup 5
          [(match_operand 2 "vsib_address_operand")
           (match_operand:VI4_256_8_512 1 "register_operand")
           (match_operand:SI 3 "const1248_operand")]))
      (match_operand:SI 4 "const_2_to_3_operand")]
     UNSPEC_GATHER_PREFETCH)]
  "TARGET_AVX512PF"
{
  /* Wrap operands 2, 1 and 3 (in that order) in an UNSPEC_VSIBADDR of
     mode Pmode; the insn pattern matches this as the memory address.  */
  rtvec vsib_parts = gen_rtvec (3, operands[2], operands[1], operands[3]);
  operands[5] = gen_rtx_UNSPEC (Pmode, vsib_parts, UNSPEC_VSIBADDR);
})
16276
;; Masked double-precision gather prefetch.  The hint in operand 4
;; selects the mnemonic: 3 emits vgatherpf0*pd, 2 emits vgatherpf1*pd.
(define_insn "*avx512pf_gatherpf<mode>df_mask"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
      (match_operator:V8DF 5 "vsib_mem_operator"
        [(unspec:P
           [(match_operand:P 2 "vsib_address_operand" "Tv")
            (match_operand:VI4_256_8_512 1 "register_operand" "v")
            (match_operand:SI 3 "const1248_operand" "n")]
           UNSPEC_VSIBADDR)])
      (match_operand:SI 4 "const_2_to_3_operand" "n")]
     UNSPEC_GATHER_PREFETCH)]
  "TARGET_AVX512PF"
{
  const HOST_WIDE_INT hint = INTVAL (operands[4]);

  if (hint == 3)
    return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
  if (hint == 2)
    return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";

  /* The operand predicate only admits 2 and 3.  */
  gcc_unreachable ();
}
  [(set_attr "mode" "XI")
   (set_attr "prefix" "evex")
   (set_attr "type" "sse")])
16303
;; Packed float variants
;; Expander for the single-precision scatter prefetch.  Operand 0 is the
;; mask register, operands 2/1/3 are the pieces of the VSIB address and
;; operand 4 is the hint (one of 2, 3, 6, 7).  Operand 5 is built here.
(define_expand "avx512pf_scatterpf<mode>sf"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand")
      (mem:<GATHER_SCATTER_SF_MEM_MODE>
        (match_par_dup 5
          [(match_operand 2 "vsib_address_operand")
           (match_operand:VI48_512 1 "register_operand")
           (match_operand:SI 3 "const1248_operand")]))
      (match_operand:SI 4 "const2367_operand")]
     UNSPEC_SCATTER_PREFETCH)]
  "TARGET_AVX512PF"
{
  /* Wrap operands 2, 1 and 3 (in that order) in an UNSPEC_VSIBADDR of
     mode Pmode; the insn pattern matches this as the memory address.  */
  rtvec vsib_parts = gen_rtvec (3, operands[2], operands[1], operands[3]);
  operands[5] = gen_rtx_UNSPEC (Pmode, vsib_parts, UNSPEC_VSIBADDR);
})
16321
;; Masked single-precision scatter prefetch.  The hint in operand 4
;; selects the mnemonic: 3 or 7 emits vscatterpf0*ps, 2 or 6 emits
;; vscatterpf1*ps.
(define_insn "*avx512pf_scatterpf<mode>sf_mask"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
      (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
        [(unspec:P
           [(match_operand:P 2 "vsib_address_operand" "Tv")
            (match_operand:VI48_512 1 "register_operand" "v")
            (match_operand:SI 3 "const1248_operand" "n")]
           UNSPEC_VSIBADDR)])
      (match_operand:SI 4 "const2367_operand" "n")]
     UNSPEC_SCATTER_PREFETCH)]
  "TARGET_AVX512PF"
{
  const HOST_WIDE_INT hint = INTVAL (operands[4]);

  if (hint == 3 || hint == 7)
    return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
  if (hint == 2 || hint == 6)
    return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";

  /* The operand predicate only admits 2, 3, 6 and 7.  */
  gcc_unreachable ();
}
  [(set_attr "mode" "XI")
   (set_attr "prefix" "evex")
   (set_attr "type" "sse")])
16350
;; Packed double variants
;; Expander for the double-precision scatter prefetch; the memory mode
;; is always V8DF.  Operand 0 is the mask register, operands 2/1/3 are
;; the pieces of the VSIB address and operand 4 is the hint (one of
;; 2, 3, 6, 7).
(define_expand "avx512pf_scatterpf<mode>df"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand")
      (mem:V8DF
        (match_par_dup 5
          [(match_operand 2 "vsib_address_operand")
           (match_operand:VI4_256_8_512 1 "register_operand")
           (match_operand:SI 3 "const1248_operand")]))
      (match_operand:SI 4 "const2367_operand")]
     UNSPEC_SCATTER_PREFETCH)]
  "TARGET_AVX512PF"
{
  /* Wrap operands 2, 1 and 3 (in that order) in an UNSPEC_VSIBADDR of
     mode Pmode; the insn pattern matches this as the memory address.  */
  rtvec vsib_parts = gen_rtvec (3, operands[2], operands[1], operands[3]);
  operands[5] = gen_rtx_UNSPEC (Pmode, vsib_parts, UNSPEC_VSIBADDR);
})
16368
;; Masked double-precision scatter prefetch.  The hint in operand 4
;; selects the mnemonic: 3 or 7 emits vscatterpf0*pd, 2 or 6 emits
;; vscatterpf1*pd.
(define_insn "*avx512pf_scatterpf<mode>df_mask"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
      (match_operator:V8DF 5 "vsib_mem_operator"
        [(unspec:P
           [(match_operand:P 2 "vsib_address_operand" "Tv")
            (match_operand:VI4_256_8_512 1 "register_operand" "v")
            (match_operand:SI 3 "const1248_operand" "n")]
           UNSPEC_VSIBADDR)])
      (match_operand:SI 4 "const2367_operand" "n")]
     UNSPEC_SCATTER_PREFETCH)]
  "TARGET_AVX512PF"
{
  const HOST_WIDE_INT hint = INTVAL (operands[4]);

  if (hint == 3 || hint == 7)
    return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
  if (hint == 2 || hint == 6)
    return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";

  /* The operand predicate only admits 2, 3, 6 and 7.  */
  gcc_unreachable ();
}
  [(set_attr "mode" "XI")
   (set_attr "prefix" "evex")
   (set_attr "type" "sse")])
16397
;; vexp2 on a 512-bit float vector, modelled as UNSPEC_EXP2 of operand 1.
;; The <mask_name> and <round_saeonly_name> substitutions add the masked
;; and SAE variants.
(define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_512 0 "register_operand" "=v")
        (unspec:VF_512
          [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>"
                                   "<round_saeonly_constraint>")]
          UNSPEC_EXP2))]
  "TARGET_AVX512ER"
  "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
16408
;; vrcp28 on a 512-bit float vector, modelled as UNSPEC_RCP28 of
;; operand 1.  The <mask_name> and <round_saeonly_name> substitutions add
;; the masked and SAE variants.
(define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_512 0 "register_operand" "=v")
        (unspec:VF_512
          [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>"
                                   "<round_saeonly_constraint>")]
          UNSPEC_RCP28))]
  "TARGET_AVX512ER"
  "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
16419
;; Scalar vrcp28: element 0 of the result is UNSPEC_RCP28 of operand 1,
;; the remaining elements are taken from operand 2 (vec_merge mask 1).
(define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>"
                                     "<round_saeonly_constraint>")]
            UNSPEC_RCP28)
          (match_operand:VF_128 2 "register_operand" "v")
          (const_int 1)))]
  "TARGET_AVX512ER"
  "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %<iptr>1<round_saeonly_op3>}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<MODE>")])
16434
;; vrsqrt28 on a 512-bit float vector, modelled as UNSPEC_RSQRT28 of
;; operand 1.  The <mask_name> and <round_saeonly_name> substitutions add
;; the masked and SAE variants.
(define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_512 0 "register_operand" "=v")
        (unspec:VF_512
          [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>"
                                   "<round_saeonly_constraint>")]
          UNSPEC_RSQRT28))]
  "TARGET_AVX512ER"
  "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
16445
;; Scalar vrsqrt28: element 0 of the result is UNSPEC_RSQRT28 of
;; operand 1, the remaining elements are taken from operand 2
;; (vec_merge mask 1).
(define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>"
                                     "<round_saeonly_constraint>")]
            UNSPEC_RSQRT28)
          (match_operand:VF_128 2 "register_operand" "v")
          (const_int 1)))]
  "TARGET_AVX512ER"
  "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %<iptr>1<round_saeonly_op3>}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<MODE>")])
16460
16461;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16462;;
16463;; XOP instructions
16464;;
16465;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16466
;; The XOP multiply/accumulate patterns come in two flavours that differ
;; only in the final addition: plain (plus) or signed-saturating
;; (ss_plus).
(define_code_iterator xop_plus
  [plus ss_plus])

;; Mnemonic stems selected by the xop_plus code.
(define_code_attr macs
  [(plus "macs")
   (ss_plus "macss")])
(define_code_attr madcs
  [(plus "madcs")
   (ss_plus "madcss")])
16471
16472;; XOP parallel integer multiply/add instructions.
16473
;; Element-wise multiply-accumulate: operand 1 times operand 2, combined
;; with operand 3 by the xop_plus code (plain or saturating add).
(define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
  [(set (match_operand:VI24_128 0 "register_operand" "=x")
        (xop_plus:VI24_128
          (mult:VI24_128
            (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
            (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
          (match_operand:VI24_128 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "TI")
   (set_attr "type" "ssemuladd")])
16485
;; Widening multiply-accumulate on the even SImode elements (0 and 2):
;; both inputs are sign-extended to DImode, multiplied, and combined with
;; the V2DI accumulator in operand 3 by the xop_plus code.
(define_insn "xop_p<macs>dql"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (xop_plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 0) (const_int 2)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)]))))
          (match_operand:V2DI 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "TI")
   (set_attr "type" "ssemuladd")])
16503
;; Widening multiply-accumulate on the odd SImode elements (1 and 3):
;; both inputs are sign-extended to DImode, multiplied, and combined with
;; the V2DI accumulator in operand 3 by the xop_plus code.
(define_insn "xop_p<macs>dqh"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (xop_plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)]))))
          (match_operand:V2DI 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "TI")
   (set_attr "type" "ssemuladd")])
16521
;; XOP parallel integer multiply/add instructions for the intrinsics
;; Widening multiply-accumulate on the odd HImode elements (1, 3, 5, 7):
;; both inputs are sign-extended to SImode, multiplied, and combined with
;; the V4SI accumulator in operand 3 by the xop_plus code.
(define_insn "xop_p<macs>wd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (xop_plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))
          (match_operand:V4SI 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "TI")
   (set_attr "type" "ssemuladd")])
16542
;; Multiply, add adjacent pairs, then accumulate: the sign-extended
;; products of the even HImode elements (0, 2, 4, 6) and of the odd
;; HImode elements (1, 3, 5, 7) are added together, and that sum is
;; combined with the V4SI accumulator in operand 3 by the xop_plus code.
(define_insn "xop_p<madcs>wd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (xop_plus:V4SI
          (plus:V4SI
            ;; Products of the even elements.
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)]))))
            ;; Products of the odd elements.
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 2)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))))
          (match_operand:V4SI 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "TI")
   (set_attr "type" "ssemuladd")])
16574
;; XOP parallel XMM conditional moves
;; Modelled as an if_then_else with operand 3 as the condition, operand 1
;; as the "then" value and operand 2 as the "else" value.  The two
;; alternatives allow either operand 3 or operand 2 to be in memory, but
;; not both.
(define_insn "xop_pcmov_<mode><avxsizesuffix>"
  [(set (match_operand:V_128_256 0 "register_operand" "=x,x")
        (if_then_else:V_128_256
          (match_operand:V_128_256 3 "nonimmediate_operand" "x,m")
          (match_operand:V_128_256 1 "register_operand" "x,x")
          (match_operand:V_128_256 2 "nonimmediate_operand" "xm,x")))]
  "TARGET_XOP"
  "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")])
16585
16586;; XOP horizontal add/subtract instructions
;; Byte -> word horizontal add: result element i is the sum of the
;; extended bytes 2i and 2i+1 of operand 1.  any_extend provides the
;; signed and unsigned (<u>) forms.
(define_insn "xop_phadd<u>bw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
          (any_extend:V8HI
            (vec_select:V8QI
              (match_operand:V16QI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (any_extend:V8HI
            (vec_select:V8QI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)
                         (const_int 9) (const_int 11)
                         (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphadd<u>bw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
16607
;; Byte -> doubleword horizontal add: result element i is the sum of the
;; extended bytes 4i .. 4i+3 of operand 1, expressed as a two-level tree
;; of additions.  any_extend provides the signed and unsigned forms.
(define_insn "xop_phadd<u>bd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (plus:V4SI
            (any_extend:V4SI
              (vec_select:V4QI
                (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)
                           (const_int 8) (const_int 12)])))
            (any_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)
                           (const_int 9) (const_int 13)]))))
          (plus:V4SI
            (any_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)
                           (const_int 10) (const_int 14)])))
            (any_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)
                           (const_int 11) (const_int 15)]))))))]
  "TARGET_XOP"
  "vphadd<u>bd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
16636
;; Byte -> quadword horizontal add: result element 0 is the sum of the
;; extended bytes 0..7 of operand 1 and element 1 the sum of bytes 8..15,
;; expressed as a three-level tree of additions.  any_extend provides the
;; signed and unsigned forms.
(define_insn "xop_phadd<u>bq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          ;; Bytes 0-3 and 8-11.
          (plus:V2DI
            (plus:V2DI
              (any_extend:V2DI
                (vec_select:V2QI
                  (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 8)])))
              (any_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 9)]))))
            (plus:V2DI
              (any_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 2) (const_int 10)])))
              (any_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 3) (const_int 11)])))))
          ;; Bytes 4-7 and 12-15.
          (plus:V2DI
            (plus:V2DI
              (any_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 4) (const_int 12)])))
              (any_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 5) (const_int 13)]))))
            (plus:V2DI
              (any_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 6) (const_int 14)])))
              (any_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 7) (const_int 15)])))))))]
  "TARGET_XOP"
  "vphadd<u>bq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
16681
;; Word -> doubleword horizontal add: result element i is the sum of the
;; extended words 2i and 2i+1 of operand 1.  any_extend provides the
;; signed and unsigned forms.
(define_insn "xop_phadd<u>wd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (any_extend:V4SI
            (vec_select:V4HI
              (match_operand:V8HI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (any_extend:V4SI
            (vec_select:V4HI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphadd<u>wd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
16698
;; Word -> quadword horizontal add: result element 0 is the sum of the
;; extended words 0..3 of operand 1 and element 1 the sum of words 4..7,
;; expressed as a two-level tree of additions.  any_extend provides the
;; signed and unsigned forms.
(define_insn "xop_phadd<u>wq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (any_extend:V2DI
              (vec_select:V2HI
                (match_operand:V8HI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)])))
            (any_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)]))))
          (plus:V2DI
            (any_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)])))
            (any_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)]))))))]
  "TARGET_XOP"
  "vphadd<u>wq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
16723
;; Doubleword -> quadword horizontal add: result element i is the sum of
;; the extended doublewords 2i and 2i+1 of operand 1.  any_extend
;; provides the signed and unsigned forms.
(define_insn "xop_phadd<u>dq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (any_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))
          (any_extend:V2DI
            (vec_select:V2SI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphadd<u>dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
16738
;; Byte -> word horizontal subtract: result element i is sign-extended
;; byte 2i minus sign-extended byte 2i+1 of operand 1.
(define_insn "xop_phsubbw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (minus:V8HI
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_operand:V16QI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)
                         (const_int 9) (const_int 11)
                         (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphsubbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
16759
;; Word -> doubleword horizontal subtract: result element i is
;; sign-extended word 2i minus sign-extended word 2i+1 of operand 1.
(define_insn "xop_phsubwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (minus:V4SI
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_operand:V8HI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphsubwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
16776
;; Doubleword -> quadword horizontal subtract: result element i is
;; sign-extended doubleword 2i minus sign-extended doubleword 2i+1 of
;; operand 1.
(define_insn "xop_phsubdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (minus:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphsubdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
16791
;; XOP permute instructions
;; vpperm is kept opaque as UNSPEC_XOP_PERMUTE over operands 1-3.  Either
;; operand 2 or operand 3 may be in memory, but the insn condition
;; rejects the case where both are.
(define_insn "xop_pperm"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
           (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
          UNSPEC_XOP_PERMUTE))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "TI")
   (set_attr "type" "sse4arg")])
16804
;; XOP pack instructions that combine two vectors into a smaller vector
;; V2DI pair -> V4SI: the result is the concatenation of the truncated
;; operands 1 and 2.  Operand 3 is the vpperm selector; the RTL only
;; records it as a (use), so it is the caller's job to supply a selector
;; that implements the truncation.
(define_insn "xop_pperm_pack_v2di_v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (vec_concat:V4SI
          (truncate:V2SI (match_operand:V2DI 1 "register_operand" "x,x"))
          (truncate:V2SI (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "TI")
   (set_attr "type" "sse4arg")])
16818
;; V4SI pair -> V8HI pack via vpperm: the result is the concatenation of
;; the truncated operands 1 and 2.  Operand 3 is the vpperm selector,
;; recorded only as a (use).
(define_insn "xop_pperm_pack_v4si_v8hi"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          (truncate:V4HI (match_operand:V4SI 1 "register_operand" "x,x"))
          (truncate:V4HI (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "TI")
   (set_attr "type" "sse4arg")])
16831
;; V8HI pair -> V16QI pack via vpperm: the result is the concatenation of
;; the truncated operands 1 and 2.  Operand 3 is the vpperm selector,
;; recorded only as a (use).
(define_insn "xop_pperm_pack_v8hi_v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
        (vec_concat:V16QI
          (truncate:V8QI (match_operand:V8HI 1 "register_operand" "x,x"))
          (truncate:V8QI (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "TI")
   (set_attr "type" "sse4arg")])
16844
16845;; XOP packed rotate instructions
;; Vector rotate-left by a scalar count.  A constant count accepted by
;; const_0_to_<sserotatemax>_operand falls through to the default
;; pattern; any other count is broadcast to every element of a vector
;; and handed to the per-element variable rotate, xop_vrotl<mode>3.
(define_expand "rotl<mode>3"
  [(set (match_operand:VI_128 0 "register_operand")
        (rotate:VI_128
          (match_operand:VI_128 1 "nonimmediate_operand")
          (match_operand:SI 2 "general_operand")))]
  "TARGET_XOP"
{
  /* Not an in-range immediate: splat the scalar count into a vector.  */
  if (!const_0_to_<sserotatemax>_operand (operands[2], SImode))
    {
      const int nelts = <ssescalarnum>;
      rtvec elts = rtvec_alloc (nelts);
      rtx init = gen_rtx_PARALLEL (<MODE>mode, elts);
      rtx counts = gen_reg_rtx (<MODE>mode);
      rtx scalar = operands[2];

      /* Bring the count to the element mode before replicating it.  */
      if (GET_MODE (scalar) != <ssescalarmode>mode)
        {
          scalar = gen_reg_rtx (<ssescalarmode>mode);
          convert_move (scalar, operands[2], false);
        }

      for (int lane = 0; lane < nelts; ++lane)
        RTVEC_ELT (elts, lane) = scalar;

      emit_insn (gen_vec_init<mode><ssescalarmodelower> (counts, init));
      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], counts));
      DONE;
    }
})
16876
;; Vector rotate-right by a scalar count.  A constant count accepted by
;; const_0_to_<sserotatemax>_operand falls through to the default
;; pattern; any other count is broadcast to a vector, negated, and handed
;; to xop_vrotl<mode>3, whose pattern rotates right for negative counts.
(define_expand "rotr<mode>3"
  [(set (match_operand:VI_128 0 "register_operand")
        (rotatert:VI_128
          (match_operand:VI_128 1 "nonimmediate_operand")
          (match_operand:SI 2 "general_operand")))]
  "TARGET_XOP"
{
  /* Not an in-range immediate: splat the scalar count into a vector.  */
  if (!const_0_to_<sserotatemax>_operand (operands[2], SImode))
    {
      const int nelts = <ssescalarnum>;
      rtvec elts = rtvec_alloc (nelts);
      rtx init = gen_rtx_PARALLEL (<MODE>mode, elts);
      rtx negated = gen_reg_rtx (<MODE>mode);
      rtx counts = gen_reg_rtx (<MODE>mode);
      rtx scalar = operands[2];

      /* Bring the count to the element mode before replicating it.  */
      if (GET_MODE (scalar) != <ssescalarmode>mode)
        {
          scalar = gen_reg_rtx (<ssescalarmode>mode);
          convert_move (scalar, operands[2], false);
        }

      for (int lane = 0; lane < nelts; ++lane)
        RTVEC_ELT (elts, lane) = scalar;

      emit_insn (gen_vec_init<mode><ssescalarmodelower> (counts, init));
      emit_insn (gen_neg<mode>2 (negated, counts));
      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], negated));
      DONE;
    }
})
16909
;; Rotate every element of operand 1 left by the immediate in operand 2.
(define_insn "xop_rotl<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
        (rotate:VI_128
          (match_operand:VI_128 1 "nonimmediate_operand" "xm")
          (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
  "TARGET_XOP"
  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")
   (set_attr "length_immediate" "1")])
16920
;; Rotate every element of operand 1 right by the immediate in operand 2.
;; The output code emits vprot with the complementary count: operands[3]
;; is set to the element width in bits minus the requested right-rotate
;; amount, and the template prints %3 rather than %2.
(define_insn "xop_rotr<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
	(rotatert:VI_128
	 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
	 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
  "TARGET_XOP"
{
  /* Right rotate by N == left rotate by (element bits - N).  */
  operands[3]
    = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
  /* Plain quotes: a brace block is already C code, so the string literal
     needs no backslash escapes (matches the other brace blocks here).  */
  return "vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
16935
;; Per-element variable rotate right: negate the count vector in
;; operand 2 and reuse xop_vrotl<mode>3, whose pattern rotates right for
;; negative counts.
(define_expand "vrotr<mode>3"
  [(match_operand:VI_128 0 "register_operand")
   (match_operand:VI_128 1 "register_operand")
   (match_operand:VI_128 2 "register_operand")]
  "TARGET_XOP"
{
  rtx negated_counts = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neg<mode>2 (negated_counts, operands[2]));
  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1],
                                   negated_counts));
  DONE;
})
16947
;; Per-element variable rotate left: forwards its three operands
;; unchanged to xop_vrotl<mode>3.
(define_expand "vrotl<mode>3"
  [(match_operand:VI_128 0 "register_operand")
   (match_operand:VI_128 1 "register_operand")
   (match_operand:VI_128 2 "register_operand")]
  "TARGET_XOP"
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx counts = operands[2];

  emit_insn (gen_xop_vrotl<mode>3 (dest, src, counts));
  DONE;
})
16957
;; Per-element variable rotate.  The RTL describes the dual behaviour of
;; the count in operand 2: where it is >= 0 the element of operand 1 is
;; rotated left by it, otherwise the element is rotated right by the
;; negated count.  Either operand 1 or operand 2 may be in memory, but
;; the insn condition rejects the case where both are.
(define_insn "xop_vrotl<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
        (if_then_else:VI_128
          (ge:VI_128
            (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
            (const_int 0))
          (rotate:VI_128
            (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
            (match_dup 2))
          (rotatert:VI_128
            (match_dup 1)
            (neg:VI_128 (match_dup 2)))))]
  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")])
16976
16977;; XOP packed shift instructions.
;; Per-element logical shift right for the VI12_128 modes (XOP only).
;; The count vector is negated and passed to xop_shl<mode>3.
;; NOTE(review): this relies on xop_shl<mode>3 shifting right for
;; negative counts; that pattern is defined elsewhere -- confirm there.
(define_expand "vlshr<mode>3"
  [(set (match_operand:VI12_128 0 "register_operand")
        (lshiftrt:VI12_128
          (match_operand:VI12_128 1 "register_operand")
          (match_operand:VI12_128 2 "nonimmediate_operand")))]
  "TARGET_XOP"
{
  rtx negated_counts = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_neg<mode>2 (negated_counts, operands[2]));
  emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], negated_counts));
  DONE;
})
16990
;; Per-element logical shift right for the VI48_128 modes.  With AVX2 the
;; default pattern is emitted unchanged; otherwise (XOP only) the count
;; vector is negated and passed to xop_shl<mode>3.
(define_expand "vlshr<mode>3"
  [(set (match_operand:VI48_128 0 "register_operand")
        (lshiftrt:VI48_128
          (match_operand:VI48_128 1 "register_operand")
          (match_operand:VI48_128 2 "nonimmediate_operand")))]
  "TARGET_AVX2 || TARGET_XOP"
{
  /* XOP fallback; AVX2 targets fall through to the pattern above.  */
  if (!TARGET_AVX2)
    {
      rtx negated_counts = gen_reg_rtx (<MODE>mode);

      emit_insn (gen_neg<mode>2 (negated_counts, operands[2]));
      emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1],
                                     negated_counts));
      DONE;
    }
})
17006
;; Per-element logical shift right for the VI48_512 modes; no
;; preparation code, the pattern is emitted as written.
(define_expand "vlshr<mode>3"
  [(set (match_operand:VI48_512 0 "register_operand")
        (lshiftrt:VI48_512
          (match_operand:VI48_512 1 "register_operand")
          (match_operand:VI48_512 2 "nonimmediate_operand")))]
  "TARGET_AVX512F")
17013
;; Per-element logical shift right for the VI48_256 modes; no
;; preparation code, the pattern is emitted as written.
(define_expand "vlshr<mode>3"
  [(set (match_operand:VI48_256 0 "register_operand")
        (lshiftrt:VI48_256
          (match_operand:VI48_256 1 "register_operand")
          (match_operand:VI48_256 2 "nonimmediate_operand")))]
  "TARGET_AVX2")
17020
;; Per-element arithmetic shift right on V8HI.  With XOP the count vector
;; is negated and passed to xop_shav8hi3; otherwise (AVX512BW+VL) the
;; default pattern is emitted.
;; NOTE(review): the XOP path only reads operands 0-2, so any extra
;; operands introduced by <mask_name> are not used there -- confirm that
;; the masked variant is never expanded when TARGET_XOP is set.
(define_expand "vashrv8hi3<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand")
        (ashiftrt:V8HI
          (match_operand:V8HI 1 "register_operand")
          (match_operand:V8HI 2 "nonimmediate_operand")))]
  "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
{
  if (TARGET_XOP)
    {
      rtx negated_counts = gen_reg_rtx (V8HImode);

      emit_insn (gen_negv8hi2 (negated_counts, operands[2]));
      emit_insn (gen_xop_shav8hi3 (operands[0], operands[1],
                                   negated_counts));
      DONE;
    }
})
17036
;; Per-element arithmetic shift right on V16QI (XOP only): the count
;; vector is negated and passed to xop_shav16qi3.
(define_expand "vashrv16qi3"
  [(set (match_operand:V16QI 0 "register_operand")
        (ashiftrt:V16QI
          (match_operand:V16QI 1 "register_operand")
          (match_operand:V16QI 2 "nonimmediate_operand")))]
  "TARGET_XOP"
{
  rtx negated_counts = gen_reg_rtx (V16QImode);

  emit_insn (gen_negv16qi2 (negated_counts, operands[2]));
  emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], negated_counts));
  DONE;
})
17049
;; Per-element arithmetic shift right on V2DI.  With XOP the count vector
;; is negated and passed to xop_shav2di3; otherwise (AVX512VL) the
;; default pattern is emitted.
;; NOTE(review): the XOP path only reads operands 0-2, so any extra
;; operands introduced by <mask_name> are not used there -- confirm that
;; the masked variant is never expanded when TARGET_XOP is set.
(define_expand "vashrv2di3<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand")
        (ashiftrt:V2DI
          (match_operand:V2DI 1 "register_operand")
          (match_operand:V2DI 2 "nonimmediate_operand")))]
  "TARGET_XOP || TARGET_AVX512VL"
{
  if (TARGET_XOP)
    {
      rtx negated_counts = gen_reg_rtx (V2DImode);

      emit_insn (gen_negv2di2 (negated_counts, operands[2]));
      emit_insn (gen_xop_shav2di3 (operands[0], operands[1],
                                   negated_counts));
      DONE;
    }
})
17065
;; Arithmetic right shift of V4SI by a V4SI vector of per-element counts.
;; Unlike the V8HI/V2DI expanders, AVX2 takes priority here: the XOP
;; negate-and-vpsha sequence is used only when AVX2 is unavailable (in which
;; case the condition guarantees TARGET_XOP).  With AVX2 the ashiftrt SET is
;; emitted as written.
17066(define_expand "vashrv4si3"
17067  [(set (match_operand:V4SI 0 "register_operand")
17068	(ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
17069		       (match_operand:V4SI 2 "nonimmediate_operand")))]
17070  "TARGET_AVX2 || TARGET_XOP"
17071{
17072  if (!TARGET_AVX2)
17073    {
17074      rtx neg = gen_reg_rtx (V4SImode);
17075      emit_insn (gen_negv4si2 (neg, operands[2]));
17076      emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
17077      DONE;
17078    }
17079})
17080
;; Arithmetic right shift of V16SI by a V16SI vector of per-element counts.
;; No preparation statements: under TARGET_AVX512F the ashiftrt SET is
;; emitted as written.
17081(define_expand "vashrv16si3"
17082  [(set (match_operand:V16SI 0 "register_operand")
17083	(ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
17084		        (match_operand:V16SI 2 "nonimmediate_operand")))]
17085  "TARGET_AVX512F")
17086
;; Arithmetic right shift of V8SI by a V8SI vector of per-element counts.
;; No preparation statements: under TARGET_AVX2 the ashiftrt SET is emitted
;; as written.
17087(define_expand "vashrv8si3"
17088  [(set (match_operand:V8SI 0 "register_operand")
17089	(ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
17090		       (match_operand:V8SI 2 "nonimmediate_operand")))]
17091  "TARGET_AVX2")
17092
;; Left shift of the VI12_128 modes by a vector of per-element counts.
;; XOP only.  The counts are handed to xop_sha<mode>3 unmodified (no
;; negation, unlike the vashr expanders); DONE suppresses the ashift template.
17093(define_expand "vashl<mode>3"
17094  [(set (match_operand:VI12_128 0 "register_operand")
17095	(ashift:VI12_128
17096	  (match_operand:VI12_128 1 "register_operand")
17097	  (match_operand:VI12_128 2 "nonimmediate_operand")))]
17098  "TARGET_XOP"
17099{
17100  emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
17101  DONE;
17102})
17103
;; Left shift of the VI48_128 modes by a vector of per-element counts.
;; With AVX2 the ashift SET is emitted as written.  Without AVX2 (so XOP, per
;; the condition) the counts are first forced into a register and then passed
;; unmodified to xop_sha<mode>3.
17104(define_expand "vashl<mode>3"
17105  [(set (match_operand:VI48_128 0 "register_operand")
17106	(ashift:VI48_128
17107	  (match_operand:VI48_128 1 "register_operand")
17108	  (match_operand:VI48_128 2 "nonimmediate_operand")))]
17109  "TARGET_AVX2 || TARGET_XOP"
17110{
17111  if (!TARGET_AVX2)
17112    {
17113      operands[2] = force_reg (<MODE>mode, operands[2]);
17114      emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
17115      DONE;
17116    }
17117})
17118
;; Left shift of the VI48_512 modes by a vector of per-element counts.
;; No preparation statements: under TARGET_AVX512F the ashift SET is emitted
;; as written.
17119(define_expand "vashl<mode>3"
17120  [(set (match_operand:VI48_512 0 "register_operand")
17121	(ashift:VI48_512
17122	  (match_operand:VI48_512 1 "register_operand")
17123	  (match_operand:VI48_512 2 "nonimmediate_operand")))]
17124  "TARGET_AVX512F")
17125
;; Left shift of the VI48_256 modes by a vector of per-element counts.
;; No preparation statements: under TARGET_AVX2 the ashift SET is emitted as
;; written.
17126(define_expand "vashl<mode>3"
17127  [(set (match_operand:VI48_256 0 "register_operand")
17128	(ashift:VI48_256
17129	  (match_operand:VI48_256 1 "register_operand")
17130	  (match_operand:VI48_256 2 "nonimmediate_operand")))]
17131  "TARGET_AVX2")
17132
;; XOP arithmetic shift with signed per-element counts (vpsha<ssemodesuffix>).
;; RTL model: where operand 2 is >= 0 the result is operand 1 shifted left by
;; operand 2; otherwise it is operand 1 arithmetically shifted right by the
;; negated count.  Callers wanting a right shift therefore pass negated counts.
;; At most one of operands 1 and 2 may be memory: enforced both by the insn
;; condition and by the paired "xm,x" / "x,m" constraints.
17133(define_insn "xop_sha<mode>3"
17134  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
17135	(if_then_else:VI_128
17136	 (ge:VI_128
17137	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
17138	  (const_int 0))
17139	 (ashift:VI_128
17140	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
17141	  (match_dup 2))
17142	 (ashiftrt:VI_128
17143	  (match_dup 1)
17144	  (neg:VI_128 (match_dup 2)))))]
17145  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
17146  "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17147  [(set_attr "type" "sseishft")
17148   (set_attr "prefix_data16" "0")
17149   (set_attr "prefix_extra" "2")
17150   (set_attr "mode" "TI")])
17151
;; XOP logical shift with signed per-element counts (vpshl<ssemodesuffix>).
;; RTL model: where operand 2 is >= 0 the result is operand 1 shifted left by
;; operand 2; otherwise it is operand 1 logically shifted right by the negated
;; count.  Same shape as xop_sha<mode>3 except lshiftrt replaces ashiftrt.
;; At most one of operands 1 and 2 may be memory (insn condition plus the
;; paired "xm,x" / "x,m" constraints).
17152(define_insn "xop_shl<mode>3"
17153  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
17154	(if_then_else:VI_128
17155	 (ge:VI_128
17156	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
17157	  (const_int 0))
17158	 (ashift:VI_128
17159	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
17160	  (match_dup 2))
17161	 (lshiftrt:VI_128
17162	  (match_dup 1)
17163	  (neg:VI_128 (match_dup 2)))))]
17164  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
17165  "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17166  [(set_attr "type" "sseishft")
17167   (set_attr "prefix_data16" "0")
17168   (set_attr "prefix_extra" "2")
17169   (set_attr "mode" "TI")])
17170
;; Shift every byte element of a VI1_AVX512 vector by one scalar SImode count
;; (operand 2, register or constant).  Always ends in DONE, so the template
;; SET is never emitted directly.
;;
;; XOP + V16QI: broadcast the count into a V16QI register and use the
;; signed-count XOP shifts (xop_shl for LSHIFTRT, xop_sha for ASHIFT and
;; ASHIFTRT).  Right shifts need a negative count: a constant count is negated
;; at expand time, a register count is negated after the broadcast.
;;
;; Everything else is delegated to ix86_expand_vecop_qihi.
17171(define_expand "<shift_insn><mode>3"
17172  [(set (match_operand:VI1_AVX512 0 "register_operand")
17173	(any_shift:VI1_AVX512
17174	  (match_operand:VI1_AVX512 1 "register_operand")
17175	  (match_operand:SI 2 "nonmemory_operand")))]
17176  "TARGET_SSE2"
17177{
17178  if (TARGET_XOP && <MODE>mode == V16QImode)
17179    {
17180      bool negate = false;
17181      rtx (*gen) (rtx, rtx, rtx);
17182      rtx tmp, par;
17183      int i;
17184
17185      if (<CODE> != ASHIFT)
17186	{
17187	  if (CONST_INT_P (operands[2]))
17188	    operands[2] = GEN_INT (-INTVAL (operands[2]));
17189	  else
17190	    negate = true;
17191	}
17192      par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
      /* Operand 2 is SImode, but the elements of a V16QI vec_init must be
	 QImode: take the low byte so the PARALLEL is mode-consistent.  */
      tmp = lowpart_subreg (QImode, operands[2], SImode);
17193      for (i = 0; i < 16; i++)
17194        XVECEXP (par, 0, i) = tmp;
17195
17196      tmp = gen_reg_rtx (V16QImode);
17197      emit_insn (gen_vec_initv16qiqi (tmp, par));
17198
17199      if (negate)
17200	emit_insn (gen_negv16qi2 (tmp, tmp));
17201
17202      gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
17203      emit_insn (gen (operands[0], operands[1], tmp));
17204    }
17205  else
17206    ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
17207  DONE;
17208})
17209
;; Arithmetic right shift of V2DI by a single scalar DImode count (register
;; or constant).  With AVX512VL the ashiftrt SET is emitted as written.
;; Otherwise (XOP, per the condition) the count is broadcast into a V2DI
;; register via vec_initv2didi and fed to xop_shav2di3, which needs a negative
;; count for a right shift: a constant count is negated at expand time, a
;; register count is negated with negv2di2 after the broadcast.
17210(define_expand "ashrv2di3"
17211  [(set (match_operand:V2DI 0 "register_operand")
17212	(ashiftrt:V2DI
17213	  (match_operand:V2DI 1 "register_operand")
17214	  (match_operand:DI 2 "nonmemory_operand")))]
17215  "TARGET_XOP || TARGET_AVX512VL"
17216{
17217  if (!TARGET_AVX512VL)
17218    {
17219      rtx reg = gen_reg_rtx (V2DImode);
17220      rtx par;
17221      bool negate = false;
17222      int i;
17223
17224      if (CONST_INT_P (operands[2]))
17225	operands[2] = GEN_INT (-INTVAL (operands[2]));
17226      else
17227	negate = true;
17228
17229      par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
17230      for (i = 0; i < 2; i++)
17231	XVECEXP (par, 0, i) = operands[2];
17232
17233      emit_insn (gen_vec_initv2didi (reg, par));
17234
17235      if (negate)
17236	emit_insn (gen_negv2di2 (reg, reg));
17237
17238      emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
17239      DONE;
17240    }
17241})
17242
17243;; XOP FRCZ support
;; Full-vector form: emits vfrcz<ssemodesuffix> for the FMAMODE modes.  The
;; operation is opaque to RTL (UNSPEC_FRCZ); the source may be a register or
;; memory.
17244(define_insn "xop_frcz<mode>2"
17245  [(set (match_operand:FMAMODE 0 "register_operand" "=x")
17246	(unspec:FMAMODE
17247	 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
17248	 UNSPEC_FRCZ))]
17249  "TARGET_XOP"
17250  "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
17251  [(set_attr "type" "ssecvt1")
17252   (set_attr "mode" "<MODE>")])
17253
;; Expander for the scalar-in-vector FRCZ form.  Callers supply operands 0
;; and 1 only; the preparation statement sets operand 2 to the all-zero
;; constant of the mode.  The vec_merge selector (const_int 1) takes element 0
;; from the UNSPEC_FRCZ result and every other element from operand 2 (zero).
17254(define_expand "xop_vmfrcz<mode>2"
17255  [(set (match_operand:VF_128 0 "register_operand")
17256	(vec_merge:VF_128
17257	  (unspec:VF_128
17258	   [(match_operand:VF_128 1 "nonimmediate_operand")]
17259	   UNSPEC_FRCZ)
17260	  (match_dup 2)
17261	  (const_int 1)))]
17262  "TARGET_XOP"
17263  "operands[2] = CONST0_RTX (<MODE>mode);")
17264
;; Matching insn for the scalar-in-vector FRCZ form: element 0 comes from
;; UNSPEC_FRCZ of operand 1, the remaining elements from operand 2, which must
;; be a zero constant (const0_operand).  Emits vfrcz<ssescalarmodesuffix>; the
;; Intel-syntax operand uses %<iptr>1.
17265(define_insn "*xop_vmfrcz<mode>2"
17266  [(set (match_operand:VF_128 0 "register_operand" "=x")
17267	(vec_merge:VF_128
17268	  (unspec:VF_128
17269	   [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
17270	   UNSPEC_FRCZ)
17271	  (match_operand:VF_128 2 "const0_operand")
17272	  (const_int 1)))]
17273  "TARGET_XOP"
17274  "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
17275  [(set_attr "type" "ssecvt1")
17276   (set_attr "mode" "<MODE>")])
17277
;; XOP integer vector compare on the VI_128 modes.  The comparison itself is
;; operand 1 (any rtx code accepted by ix86_comparison_int_operator) applied
;; to operands 2 and 3; its code is printed into the mnemonic through %Y1,
;; giving vpcom<cond><ssemodesuffix>.  Operand 3 may be memory.
17278(define_insn "xop_maskcmp<mode>3"
17279  [(set (match_operand:VI_128 0 "register_operand" "=x")
17280	(match_operator:VI_128 1 "ix86_comparison_int_operator"
17281	 [(match_operand:VI_128 2 "register_operand" "x")
17282	  (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
17283  "TARGET_XOP"
17284  "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
17285  [(set_attr "type" "sse4arg")
17286   (set_attr "prefix_data16" "0")
17287   (set_attr "prefix_rep" "0")
17288   (set_attr "prefix_extra" "2")
17289   (set_attr "length_immediate" "1")
17290   (set_attr "mode" "TI")])
17291
;; Unsigned counterpart of xop_maskcmp<mode>3: operand 1 must satisfy
;; ix86_comparison_uns_operator and the mnemonic carries a "u" after the
;; condition (vpcom<cond>u<ssemodesuffix>).  Operand 3 may be memory.
17292(define_insn "xop_maskcmp_uns<mode>3"
17293  [(set (match_operand:VI_128 0 "register_operand" "=x")
17294	(match_operator:VI_128 1 "ix86_comparison_uns_operator"
17295	 [(match_operand:VI_128 2 "register_operand" "x")
17296	  (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
17297  "TARGET_XOP"
17298  "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
17299  [(set_attr "type" "ssecmp")
17300   (set_attr "prefix_data16" "0")
17301   (set_attr "prefix_rep" "0")
17302   (set_attr "prefix_extra" "2")
17303   (set_attr "length_immediate" "1")
17304   (set_attr "mode" "TI")])
17305
17306;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
17307;; and pcomneu* not to be converted to the signed ones in case somebody needs
17308;; the exact instruction generated for the intrinsic.
;; The comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP, which is what keeps
;; it distinct from the plain comparison rtx matched by xop_maskcmp_uns<mode>3.
;; The assembler template is the same as that pattern's.
17309(define_insn "xop_maskcmp_uns2<mode>3"
17310  [(set (match_operand:VI_128 0 "register_operand" "=x")
17311	(unspec:VI_128
17312	 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
17313	  [(match_operand:VI_128 2 "register_operand" "x")
17314	   (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
17315	 UNSPEC_XOP_UNSIGNED_CMP))]
17316  "TARGET_XOP"
17317  "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
17318  [(set_attr "type" "ssecmp")
17319   (set_attr "prefix_data16" "0")
17320   (set_attr "prefix_extra" "2")
17321   (set_attr "length_immediate" "1")
17322   (set_attr "mode" "TI")])
17323
17324;; Pcomtrue and pcomfalse support.  These are useless instructions, but are
17325;; being added here to be complete.
;; Operand 3 is a compile-time constant selecting the mnemonic: zero emits
;; vpcomfalse, any other value emits vpcomtrue.  The operation is kept opaque
;; as UNSPEC_XOP_TRUEFALSE.
17326(define_insn "xop_pcom_tf<mode>3"
17327  [(set (match_operand:VI_128 0 "register_operand" "=x")
17328	(unspec:VI_128
17329	  [(match_operand:VI_128 1 "register_operand" "x")
17330	   (match_operand:VI_128 2 "nonimmediate_operand" "xm")
17331	   (match_operand:SI 3 "const_int_operand" "n")]
17332	  UNSPEC_XOP_TRUEFALSE))]
17333  "TARGET_XOP"
17334{
  /* Handle the "false" selector first; everything else is "true".  */
  if (INTVAL (operands[3]) == 0)
    return "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";

  return "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
17338}
17339  [(set_attr "type" "ssecmp")
17340   (set_attr "prefix_data16" "0")
17341   (set_attr "prefix_extra" "2")
17342   (set_attr "length_immediate" "1")
17343   (set_attr "mode" "TI")])
17344
;; XOP two-source permute (vpermil2<ssemodesuffix>) for the VF_128_256 modes,
;; kept opaque as UNSPEC_VPERMIL2.  Operand 3 is a selector vector in the
;; matching integer vector mode; operand 4 is an immediate in 0..3.
;; Either operand 2 or operand 3 may be memory, but not both (alternatives
;; "x,m" and "xm,x").
17345(define_insn "xop_vpermil2<mode>3"
17346  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
17347	(unspec:VF_128_256
17348	  [(match_operand:VF_128_256 1 "register_operand" "x,x")
17349	   (match_operand:VF_128_256 2 "nonimmediate_operand" "x,m")
17350	   (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm,x")
17351	   (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
17352	  UNSPEC_VPERMIL2))]
17353  "TARGET_XOP"
17354  "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
17355  [(set_attr "type" "sse4arg")
17356   (set_attr "length_immediate" "1")
17357   (set_attr "mode" "<MODE>")])
17358
17359;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17360
;; AES encrypt round, opaque to RTL as UNSPEC_AESENC on V2DI.
;; Alternative 0 (isa noavx): two-operand aesenc, operand 1 tied to the
;; destination by the "0" constraint.
;; Alternative 1 (isa avx): three-operand VEX-encoded vaesenc.
17361(define_insn "aesenc"
17362  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17363	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17364		       (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17365		      UNSPEC_AESENC))]
17366  "TARGET_AES"
17367  "@
17368   aesenc\t{%2, %0|%0, %2}
17369   vaesenc\t{%2, %1, %0|%0, %1, %2}"
17370  [(set_attr "isa" "noavx,avx")
17371   (set_attr "type" "sselog1")
17372   (set_attr "prefix_extra" "1")
17373   (set_attr "prefix" "orig,vex")
17374   (set_attr "btver2_decode" "double,double")
17375   (set_attr "mode" "TI")])
17376
;; AES final encrypt round, opaque to RTL as UNSPEC_AESENCLAST on V2DI.
;; Alternative 0 (isa noavx): two-operand aesenclast, operand 1 tied to the
;; destination by the "0" constraint.
;; Alternative 1 (isa avx): three-operand VEX-encoded vaesenclast.
17377(define_insn "aesenclast"
17378  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17379	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17380		       (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17381		      UNSPEC_AESENCLAST))]
17382  "TARGET_AES"
17383  "@
17384   aesenclast\t{%2, %0|%0, %2}
17385   vaesenclast\t{%2, %1, %0|%0, %1, %2}"
17386  [(set_attr "isa" "noavx,avx")
17387   (set_attr "type" "sselog1")
17388   (set_attr "prefix_extra" "1")
17389   (set_attr "prefix" "orig,vex")
17390   (set_attr "btver2_decode" "double,double")
17391   (set_attr "mode" "TI")])
17392
;; AES decrypt round, opaque to RTL as UNSPEC_AESDEC on V2DI.
;; Alternative 0 (isa noavx): two-operand aesdec, operand 1 tied to the
;; destination by the "0" constraint.
;; Alternative 1 (isa avx): three-operand VEX-encoded vaesdec.
17393(define_insn "aesdec"
17394  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17395	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17396		       (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17397		      UNSPEC_AESDEC))]
17398  "TARGET_AES"
17399  "@
17400   aesdec\t{%2, %0|%0, %2}
17401   vaesdec\t{%2, %1, %0|%0, %1, %2}"
17402  [(set_attr "isa" "noavx,avx")
17403   (set_attr "type" "sselog1")
17404   (set_attr "prefix_extra" "1")
17405   (set_attr "prefix" "orig,vex")
17406   (set_attr "btver2_decode" "double,double")
17407   (set_attr "mode" "TI")])
17408
;; AES final decrypt round, opaque to RTL as UNSPEC_AESDECLAST on V2DI.
;; Alternative 0 (isa noavx): two-operand aesdeclast, operand 1 tied to the
;; destination by the "0" constraint.
;; Alternative 1 (isa avx): three-operand VEX-encoded vaesdeclast.
17409(define_insn "aesdeclast"
17410  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17411	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17412		       (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17413		      UNSPEC_AESDECLAST))]
17414  "TARGET_AES"
17415  "@
17416   aesdeclast\t{%2, %0|%0, %2}
17417   vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
17418  [(set_attr "isa" "noavx,avx")
17419   (set_attr "type" "sselog1")
17420   (set_attr "prefix_extra" "1")
17421   (set_attr "prefix" "orig,vex")
17422   (set_attr "btver2_decode" "double,double")
17423   (set_attr "mode" "TI")])
17424
;; AES InvMixColumns, opaque to RTL as UNSPEC_AESIMC on V2DI.  Single
;; alternative with a separate destination; the "%v" in the template together
;; with prefix "maybe_vex" lets the same pattern serve both encodings.
17425(define_insn "aesimc"
17426  [(set (match_operand:V2DI 0 "register_operand" "=x")
17427	(unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
17428		      UNSPEC_AESIMC))]
17429  "TARGET_AES"
17430  "%vaesimc\t{%1, %0|%0, %1}"
17431  [(set_attr "type" "sselog1")
17432   (set_attr "prefix_extra" "1")
17433   (set_attr "prefix" "maybe_vex")
17434   (set_attr "mode" "TI")])
17435
;; AES key-generation assist, opaque to RTL as UNSPEC_AESKEYGENASSIST on
;; V2DI.  Operand 2 is an 8-bit immediate (0..255).  Single alternative; "%v"
;; plus prefix "maybe_vex" covers both the legacy and VEX encodings.
17436(define_insn "aeskeygenassist"
17437  [(set (match_operand:V2DI 0 "register_operand" "=x")
17438	(unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
17439		      (match_operand:SI 2 "const_0_to_255_operand" "n")]
17440		     UNSPEC_AESKEYGENASSIST))]
17441  "TARGET_AES"
17442  "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
17443  [(set_attr "type" "sselog1")
17444   (set_attr "prefix_extra" "1")
17445   (set_attr "length_immediate" "1")
17446   (set_attr "prefix" "maybe_vex")
17447   (set_attr "mode" "TI")])
17448
;; Carry-less multiply, opaque to RTL as UNSPEC_PCLMUL on V2DI.  Operand 3 is
;; an 8-bit immediate (0..255).
;; Alternative 0 (isa noavx): two-operand pclmulqdq, operand 1 tied to the
;; destination.  Alternative 1 (isa avx): three-operand vpclmulqdq.
17449(define_insn "pclmulqdq"
17450  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17451	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17452		      (match_operand:V2DI 2 "vector_operand" "xBm,xm")
17453		      (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
17454		     UNSPEC_PCLMUL))]
17455  "TARGET_PCLMUL"
17456  "@
17457   pclmulqdq\t{%3, %2, %0|%0, %2, %3}
17458   vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17459  [(set_attr "isa" "noavx,avx")
17460   (set_attr "type" "sselog1")
17461   (set_attr "prefix_extra" "1")
17462   (set_attr "length_immediate" "1")
17463   (set_attr "prefix" "orig,vex")
17464   (set_attr "mode" "TI")])
17465
;; Expander for vzeroall.  The whole insn body is built by hand into operand 0
;; as a PARALLEL of nregs + 1 elements:
;;   [0]      unspec_volatile UNSPECV_VZEROALL
;;   [1..n]   one SET per SSE register (V8SImode view) to the zero constant
;; nregs is 16 when TARGET_64BIT, otherwise 8.
17466(define_expand "avx_vzeroall"
17467  [(match_par_dup 0 [(const_int 0)])]
17468  "TARGET_AVX"
17469{
17470  int nregs = TARGET_64BIT ? 16 : 8;
17471  int regno;
17472
17473  operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
17474
17475  XVECEXP (operands[0], 0, 0)
17476    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
17477			       UNSPECV_VZEROALL);
17478
17479  for (regno = 0; regno < nregs; regno++)
17480    XVECEXP (operands[0], 0, regno + 1)
17481      = gen_rtx_SET (gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
17482		     CONST0_RTX (V8SImode));
17483})
17484
;; Matching insn for vzeroall: a PARALLEL accepted by the vzeroall_operation
;; predicate whose first element is the UNSPECV_VZEROALL unspec_volatile.
;; Attributes mark it as having no modrm byte and no memory access.
17485(define_insn "*avx_vzeroall"
17486  [(match_parallel 0 "vzeroall_operation"
17487    [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
17488  "TARGET_AVX"
17489  "vzeroall"
17490  [(set_attr "type" "sse")
17491   (set_attr "modrm" "0")
17492   (set_attr "memory" "none")
17493   (set_attr "prefix" "vex")
17494   (set_attr "btver2_decode" "vector")
17495   (set_attr "mode" "OI")])
17496
17497;; Clear the upper 128bits of AVX registers, equivalent to a NOP
17498;; if the upper 128bits are unused.
;; Modelled only as an unspec_volatile (UNSPECV_VZEROUPPER) with no register
;; SETs, unlike *avx_vzeroall which lists every register it clears.
17499(define_insn "avx_vzeroupper"
17500  [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
17501  "TARGET_AVX"
17502  "vzeroupper"
17503  [(set_attr "type" "sse")
17504   (set_attr "modrm" "0")
17505   (set_attr "memory" "none")
17506   (set_attr "prefix" "vex")
17507   (set_attr "btver2_decode" "vector")
17508   (set_attr "mode" "OI")])
17509
;; Per-mode "isa" string used by the "v"-register alternatives of the
;; avx2_pbroadcast patterns: byte and word element vectors map to "avx512bw",
;; dword and qword element vectors map to "avx512f".
17510(define_mode_attr pbroadcast_evex_isa
17511  [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
17512   (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
17513   (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
17514   (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")])
17515
;; Broadcast element 0 of a 128-bit source (<ssexmmmode>, register or memory)
;; to every element of an integer vector (vpbroadcast<ssemodesuffix>).
;; Alternative 0 uses "x" registers with a VEX prefix; alternative 1 uses "v"
;; registers with an EVEX prefix and is gated by <pbroadcast_evex_isa>.
17516(define_insn "avx2_pbroadcast<mode>"
17517  [(set (match_operand:VI 0 "register_operand" "=x,v")
17518	(vec_duplicate:VI
17519	  (vec_select:<ssescalarmode>
17520	    (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
17521	    (parallel [(const_int 0)]))))]
17522  "TARGET_AVX2"
17523  "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
17524  [(set_attr "isa" "*,<pbroadcast_evex_isa>")
17525   (set_attr "type" "ssemov")
17526   (set_attr "prefix_extra" "1")
17527   (set_attr "prefix" "vex,evex")
17528   (set_attr "mode" "<sseinsnmode>")])
17529
;; Broadcast element 0 of a 256-bit integer vector to every element of a
;; vector of the same mode.  A register source is printed through %x1 (its
;; 128-bit view); a memory source is printed as-is (%<iptr>1 for Intel).
;; Alternatives 0-1 use "x" registers (VEX); alternatives 2-3 use "v"
;; registers and are gated by <pbroadcast_evex_isa>, so their "prefix" is
;; "evex", matching the "vex,evex" split used by avx2_pbroadcast<mode>.
17530(define_insn "avx2_pbroadcast<mode>_1"
17531  [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
17532	(vec_duplicate:VI_256
17533	  (vec_select:<ssescalarmode>
17534	    (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
17535	    (parallel [(const_int 0)]))))]
17536  "TARGET_AVX2"
17537  "@
17538   vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
17539   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
17540   vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
17541   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
17542  [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
17543   (set_attr "type" "ssemov")
17544   (set_attr "prefix_extra" "1")
17545   (set_attr "prefix" "vex,vex,evex,evex")
17546   (set_attr "mode" "<sseinsnmode>")])
17547
;; Variable permute (vperm<ssemodesuffix>) for the VI48F_256_512 modes, kept
;; opaque as UNSPEC_VPERMVAR.  Operand 1 is the data vector (register or
;; memory); operand 2 is the index vector in the matching integer vector mode
;; and must be a register.  Note the assembler operand order: %1 is printed
;; first in AT&T syntax and last in Intel syntax.
17548(define_insn "<avx2_avx512>_permvar<mode><mask_name>"
17549  [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
17550	(unspec:VI48F_256_512
17551	  [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
17552	   (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17553	  UNSPEC_VPERMVAR))]
17554  "TARGET_AVX2 && <mask_mode512bit_condition>"
17555  "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17556  [(set_attr "type" "sselog")
17557   (set_attr "prefix" "<mask_prefix2>")
17558   (set_attr "mode" "<sseinsnmode>")])
17559
;; Variable permute for the VI1_AVX512VL modes (UNSPEC_VPERMVAR), requiring
;; TARGET_AVX512VBMI.  Operand 1 is the data vector (register or memory),
;; operand 2 the register index vector.
17560(define_insn "<avx512>_permvar<mode><mask_name>"
17561  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17562	(unspec:VI1_AVX512VL
17563	  [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
17564	   (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17565	  UNSPEC_VPERMVAR))]
17566  "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
17567  "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17568  [(set_attr "type" "sselog")
17569   (set_attr "prefix" "<mask_prefix2>")
17570   (set_attr "mode" "<sseinsnmode>")])
17571
;; Variable permute for the VI2_AVX512VL modes (UNSPEC_VPERMVAR), requiring
;; TARGET_AVX512BW.  Operand 1 is the data vector (register or memory),
;; operand 2 the register index vector.
17572(define_insn "<avx512>_permvar<mode><mask_name>"
17573  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17574	(unspec:VI2_AVX512VL
17575	  [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
17576	   (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17577	  UNSPEC_VPERMVAR))]
17578  "TARGET_AVX512BW && <mask_mode512bit_condition>"
17579  "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17580  [(set_attr "type" "sselog")
17581   (set_attr "prefix" "<mask_prefix2>")
17582   (set_attr "mode" "<sseinsnmode>")])
17583
;; Immediate permute of a VI8F_256 vector.  Operand 2 is an 8-bit immediate;
;; it is split into four 2-bit selectors (bits 1:0, 3:2, 5:4, 7:6) which
;; become the four vec_select indices of avx2_perm<mode>_1.  That insn packs
;; them back into a single immediate when it prints the instruction.
17584(define_expand "avx2_perm<mode>"
17585  [(match_operand:VI8F_256 0 "register_operand")
17586   (match_operand:VI8F_256 1 "nonimmediate_operand")
17587   (match_operand:SI 2 "const_0_to_255_operand")]
17588  "TARGET_AVX2"
17589{
17590  int mask = INTVAL (operands[2]);
17591  emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
17592				    GEN_INT ((mask >> 0) & 3),
17593				    GEN_INT ((mask >> 2) & 3),
17594				    GEN_INT ((mask >> 4) & 3),
17595				    GEN_INT ((mask >> 6) & 3)));
17596  DONE;
17597})
17598
;; Masked variant of the VI8F_256 immediate permute.  The 8-bit immediate in
;; operand 2 is split into four 2-bit selectors exactly as in
;; avx2_perm<mode>; operands 3 and 4 are passed through unchanged as the last
;; two arguments of the _mask generator (operand 4 is the mask register;
;; operand 3 is presumably the merge source -- confirm against the mask
;; define_subst, which is outside this chunk).
17599(define_expand "avx512vl_perm<mode>_mask"
17600  [(match_operand:VI8F_256 0 "register_operand")
17601   (match_operand:VI8F_256 1 "nonimmediate_operand")
17602   (match_operand:SI 2 "const_0_to_255_operand")
17603   (match_operand:VI8F_256 3 "vector_move_operand")
17604   (match_operand:<avx512fmaskmode> 4 "register_operand")]
17605  "TARGET_AVX512VL"
17606{
17607  int mask = INTVAL (operands[2]);
17608  emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
17609						  GEN_INT ((mask >> 0) & 3),
17610						  GEN_INT ((mask >> 2) & 3),
17611						  GEN_INT ((mask >> 4) & 3),
17612						  GEN_INT ((mask >> 6) & 3),
17613						  operands[3], operands[4]));
17614  DONE;
17615})
17616
;; Immediate permute of a VI8F_256 vector expressed as a vec_select with four
;; constant indices, each in 0..3.  At output time the four selectors are
;; packed into one immediate (operand 2 in bits 1:0, operand 3 in bits 3:2,
;; operand 4 in bits 5:4, operand 5 in bits 7:6), which overwrites
;; operands[2] for printing.
17617(define_insn "avx2_perm<mode>_1<mask_name>"
17618  [(set (match_operand:VI8F_256 0 "register_operand" "=v")
17619	(vec_select:VI8F_256
17620	  (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
17621	  (parallel [(match_operand 2 "const_0_to_3_operand")
17622		     (match_operand 3 "const_0_to_3_operand")
17623		     (match_operand 4 "const_0_to_3_operand")
17624		     (match_operand 5 "const_0_to_3_operand")])))]
17625  "TARGET_AVX2 && <mask_mode512bit_condition>"
17626{
  /* Walk the selectors from the highest field (operand 5) down to the lowest
     (operand 2), shifting the accumulated value up two bits each step.  */
  int imm8 = 0;
  for (int sel = 5; sel >= 2; sel--)
    imm8 = (imm8 << 2) | INTVAL (operands[sel]);

  operands[2] = GEN_INT (imm8);
17633  return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
17634}
17635  [(set_attr "type" "sselog")
17636   (set_attr "prefix" "<mask_prefix2>")
17637   (set_attr "mode" "<sseinsnmode>")])
17638
;; Immediate permute of a V8FI (512-bit, 8-element) vector.  The 8-bit
;; immediate in operand 2 yields four 2-bit selectors; they are passed once
;; as-is (indices 0..3) and once with 4 added (indices 4..7), giving the eight
;; vec_select indices of avx512f_perm<mode>_1.
17639(define_expand "avx512f_perm<mode>"
17640  [(match_operand:V8FI 0 "register_operand")
17641   (match_operand:V8FI 1 "nonimmediate_operand")
17642   (match_operand:SI 2 "const_0_to_255_operand")]
17643  "TARGET_AVX512F"
17644{
17645  int mask = INTVAL (operands[2]);
17646  emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
17647				       GEN_INT ((mask >> 0) & 3),
17648				       GEN_INT ((mask >> 2) & 3),
17649				       GEN_INT ((mask >> 4) & 3),
17650				       GEN_INT ((mask >> 6) & 3),
17651				       GEN_INT (((mask >> 0) & 3) + 4),
17652				       GEN_INT (((mask >> 2) & 3) + 4),
17653				       GEN_INT (((mask >> 4) & 3) + 4),
17654				       GEN_INT (((mask >> 6) & 3) + 4)));
17655  DONE;
17656})
17657
;; Masked variant of avx512f_perm<mode>.  The immediate is expanded into the
;; same eight indices (four selectors, then the same four plus 4); operands 3
;; and 4 are passed through unchanged as the last two arguments of the _mask
;; generator (operand 4 is the mask register; operand 3 is presumably the
;; merge source -- confirm against the mask define_subst outside this chunk).
17658(define_expand "avx512f_perm<mode>_mask"
17659  [(match_operand:V8FI 0 "register_operand")
17660   (match_operand:V8FI 1 "nonimmediate_operand")
17661   (match_operand:SI 2 "const_0_to_255_operand")
17662   (match_operand:V8FI 3 "vector_move_operand")
17663   (match_operand:<avx512fmaskmode> 4 "register_operand")]
17664  "TARGET_AVX512F"
17665{
17666  int mask = INTVAL (operands[2]);
17667  emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
17668					    GEN_INT ((mask >> 0) & 3),
17669					    GEN_INT ((mask >> 2) & 3),
17670					    GEN_INT ((mask >> 4) & 3),
17671					    GEN_INT ((mask >> 6) & 3),
17672					    GEN_INT (((mask >> 0) & 3) + 4),
17673					    GEN_INT (((mask >> 2) & 3) + 4),
17674					    GEN_INT (((mask >> 4) & 3) + 4),
17675					    GEN_INT (((mask >> 6) & 3) + 4),
17676					    operands[3], operands[4]));
17677  DONE;
17678})
17679
;; Immediate permute of a V8FI vector expressed as a vec_select with eight
;; constant indices: operands 2-5 in 0..3 and operands 6-9 in 4..7.  The insn
;; condition additionally requires each upper index to equal the matching
;; lower index plus 4, i.e. both halves use the same selectors.  Only the
;; lower four are packed into the printed immediate (operand 2 in bits 1:0
;; up to operand 5 in bits 7:6), which overwrites operands[2].
17680(define_insn "avx512f_perm<mode>_1<mask_name>"
17681  [(set (match_operand:V8FI 0 "register_operand" "=v")
17682	(vec_select:V8FI
17683	  (match_operand:V8FI 1 "nonimmediate_operand" "vm")
17684	  (parallel [(match_operand 2 "const_0_to_3_operand")
17685		     (match_operand 3 "const_0_to_3_operand")
17686		     (match_operand 4 "const_0_to_3_operand")
17687		     (match_operand 5 "const_0_to_3_operand")
17688		     (match_operand 6 "const_4_to_7_operand")
17689		     (match_operand 7 "const_4_to_7_operand")
17690		     (match_operand 8 "const_4_to_7_operand")
17691		     (match_operand 9 "const_4_to_7_operand")])))]
17692  "TARGET_AVX512F && <mask_mode512bit_condition>
17693   && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
17694       && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
17695       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
17696       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
17697{
  /* Walk the low-half selectors from the highest field (operand 5) down to
     the lowest (operand 2), shifting the accumulated value up two bits each
     step.  Operands 6-9 are redundant here (see the insn condition).  */
  int imm8 = 0;
  for (int sel = 5; sel >= 2; sel--)
    imm8 = (imm8 << 2) | INTVAL (operands[sel]);

  operands[2] = GEN_INT (imm8);
17704  return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
17705}
17706  [(set_attr "type" "sselog")
17707   (set_attr "prefix" "<mask_prefix2>")
17708   (set_attr "mode" "<sseinsnmode>")])
17709
;; vperm2i128 on V4DI, kept opaque as UNSPEC_VPERMTI.  Operand 1 must be a
;; register, operand 2 may be memory, operand 3 is an 8-bit immediate.
17710(define_insn "avx2_permv2ti"
17711  [(set (match_operand:V4DI 0 "register_operand" "=x")
17712	(unspec:V4DI
17713	  [(match_operand:V4DI 1 "register_operand" "x")
17714	   (match_operand:V4DI 2 "nonimmediate_operand" "xm")
17715	   (match_operand:SI 3 "const_0_to_255_operand" "n")]
17716	  UNSPEC_VPERMTI))]
17717  "TARGET_AVX2"
17718  "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17719  [(set_attr "type" "sselog")
17720   (set_attr "prefix" "vex")
17721   (set_attr "mode" "OI")])
17722
;; Broadcast element 0 of a V2DF register to all four elements of a V4DF
;; register (vbroadcastsd).  The source must be a register; there is no
;; memory alternative in this pattern.
17723(define_insn "avx2_vec_dupv4df"
17724  [(set (match_operand:V4DF 0 "register_operand" "=v")
17725	(vec_duplicate:V4DF
17726	  (vec_select:DF
17727	    (match_operand:V2DF 1 "register_operand" "v")
17728	    (parallel [(const_int 0)]))))]
17729  "TARGET_AVX2"
17730  "vbroadcastsd\t{%1, %0|%0, %1}"
17731  [(set_attr "type" "sselog1")
17732   (set_attr "prefix" "maybe_evex")
17733   (set_attr "mode" "V4DF")])
17734
;; Broadcast element 0 of a VI_AVX512BW vector to every element of a vector
;; of the same mode (vpbroadcast<ssemodesuffix>).  Alternative 0 takes a
;; register source printed through %x1; alternative 1 takes a memory source,
;; printed with %x1 in AT&T syntax and %<iptr>1 in Intel syntax.
17735(define_insn "<avx512>_vec_dup<mode>_1"
17736  [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
17737	(vec_duplicate:VI_AVX512BW
17738	  (vec_select:<ssescalarmode>
17739	    (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
17740	    (parallel [(const_int 0)]))))]
17741  "TARGET_AVX512F"
17742  "@
17743   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
17744   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
17745  [(set_attr "type" "ssemov")
17746   (set_attr "prefix" "evex")
17747   (set_attr "mode" "<sseinsnmode>")])
17748
;; Broadcast element 0 of a 128-bit source (<ssexmmmode>, register or memory)
;; to every element of a V48_AVX512VL vector.  V2DFmode is special-cased in
;; the output code: it is emitted as the integer vpbroadcastq (see the comment
;; in the body); every other mode uses
;; v<sseintprefix>broadcast<bcstscalarsuff>.
17749(define_insn "<avx512>_vec_dup<mode><mask_name>"
17750  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
17751	(vec_duplicate:V48_AVX512VL
17752	  (vec_select:<ssescalarmode>
17753	    (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17754	    (parallel [(const_int 0)]))))]
17755  "TARGET_AVX512F"
17756{
17757  /*  There is no DF broadcast (in AVX-512*) to 128b register.
17758      Mimic it with integer variant.  */
17759  if (<MODE>mode == V2DFmode)
17760    return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
17761
17762  return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}";
17763}
17764  [(set_attr "type" "ssemov")
17765   (set_attr "prefix" "evex")
17766   (set_attr "mode" "<sseinsnmode>")])
17767
;; Broadcast element 0 of a 128-bit source (<ssexmmmode>, register or memory)
;; to every element of a VI12_AVX512VL vector
;; (vpbroadcast<bcstscalarsuff>).  Requires TARGET_AVX512BW.
17768(define_insn "<avx512>_vec_dup<mode><mask_name>"
17769  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
17770	(vec_duplicate:VI12_AVX512VL
17771	  (vec_select:<ssescalarmode>
17772	    (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17773	    (parallel [(const_int 0)]))))]
17774  "TARGET_AVX512BW"
17775  "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}"
17776  [(set_attr "type" "ssemov")
17777   (set_attr "prefix" "evex")
17778   (set_attr "mode" "<sseinsnmode>")])
17779
;; Replicate a whole 128-bit vector (<ssexmmmode>) across a V16FI register.
;; Alternative 0 (register source): vshuf<shuffletype>32x4 with immediate 0
;; and the source, printed through %g1, as both inputs.
;; Alternative 1 (memory source): vbroadcast<shuffletype>32x4.
17780(define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
17781  [(set (match_operand:V16FI 0 "register_operand" "=v,v")
17782	(vec_duplicate:V16FI
17783	  (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
17784  "TARGET_AVX512F"
17785  "@
17786   vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
17787   vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17788  [(set_attr "type" "ssemov")
17789   (set_attr "prefix" "evex")
17790   (set_attr "mode" "<sseinsnmode>")])
17791
;; Replicate a whole half-width vector (<ssehalfvecmode>) across a V8FI
;; register.
;; Alternative 0 (register source): vshuf<shuffletype>64x2 with immediate
;; 0x44 and the source, printed through %g1, as both inputs.
;; Alternative 1 (memory source): vbroadcast<shuffletype>64x4.
17792(define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
17793  [(set (match_operand:V8FI 0 "register_operand" "=v,v")
17794	(vec_duplicate:V8FI
17795	  (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
17796  "TARGET_AVX512F"
17797  "@
17798   vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
17799   vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17800  [(set_attr "type" "ssemov")
17801   (set_attr "prefix" "evex")
17802   (set_attr "mode" "<sseinsnmode>")])
17803
;; Broadcast a scalar (<ssescalarmode>) to every element of a VI12_AVX512VL
;; vector.  Alternative 0 takes the scalar from a vector register or memory;
;; alternative 1 takes it from a general register, printed through %k1.
;; Requires TARGET_AVX512BW.
17804(define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
17805  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
17806	(vec_duplicate:VI12_AVX512VL
17807	  (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
17808  "TARGET_AVX512BW"
17809  "@
17810   vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
17811   vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
17812  [(set_attr "type" "ssemov")
17813   (set_attr "prefix" "evex")
17814   (set_attr "mode" "<sseinsnmode>")])
17815
;; Broadcast a scalar (<ssescalarmode>) to every element of a V48_AVX512VL
;; vector.  Alternative 0 takes the scalar from a vector register or memory.
;; Alternative 1 takes it from a general register and is enabled only when
;; the scalar mode is an integer mode and, for DImode, only when TARGET_64BIT
;; (see the "enabled" attribute).  Both alternatives share one template.
17816(define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
17817  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
17818	(vec_duplicate:V48_AVX512VL
17819	  (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
17820  "TARGET_AVX512F"
17821  "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17822  [(set_attr "type" "ssemov")
17823   (set_attr "prefix" "evex")
17824   (set_attr "mode" "<sseinsnmode>")
17825   (set (attr "enabled")
17826     (if_then_else (eq_attr "alternative" "1")
17827	(symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
17828		     && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
17829	(const_int 1)))])
17830
;; Broadcast an SFmode scalar to all four elements of V4SF.
;; Alternative 0 (isa avx): register source, vshufps with immediate 0 and the
;; source as both inputs.
;; Alternative 1 (isa avx): memory source, vbroadcastss.
;; Alternative 2 (isa noavx): source tied to the destination ("0"), in-place
;; shufps with immediate 0.
17831(define_insn "vec_dupv4sf"
17832  [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
17833	(vec_duplicate:V4SF
17834	  (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
17835  "TARGET_SSE"
17836  "@
17837   vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
17838   vbroadcastss\t{%1, %0|%0, %1}
17839   shufps\t{$0, %0, %0|%0, %0, 0}"
17840  [(set_attr "isa" "avx,avx,noavx")
17841   (set_attr "type" "sseshuf1,ssemov,sseshuf1")
17842   (set_attr "length_immediate" "1,0,1")
17843   (set_attr "prefix_extra" "0,1,*")
17844   (set_attr "prefix" "maybe_evex,maybe_evex,orig")
17845   (set_attr "mode" "V4SF")])
17846
;; Broadcast an SImode scalar to all four elements of V4SI.
;; Alternative 0 (isa sse2): register source, pshufd with immediate 0 (the
;; "%v" prefix in the template selects vpshufd when applicable).
;; Alternative 1 (isa avx): memory source, vbroadcastss.
;; Alternative 2 (isa noavx): source tied to the destination ("0"), in-place
;; shufps with immediate 0; this is the only alternative without an SSE2 or
;; AVX requirement.
17847(define_insn "*vec_dupv4si"
17848  [(set (match_operand:V4SI 0 "register_operand"     "=v,v,x")
17849	(vec_duplicate:V4SI
17850	  (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
17851  "TARGET_SSE"
17852  "@
17853   %vpshufd\t{$0, %1, %0|%0, %1, 0}
17854   vbroadcastss\t{%1, %0|%0, %1}
17855   shufps\t{$0, %0, %0|%0, %0, 0}"
17856  [(set_attr "isa" "sse2,avx,noavx")
17857   (set_attr "type" "sselog1,ssemov,sselog1")
17858   (set_attr "length_immediate" "1,0,1")
17859   (set_attr "prefix_extra" "0,1,*")
17860   (set_attr "prefix" "maybe_vex,maybe_evex,orig")
17861   (set_attr "mode" "TI,V4SF,V4SF")])
17862
;; Broadcast a DImode scalar to both elements of V2DI.
;; Alternative 0 (isa sse2_noavx): source tied to the destination, in-place
;; punpcklqdq.
;; Alternative 1 (isa avx): register source, vpunpcklqdq (source printed
;; through %d1).
;; Alternative 2 (isa sse3): memory source, movddup ("%v" prefix in template).
;; Alternative 3 (isa noavx): source tied to the destination, in-place
;; movlhps.
17863(define_insn "*vec_dupv2di"
17864  [(set (match_operand:V2DI 0 "register_operand"     "=x,v,v,x")
17865	(vec_duplicate:V2DI
17866	  (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,m,0")))]
17867  "TARGET_SSE"
17868  "@
17869   punpcklqdq\t%0, %0
17870   vpunpcklqdq\t{%d1, %0|%0, %d1}
17871   %vmovddup\t{%1, %0|%0, %1}
17872   movlhps\t%0, %0"
17873  [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
17874   (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
17875   (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig")
17876   (set_attr "mode" "TI,TI,DF,V4SF")])
17877
;; Load a half-width memory operand into both halves of a 256-bit integer
;; vector (vec_concat of operand 1 with itself).  All alternatives take a
;; memory source; the second and third are restricted by "isa" to avx512dq
;; and avx512vl respectively.
17878(define_insn "avx2_vbroadcasti128_<mode>"
17879  [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
17880	(vec_concat:VI_256
17881	  (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
17882	  (match_dup 1)))]
17883  "TARGET_AVX2"
17884  "@
17885   vbroadcasti128\t{%1, %0|%0, %1}
17886   vbroadcast<i128vldq>\t{%1, %0|%0, %1}
17887   vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
17888  [(set_attr "isa" "*,avx512dq,avx512vl")
17889   (set_attr "type" "ssemov")
17890   (set_attr "prefix_extra" "1")
17891   (set_attr "prefix" "vex,evex,evex")
17892   (set_attr "mode" "OI")])
17893
17894;; Modes handled by AVX vec_dup patterns.
17895(define_mode_iterator AVX_VEC_DUP_MODE
17896  [V8SI V8SF V4DI V4DF])
;; Per-mode "ss"/"sd" suffix; the vec_dup<mode> pattern appends it to
;; "vbroadcast" in its second alternative.
17897(define_mode_attr vecdupssescalarmodesuffix
17898  [(V8SF "ss") (V4DF "sd") (V8SI "ss") (V4DI "sd")])
17899;; Modes handled by AVX2 vec_dup patterns.
17900(define_mode_iterator AVX2_VEC_DUP_MODE
17901  [V32QI V16QI V16HI V8HI V8SI V4SI])
17902
;; AVX2 broadcast of a scalar into the AVX2_VEC_DUP_MODE vectors.  The
;; source may be memory, an SSE register (printed with the %x modifier), or
;; a general register.  The general-register alternative is emitted as "#"
;; (no direct template) and is only enabled when the isa is noavx512vl.
17903(define_insn "*vec_dup<mode>"
17904  [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,Yi")
17905	(vec_duplicate:AVX2_VEC_DUP_MODE
17906	  (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
17907  "TARGET_AVX2"
17908  "@
17909   v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
17910   v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
17911   #"
17912  [(set_attr "isa" "*,*,noavx512vl")
17913   (set_attr "type" "ssemov")
17914   (set_attr "prefix_extra" "1")
17915   (set_attr "prefix" "maybe_evex")
17916   (set_attr "mode" "<sseinsnmode>")])
17917
;; Broadcast a scalar into the 256-bit AVX_VEC_DUP_MODE vectors.  The "isa"
;; attribute selects among: AVX2 forms with a memory or register source, a
;; pre-AVX2 memory-only vbroadcast, an avx512f form that prints the
;; destination with the %g modifier, and a pre-AVX2 register source
;; (discouraged with "?") that is emitted as "#" for later splitting.
17918(define_insn "vec_dup<mode>"
17919  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
17920	(vec_duplicate:AVX_VEC_DUP_MODE
17921	  (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
17922  "TARGET_AVX"
17923  "@
17924   v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
17925   vbroadcast<vecdupssescalarmodesuffix>\t{%1, %0|%0, %1}
17926   v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
17927   v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
17928   #"
17929  [(set_attr "type" "ssemov")
17930   (set_attr "prefix_extra" "1")
17931   (set_attr "prefix" "maybe_evex")
17932   (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
17933   (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
17934
;; After reload, split an AVX2 broadcast whose source is a general register.
;; Step 1 places the scalar (viewed as SImode) into the low element of the
;; destination viewed as V4SI, using a zero vector as the base.  Step 2
;; broadcasts from the destination's own low 128 bits with
;; avx2_pbroadcast<mode>.
17935(define_split
17936  [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
17937	(vec_duplicate:AVX2_VEC_DUP_MODE
17938	  (match_operand:<ssescalarmode> 1 "register_operand")))]
17939  "TARGET_AVX2
17940   /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
17941      available, because then we can broadcast from GPRs directly.
17942      For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
17943      for V*SI mode it requires just -mavx512vl.  */
17944   && !(TARGET_AVX512VL
17945	&& (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
17946   && reload_completed && GENERAL_REG_P (operands[1])"
17947  [(const_int 0)]
17948{
17949  emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
17950				CONST0_RTX (V4SImode),
17951				gen_lowpart (SImode, operands[1])));
17952  emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
17953					gen_lowpart (<ssexmmmode>mode,
17954						     operands[0])));
17955  DONE;
17956})
17957
;; AVX-without-AVX2 split of a register-source broadcast into a 256-bit
;; vector, done after reload.  Operand 2 is the destination viewed in
;; <ssehalfvecmode>: first broadcast the scalar into that half, then
;; concatenate the half with itself to fill the full destination.
17958(define_split
17959  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
17960	(vec_duplicate:AVX_VEC_DUP_MODE
17961	  (match_operand:<ssescalarmode> 1 "register_operand")))]
17962  "TARGET_AVX && !TARGET_AVX2 && reload_completed"
17963  [(set (match_dup 2)
17964	(vec_duplicate:<ssehalfvecmode> (match_dup 1)))
17965   (set (match_dup 0)
17966	(vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
17967  "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
17968
;; Build a 256-bit vector whose two halves are both operand 1.
;; Alternatives 0-2 are the VEX forms: a memory source uses a broadcast; a
;; source tied to the destination (constraint "0") uses an insert with
;; immediate 1; a separate x register (discouraged with "?") uses
;; vperm2<i128> with immediate 0.  Alternatives 3-6 repeat the memory and
;; tied forms for avx512dq and avx512vl, as listed in the "isa" attribute.
17969(define_insn "avx_vbroadcastf128_<mode>"
17970  [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
17971	(vec_concat:V_256
17972	  (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
17973	  (match_dup 1)))]
17974  "TARGET_AVX"
17975  "@
17976   vbroadcast<i128>\t{%1, %0|%0, %1}
17977   vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
17978   vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}
17979   vbroadcast<i128vldq>\t{%1, %0|%0, %1}
17980   vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1}
17981   vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}
17982   vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}"
17983  [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl")
17984   (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1")
17985   (set_attr "prefix_extra" "1")
17986   (set_attr "length_immediate" "0,1,1,0,1,0,1")
17987   (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
17988   (set_attr "mode" "<sseinsnmode>")])
17989
17990;; For broadcast[i|f]32x2.  Yes there is no v4sf version, only v4si.
;; The 256-bit and 128-bit modes are only enabled with TARGET_AVX512VL.
17991(define_mode_iterator VI4F_BRCST32x2
17992  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
17993   V16SF (V8SF "TARGET_AVX512VL")])
17994
;; Two-element 128-bit source mode for each 64x2 broadcast destination mode.
17995(define_mode_attr 64x2mode
17996  [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
17997
;; Two-element mode selected from the source for each 32x2 broadcast
;; destination mode.
17998(define_mode_attr 32x2mode
17999  [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
18000  (V8SF "V2SF") (V4SI "V2SI")])
18001
;; Broadcast the two low elements (indices 0 and 1) of a 128-bit source
;; across the whole destination vector.  The source may be a register or
;; memory.
18002(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
18003  [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
18004	(vec_duplicate:VI4F_BRCST32x2
18005	  (vec_select:<32x2mode>
18006	    (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
18007	    (parallel [(const_int 0) (const_int 1)]))))]
18008  "TARGET_AVX512DQ"
18009  "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
18010  [(set_attr "type" "ssemov")
18011   (set_attr "prefix_extra" "1")
18012   (set_attr "prefix" "evex")
18013   (set_attr "mode" "<sseinsnmode>")])
18014
;; Broadcast a whole 128-bit operand into a 256-bit VI4F_256 vector.  A
;; register source is handled with a 32x4 shuffle of the source against
;; itself with immediate 0; a memory source uses a 32x4 broadcast.
18015(define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
18016  [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
18017        (vec_duplicate:VI4F_256
18018         (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
18019  "TARGET_AVX512VL"
18020  "@
18021   vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
18022   vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18023  [(set_attr "type" "ssemov")
18024   (set_attr "prefix_extra" "1")
18025   (set_attr "prefix" "evex")
18026   (set_attr "mode" "<sseinsnmode>")])
18027
;; Broadcast a half-width (<ssehalfvecmode>) operand into a V16FI vector.
;; A register source uses a 32x4 shuffle of the source against itself with
;; immediate 0x44; a memory source uses a 32x8 broadcast.
18028(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
18029  [(set (match_operand:V16FI 0 "register_operand" "=v,v")
18030       (vec_duplicate:V16FI
18031         (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
18032  "TARGET_AVX512DQ"
18033  "@
18034   vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
18035   vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18036  [(set_attr "type" "ssemov")
18037   (set_attr "prefix_extra" "1")
18038   (set_attr "prefix" "evex")
18039   (set_attr "mode" "<sseinsnmode>")])
18040
18041;; For broadcast[i|f]64x2
;; The 256-bit modes are only enabled with TARGET_AVX512VL.
18042(define_mode_iterator VI8F_BRCST64x2
18043  [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
18044
;; Broadcast a two-element 128-bit operand (<64x2mode>) into a
;; VI8F_BRCST64x2 vector.  A register source uses a 64x2 shuffle of the
;; source against itself with immediate 0; a memory source uses a 64x2
;; broadcast.
18045(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
18046  [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
18047       (vec_duplicate:VI8F_BRCST64x2
18048         (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
18049  "TARGET_AVX512DQ"
18050  "@
18051   vshuf<shuffletype>64x2\t{$0x0, %<concat_tg_mode>1, %<concat_tg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<concat_tg_mode>1, %<concat_tg_mode>1, 0x0}
18052   vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18053  [(set_attr "type" "ssemov")
18054   (set_attr "prefix_extra" "1")
18055   (set_attr "prefix" "evex")
18056   (set_attr "mode" "<sseinsnmode>")])
18057
;; Broadcast a QI mask register (constraint "Yk"), zero-extended to DI,
;; into every element of a VI8_AVX512VL vector.
18058(define_insn "avx512cd_maskb_vec_dup<mode>"
18059  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
18060	(vec_duplicate:VI8_AVX512VL
18061	  (zero_extend:DI
18062	    (match_operand:QI 1 "register_operand" "Yk"))))]
18063  "TARGET_AVX512CD"
18064  "vpbroadcastmb2q\t{%1, %0|%0, %1}"
18065  [(set_attr "type" "mskmov")
18066   (set_attr "prefix" "evex")
18067   (set_attr "mode" "XI")])
18068
;; Broadcast an HI mask register (constraint "Yk"), zero-extended to SI,
;; into every element of a VI4_AVX512VL vector.
18069(define_insn "avx512cd_maskw_vec_dup<mode>"
18070  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
18071	(vec_duplicate:VI4_AVX512VL
18072	  (zero_extend:SI
18073	    (match_operand:HI 1 "register_operand" "Yk"))))]
18074  "TARGET_AVX512CD"
18075  "vpbroadcastmw2d\t{%1, %0|%0, %1}"
18076  [(set_attr "type" "mskmov")
18077   (set_attr "prefix" "evex")
18078   (set_attr "mode" "XI")])
18079
18080;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
18081;; If it so happens that the input is in memory, use vbroadcast.
18082;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; In this V4SF pattern operand 3 is the first selector index.  For the two
;; memory alternatives operand 1 is re-addressed to the selected SF element
;; (byte offset elt * 4); for the register alternative the immediate is
;; elt * 0x55.
18083(define_insn "*avx_vperm_broadcast_v4sf"
18084  [(set (match_operand:V4SF 0 "register_operand" "=v,v,v")
18085	(vec_select:V4SF
18086	  (match_operand:V4SF 1 "nonimmediate_operand" "m,o,v")
18087	  (match_parallel 2 "avx_vbroadcast_operand"
18088	    [(match_operand 3 "const_int_operand" "C,n,n")])))]
18089  "TARGET_AVX"
18090{
18091  int elt = INTVAL (operands[3]);
18092  switch (which_alternative)
18093    {
18094    case 0:
18095    case 1:
18096      operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
18097      return "vbroadcastss\t{%1, %0|%0, %k1}";
18098    case 2:
18099      operands[2] = GEN_INT (elt * 0x55);
18100      return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
18101    default:
18102      gcc_unreachable ();
18103    }
18104}
18105  [(set_attr "type" "ssemov,ssemov,sselog1")
18106   (set_attr "prefix_extra" "1")
18107   (set_attr "length_immediate" "0,0,1")
18108   (set_attr "prefix" "maybe_evex")
18109   (set_attr "mode" "SF,SF,V4SF")])
18110
;; 256-bit variant of the vec_select broadcast.  It is always emitted as "#"
;; and split after reload, unless the mode is V4DF and AVX2 is enabled (see
;; the split condition).
;;  - Memory source: operand 1 is re-addressed to the selected element and
;;    the replacement template (a vec_duplicate) is used.
;;  - Register source, AVX2, element 0: a direct vec_dup of the low scalar.
;;  - Register source otherwise: an in-lane vpermil that replicates the
;;    element within each 128-bit lane, followed by a cross-lane step.  The
;;    cross-lane step is vperm2f128, or for extended (EVEX-only) destination
;;    registers a V8SF-only substitute chosen from the lane bit of the mask.
18111(define_insn_and_split "*avx_vperm_broadcast_<mode>"
18112  [(set (match_operand:VF_256 0 "register_operand" "=v,v,v")
18113	(vec_select:VF_256
18114	  (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
18115	  (match_parallel 2 "avx_vbroadcast_operand"
18116	    [(match_operand 3 "const_int_operand" "C,n,n")])))]
18117  "TARGET_AVX"
18118  "#"
18119  "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
18120  [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
18121{
18122  rtx op0 = operands[0], op1 = operands[1];
18123  int elt = INTVAL (operands[3]);
18124
18125  if (REG_P (op1))
18126    {
18127      int mask;
18128
18129      if (TARGET_AVX2 && elt == 0)
18130	{
18131	  emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
18132							  op1)));
18133	  DONE;
18134	}
18135
18136      /* Shuffle element we care about into all elements of the 128-bit lane.
18137	 The other lane gets shuffled too, but we don't care.  */
18138      if (<MODE>mode == V4DFmode)
18139	mask = (elt & 1 ? 15 : 0);
18140      else
18141	mask = (elt & 3) * 0x55;
18142      emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
18143
18144      /* Shuffle the lane we care about into both lanes of the dest.  */
18145      mask = (elt / (<ssescalarnum> / 2)) * 0x11;
18146      if (EXT_REX_SSE_REG_P (op0))
18147	{
18148	  /* There is no EVEX VPERM2F128, but we can use either VBROADCASTSS
18149	     or VSHUFF128.  */
18150	  gcc_assert (<MODE>mode == V8SFmode);
18151	  if ((mask & 1) == 0)
18152	    emit_insn (gen_avx2_vec_dupv8sf (op0,
18153					     gen_lowpart (V4SFmode, op0)));
18154	  else
18155	    emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
18156						  GEN_INT (4), GEN_INT (5),
18157						  GEN_INT (6), GEN_INT (7),
18158						  GEN_INT (12), GEN_INT (13),
18159						  GEN_INT (14), GEN_INT (15)));
18160	  DONE;
18161	}
18162
18163      emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
18164      DONE;
18165    }
18166
18167  operands[1] = adjust_address (op1, <ssescalarmode>mode,
18168				elt * GET_MODE_SIZE (<ssescalarmode>mode));
18169})
18170
;; Expander for the immediate form of vpermil on the VF2 modes.  The 8-bit
;; immediate in operand 2 is rewritten into an explicit selector parallel so
;; that the insn pattern can match a plain vec_select.
18171(define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
18172  [(set (match_operand:VF2 0 "register_operand")
18173	(vec_select:VF2
18174	  (match_operand:VF2 1 "nonimmediate_operand")
18175	  (match_operand:SI 2 "const_0_to_255_operand")))]
18176  "TARGET_AVX && <mask_mode512bit_condition>"
18177{
  const int imm = INTVAL (operands[2]);
  rtx sel[<ssescalarnum>];
  int k;

  /* Bit K of the immediate chooses between the two elements of the pair
     that element K belongs to; K & ~1 is the index of that pair's first
     element.  */
  for (k = 0; k < <ssescalarnum>; k++)
    sel[k] = GEN_INT (((imm >> k) & 1) + (k & ~1));

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, sel));
18190})
18191
;; Expander for the immediate form of vpermil on the VF1 modes.  The 8-bit
;; immediate in operand 2 is rewritten into an explicit selector parallel so
;; that the insn pattern can match a plain vec_select.
18192(define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
18193  [(set (match_operand:VF1 0 "register_operand")
18194	(vec_select:VF1
18195	  (match_operand:VF1 1 "nonimmediate_operand")
18196	  (match_operand:SI 2 "const_0_to_255_operand")))]
18197  "TARGET_AVX && <mask_mode512bit_condition>"
18198{
  const int imm = INTVAL (operands[2]);
  rtx sel[<ssescalarnum>];
  int k;

  /* The immediate holds four 2-bit fields, one per position inside a group
     of four elements, and the same fields are reused for every group.
     K & 3 is the position within the group; K & ~3 is the index of the
     group's first element.  */
  for (k = 0; k < <ssescalarnum>; k++)
    sel[k] = GEN_INT (((imm >> (2 * (k & 3))) & 3) + (k & ~3));

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, sel));
18213})
18214
;; Match a constant vec_select whose selector parallel is accepted by
;; avx_vpermilp_parallel (nonzero return).  At output time that helper's
;; return value minus one is used as the instruction immediate.
18215(define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
18216  [(set (match_operand:VF 0 "register_operand" "=v")
18217	(vec_select:VF
18218	  (match_operand:VF 1 "nonimmediate_operand" "vm")
18219	  (match_parallel 2 ""
18220	    [(match_operand 3 "const_int_operand")])))]
18221  "TARGET_AVX && <mask_mode512bit_condition>
18222   && avx_vpermilp_parallel (operands[2], <MODE>mode)"
18223{
18224  int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
18225  operands[2] = GEN_INT (mask);
18226  return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
18227}
18228  [(set_attr "type" "sselog")
18229   (set_attr "prefix_extra" "1")
18230   (set_attr "length_immediate" "1")
18231   (set_attr "prefix" "<mask_prefix>")
18232   (set_attr "mode" "<sseinsnmode>")])
18233
;; Variable-control form of vpermil: operand 1 is the data register and
;; operand 2 an integer vector (register or memory).  The operation is kept
;; opaque as UNSPEC_VPERMIL.
18234(define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
18235  [(set (match_operand:VF 0 "register_operand" "=v")
18236	(unspec:VF
18237	  [(match_operand:VF 1 "register_operand" "v")
18238	   (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
18239	  UNSPEC_VPERMIL))]
18240  "TARGET_AVX && <mask_mode512bit_condition>"
18241  "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18242  [(set_attr "type" "sselog")
18243   (set_attr "prefix_extra" "1")
18244   (set_attr "btver2_decode" "vector")
18245   (set_attr "prefix" "<mask_prefix>")
18246   (set_attr "mode" "<sseinsnmode>")])
18247
;; Modes for the vpermi2/vpermt2 patterns.  The 512-bit SI/SF/DI/DF modes
;; are unconditional; every other mode lists the target flags it requires.
18248(define_mode_iterator VPERMI2
18249  [V16SI V16SF V8DI V8DF
18250   (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
18251   (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
18252   (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
18253   (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
18254   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
18255   (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
18256   (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
18257   (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
18258
;; Integer-only subset of VPERMI2, with the same per-mode target conditions.
18259(define_mode_iterator VPERMI2I
18260  [V16SI V8DI
18261   (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
18262   (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
18263   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
18264   (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
18265   (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
18266   (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
18267
;; Merge-masked vpermi2 expander.  Operand 2 (the integer index vector) is
;; forced into a register.  Operand 5, the second arm of the vec_merge, is
;; that same register viewed in <MODE>mode.
18268(define_expand "<avx512>_vpermi2var<mode>3_mask"
18269  [(set (match_operand:VPERMI2 0 "register_operand")
18270	(vec_merge:VPERMI2
18271	  (unspec:VPERMI2
18272	    [(match_operand:<sseintvecmode> 2 "register_operand")
18273	     (match_operand:VPERMI2 1 "register_operand")
18274	     (match_operand:VPERMI2 3 "nonimmediate_operand")]
18275	    UNSPEC_VPERMT2)
18276	  (match_dup 5)
18277	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
18278  "TARGET_AVX512F"
18279{
18280  operands[2] = force_reg (<sseintvecmode>mode, operands[2]);
18281  operands[5] = gen_lowpart (<MODE>mode, operands[2]);
18282})
18283
;; Integer-mode insn for the merge-masked vpermi2 form.  The index operand
;; (2) is tied to the destination and is also the second arm of the
;; vec_merge (match_dup 2); operand 4 is the mask register.
18284(define_insn "*<avx512>_vpermi2var<mode>3_mask"
18285  [(set (match_operand:VPERMI2I 0 "register_operand" "=v")
18286	(vec_merge:VPERMI2I
18287	  (unspec:VPERMI2I
18288	    [(match_operand:<sseintvecmode> 2 "register_operand" "0")
18289	     (match_operand:VPERMI2I 1 "register_operand" "v")
18290	     (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")]
18291	    UNSPEC_VPERMT2)
18292	  (match_dup 2)
18293	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18294  "TARGET_AVX512F"
18295  "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18296  [(set_attr "type" "sselog")
18297   (set_attr "prefix" "evex")
18298   (set_attr "mode" "<sseinsnmode>")])
18299
;; Floating-point counterpart of the merge-masked vpermi2 insn.  The index
;; operand (2) has an integer vector mode, so the second arm of the
;; vec_merge is that operand seen through a subreg in the float vector mode.
18300(define_insn "*<avx512>_vpermi2var<mode>3_mask"
18301  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
18302	(vec_merge:VF_AVX512VL
18303	  (unspec:VF_AVX512VL
18304	    [(match_operand:<sseintvecmode> 2 "register_operand" "0")
18305	     (match_operand:VF_AVX512VL 1 "register_operand" "v")
18306	     (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")]
18307	    UNSPEC_VPERMT2)
18308	  (subreg:VF_AVX512VL (match_dup 2) 0)
18309	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18310  "TARGET_AVX512F"
18311  "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18312  [(set_attr "type" "sselog")
18313   (set_attr "prefix" "evex")
18314   (set_attr "mode" "<sseinsnmode>")])
18315
;; Zero-masking vpermt2 expander: forwards operands 0-3 and the mask
;; (operand 4) to the _maskz_1 generator, inserting a zero vector of
;; <MODE>mode before the mask.
18316(define_expand "<avx512>_vpermt2var<mode>3_maskz"
18317  [(match_operand:VPERMI2 0 "register_operand")
18318   (match_operand:<sseintvecmode> 1 "register_operand")
18319   (match_operand:VPERMI2 2 "register_operand")
18320   (match_operand:VPERMI2 3 "nonimmediate_operand")
18321   (match_operand:<avx512fmaskmode> 4 "register_operand")]
18322  "TARGET_AVX512F"
18323{
18324  emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
18325	operands[0], operands[1], operands[2], operands[3],
18326	CONST0_RTX (<MODE>mode), operands[4]));
18327  DONE;
18328})
18329
;; Two-source permute without merge masking.  Alternative 0 ties data
;; operand 2 to the destination and prints vpermt2 with operand 1 as the
;; index; alternative 1 ties index operand 1 to the destination and prints
;; vpermi2 with operand 2 as the data register.
18330(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
18331  [(set (match_operand:VPERMI2 0 "register_operand" "=v,v")
18332	(unspec:VPERMI2
18333	  [(match_operand:<sseintvecmode> 1 "register_operand" "v,0")
18334	   (match_operand:VPERMI2 2 "register_operand" "0,v")
18335	   (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")]
18336	  UNSPEC_VPERMT2))]
18337  "TARGET_AVX512F"
18338  "@
18339   vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}
18340   vpermi2<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
18341  [(set_attr "type" "sselog")
18342   (set_attr "prefix" "evex")
18343   (set_attr "mode" "<sseinsnmode>")])
18344
;; Merge-masked vpermt2: data operand 2 is tied to the destination and is
;; also the second arm of the vec_merge; operand 4 is the mask register.
18345(define_insn "<avx512>_vpermt2var<mode>3_mask"
18346  [(set (match_operand:VPERMI2 0 "register_operand" "=v")
18347	(vec_merge:VPERMI2
18348	  (unspec:VPERMI2
18349	    [(match_operand:<sseintvecmode> 1 "register_operand" "v")
18350	    (match_operand:VPERMI2 2 "register_operand" "0")
18351	    (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")]
18352	    UNSPEC_VPERMT2)
18353	  (match_dup 2)
18354	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18355  "TARGET_AVX512F"
18356  "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18357  [(set_attr "type" "sselog")
18358   (set_attr "prefix" "evex")
18359   (set_attr "mode" "<sseinsnmode>")])
18360
;; Expander for the immediate vperm2f128 form.  When neither bit of 0x88 is
;; set in the immediate, the operation is emitted as a plain vec_select from
;; the concatenation of operands 1 and 2: immediate bits 0-1 pick the
;; 128-bit half that fills the low half of the result, bits 4-5 the half
;; that fills the high half.  Otherwise the C body falls through and the
;; unspec template above is used.
18361(define_expand "avx_vperm2f128<mode>3"
18362  [(set (match_operand:AVX256MODE2P 0 "register_operand")
18363	(unspec:AVX256MODE2P
18364	  [(match_operand:AVX256MODE2P 1 "register_operand")
18365	   (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
18366	   (match_operand:SI 3 "const_0_to_255_operand")]
18367	  UNSPEC_VPERMIL2F128))]
18368  "TARGET_AVX"
18369{
18370  int mask = INTVAL (operands[3]);
18371  if ((mask & 0x88) == 0)
18372    {
18373      rtx perm[<ssescalarnum>], t1, t2;
18374      int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
18375
18376      base = (mask & 3) * nelt2;
18377      for (i = 0; i < nelt2; ++i)
18378	perm[i] = GEN_INT (base + i);
18379
18380      base = ((mask >> 4) & 3) * nelt2;
18381      for (i = 0; i < nelt2; ++i)
18382	perm[i + nelt2] = GEN_INT (base + i);
18383
18384      t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
18385			       operands[1], operands[2]);
18386      t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
18387      t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
18388      t2 = gen_rtx_SET (operands[0], t2);
18389      emit_insn (t2);
18390      DONE;
18391    }
18392})
18393
18394;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
18395;; means that in order to represent this properly in rtl we'd have to
18396;; nest *another* vec_concat with a zero operand and do the select from
18397;; a 4x wide vector.  That doesn't seem very nice.
;; This pattern therefore keeps the operation as an unspec and prints
;; operand 3 unchanged as the immediate.
18398(define_insn "*avx_vperm2f128<mode>_full"
18399  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
18400	(unspec:AVX256MODE2P
18401	  [(match_operand:AVX256MODE2P 1 "register_operand" "x")
18402	   (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
18403	   (match_operand:SI 3 "const_0_to_255_operand" "n")]
18404	  UNSPEC_VPERMIL2F128))]
18405  "TARGET_AVX"
18406  "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18407  [(set_attr "type" "sselog")
18408   (set_attr "prefix_extra" "1")
18409   (set_attr "length_immediate" "1")
18410   (set_attr "prefix" "vex")
18411   (set_attr "mode" "<sseinsnmode>")])
18412
;; vec_select form of vperm2f128 (no zeroed lanes).  The selector parallel
;; must be accepted by avx_vperm2f128_parallel; that helper's return value
;; minus one is the instruction immediate.  Immediates 0x12 and 0x20 are
;; printed as a 128-bit vinsert (index 0 and 1 respectively) instead of
;; vperm2.
18413(define_insn "*avx_vperm2f128<mode>_nozero"
18414  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
18415	(vec_select:AVX256MODE2P
18416	  (vec_concat:<ssedoublevecmode>
18417	    (match_operand:AVX256MODE2P 1 "register_operand" "x")
18418	    (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
18419	  (match_parallel 3 ""
18420	    [(match_operand 4 "const_int_operand")])))]
18421  "TARGET_AVX
18422   && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
18423{
18424  int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
18425  if (mask == 0x12)
18426    return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
18427  if (mask == 0x20)
18428    return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
18429  operands[3] = GEN_INT (mask);
18430  return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
18431}
18432  [(set_attr "type" "sselog")
18433   (set_attr "prefix_extra" "1")
18434   (set_attr "length_immediate" "1")
18435   (set_attr "prefix" "vex")
18436   (set_attr "mode" "<sseinsnmode>")])
18437
;; A vec_select of a single 128-bit register whose selector satisfies
;; palignr_operand, printed as (v)palignr.  The byte immediate is the first
;; selector index (operand 3) multiplied by the element size of the mode.
;; The non-AVX alternative ties the source to the destination; the AVX and
;; avx512bw alternatives pass operand 1 as both sources.
18438(define_insn "*ssse3_palignr<mode>_perm"
18439  [(set (match_operand:V_128 0 "register_operand" "=x,x,v")
18440      (vec_select:V_128
18441	(match_operand:V_128 1 "register_operand" "0,x,v")
18442	(match_parallel 2 "palignr_operand"
18443	  [(match_operand 3 "const_int_operand" "n,n,n")])))]
18444  "TARGET_SSSE3"
18445{
18446  operands[2] = (GEN_INT (INTVAL (operands[3])
18447		 * GET_MODE_UNIT_SIZE (GET_MODE (operands[0]))));
18448
18449  switch (which_alternative)
18450    {
18451    case 0:
18452      return "palignr\t{%2, %1, %0|%0, %1, %2}";
18453    case 1:
18454    case 2:
18455      return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
18456    default:
18457      gcc_unreachable ();
18458    }
18459}
18460  [(set_attr "isa" "noavx,avx,avx512bw")
18461   (set_attr "type" "sseishft")
18462   (set_attr "atom_unit" "sishuf")
18463   (set_attr "prefix_data16" "1,*,*")
18464   (set_attr "prefix_extra" "1")
18465   (set_attr "length_immediate" "1")
18466   (set_attr "prefix" "orig,vex,evex")])
18467
;; Masked 128-bit insert into a 256-bit VI48F_256 vector.  Operand 3 (0 or 1)
;; selects which half of operand 1 is replaced by operand 2; operand 4 and
;; the mask in operand 5 are forwarded unchanged to the chosen
;; vec_set_{lo,hi}_<mode>_mask generator.
18468(define_expand "avx512vl_vinsert<mode>"
18469  [(match_operand:VI48F_256 0 "register_operand")
18470   (match_operand:VI48F_256 1 "register_operand")
18471   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
18472   (match_operand:SI 3 "const_0_to_1_operand")
18473   (match_operand:VI48F_256 4 "register_operand")
18474   (match_operand:<avx512fmaskmode> 5 "register_operand")]
18475  "TARGET_AVX512VL"
18476{
  rtx (*gen_half) (rtx, rtx, rtx, rtx, rtx);
  const HOST_WIDE_INT half = INTVAL (operands[3]);

  /* The predicate on operand 3 only admits 0 (low half) and 1 (high
     half); anything else is an internal error.  */
  if (half == 0)
    gen_half = gen_vec_set_lo_<mode>_mask;
  else if (half == 1)
    gen_half = gen_vec_set_hi_<mode>_mask;
  else
    gcc_unreachable ();

  emit_insn (gen_half (operands[0], operands[1], operands[2], operands[4],
		       operands[5]));
  DONE;
18494})
18495
;; 128-bit insert into a 256-bit V_256 vector.  Operand 3 (0 or 1) selects
;; which half of operand 1 is replaced by operand 2, by dispatching to the
;; matching vec_set_{lo,hi}_<mode> generator.
18496(define_expand "avx_vinsertf128<mode>"
18497  [(match_operand:V_256 0 "register_operand")
18498   (match_operand:V_256 1 "register_operand")
18499   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
18500   (match_operand:SI 3 "const_0_to_1_operand")]
18501  "TARGET_AVX"
18502{
  rtx (*gen_half) (rtx, rtx, rtx);
  const HOST_WIDE_INT half = INTVAL (operands[3]);

  /* The predicate on operand 3 only admits 0 (low half) and 1 (high
     half); anything else is an internal error.  */
  if (half == 0)
    gen_half = gen_vec_set_lo_<mode>;
  else if (half == 1)
    gen_half = gen_vec_set_hi_<mode>;
  else
    gcc_unreachable ();

  emit_insn (gen_half (operands[0], operands[1], operands[2]));
  DONE;
18519})
18520
;; Replace the low half of a VI8F_256 vector: the result is operand 2
;; concatenated with elements 2-3 of operand 1.  The mnemonic is chosen at
;; output time: 64x2 with AVX512DQ, else 32x4 with AVX512VL, else the plain
;; vinsert<i128> form.  All three use immediate 0.
18521(define_insn "vec_set_lo_<mode><mask_name>"
18522  [(set (match_operand:VI8F_256 0 "register_operand" "=v")
18523	(vec_concat:VI8F_256
18524	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
18525	  (vec_select:<ssehalfvecmode>
18526	    (match_operand:VI8F_256 1 "register_operand" "v")
18527	    (parallel [(const_int 2) (const_int 3)]))))]
18528  "TARGET_AVX && <mask_avx512dq_condition>"
18529{
18530  if (TARGET_AVX512DQ)
18531    return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
18532  else if (TARGET_AVX512VL)
18533    return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
18534  else
18535    return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
18536}
18537  [(set_attr "type" "sselog")
18538   (set_attr "prefix_extra" "1")
18539   (set_attr "length_immediate" "1")
18540   (set_attr "prefix" "vex")
18541   (set_attr "mode" "<sseinsnmode>")])
18542
;; Replace the high half of a VI8F_256 vector: the result is elements 0-1
;; of operand 1 concatenated with operand 2.  The mnemonic is chosen at
;; output time: 64x2 with AVX512DQ, else 32x4 with AVX512VL, else the plain
;; vinsert<i128> form.  All three use immediate 1.
18543(define_insn "vec_set_hi_<mode><mask_name>"
18544  [(set (match_operand:VI8F_256 0 "register_operand" "=v")
18545	(vec_concat:VI8F_256
18546	  (vec_select:<ssehalfvecmode>
18547	    (match_operand:VI8F_256 1 "register_operand" "v")
18548	    (parallel [(const_int 0) (const_int 1)]))
18549	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
18550  "TARGET_AVX && <mask_avx512dq_condition>"
18551{
18552  if (TARGET_AVX512DQ)
18553    return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
18554  else if (TARGET_AVX512VL)
18555    return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
18556  else
18557    return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
18558}
18559  [(set_attr "type" "sselog")
18560   (set_attr "prefix_extra" "1")
18561   (set_attr "length_immediate" "1")
18562   (set_attr "prefix" "vex")
18563   (set_attr "mode" "<sseinsnmode>")])
18564
;; Replace the low half of a VI4F_256 vector: the result is operand 2
;; concatenated with elements 4-7 of operand 1.  Printed as the 32x4 insert
;; when AVX512VL is enabled, otherwise as vinsert<i128>; immediate 0.
18565(define_insn "vec_set_lo_<mode><mask_name>"
18566  [(set (match_operand:VI4F_256 0 "register_operand" "=v")
18567	(vec_concat:VI4F_256
18568	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
18569	  (vec_select:<ssehalfvecmode>
18570	    (match_operand:VI4F_256 1 "register_operand" "v")
18571	    (parallel [(const_int 4) (const_int 5)
18572		       (const_int 6) (const_int 7)]))))]
18573  "TARGET_AVX"
18574{
18575  if (TARGET_AVX512VL)
18576    return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
18577  else
18578    return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
18579}
18580  [(set_attr "type" "sselog")
18581   (set_attr "prefix_extra" "1")
18582   (set_attr "length_immediate" "1")
18583   (set_attr "prefix" "vex")
18584   (set_attr "mode" "<sseinsnmode>")])
18585
;; Replace the high half of a VI4F_256 vector: the result is elements 0-3
;; of operand 1 concatenated with operand 2.  Printed as the 32x4 insert
;; when AVX512VL is enabled, otherwise as vinsert<i128>; immediate 1.
18586(define_insn "vec_set_hi_<mode><mask_name>"
18587  [(set (match_operand:VI4F_256 0 "register_operand" "=v")
18588	(vec_concat:VI4F_256
18589	  (vec_select:<ssehalfvecmode>
18590	    (match_operand:VI4F_256 1 "register_operand" "v")
18591	    (parallel [(const_int 0) (const_int 1)
18592		       (const_int 2) (const_int 3)]))
18593	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
18594  "TARGET_AVX"
18595{
18596  if (TARGET_AVX512VL)
18597    return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
18598  else
18599    return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
18600}
18601  [(set_attr "type" "sselog")
18602   (set_attr "prefix_extra" "1")
18603   (set_attr "length_immediate" "1")
18604   (set_attr "prefix" "vex")
18605   (set_attr "mode" "<sseinsnmode>")])
18606
;; Replace the low half of a V16HI vector: the result is operand 2
;; concatenated with elements 8-15 of operand 1.  Alternative 0 is the VEX
;; form on x registers, alternative 1 the EVEX vinserti32x4 form on v
;; registers; both use immediate 0.
18607(define_insn "vec_set_lo_v16hi"
18608  [(set (match_operand:V16HI 0 "register_operand" "=x,v")
18609	(vec_concat:V16HI
18610	  (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")
18611	  (vec_select:V8HI
18612	    (match_operand:V16HI 1 "register_operand" "x,v")
18613	    (parallel [(const_int 8) (const_int 9)
18614		       (const_int 10) (const_int 11)
18615		       (const_int 12) (const_int 13)
18616		       (const_int 14) (const_int 15)]))))]
18617  "TARGET_AVX"
18618  "@
18619   vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
18620   vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
18621  [(set_attr "type" "sselog")
18622   (set_attr "prefix_extra" "1")
18623   (set_attr "length_immediate" "1")
18624   (set_attr "prefix" "vex,evex")
18625   (set_attr "mode" "OI")])
18626
;; Replace the high half of a V16HI vector: the result is elements 0-7 of
;; operand 1 concatenated with operand 2.  Alternative 0 is the VEX form on
;; x registers, alternative 1 the EVEX vinserti32x4 form on v registers;
;; both use immediate 1.
18627(define_insn "vec_set_hi_v16hi"
18628  [(set (match_operand:V16HI 0 "register_operand" "=x,v")
18629	(vec_concat:V16HI
18630	  (vec_select:V8HI
18631	    (match_operand:V16HI 1 "register_operand" "x,v")
18632	    (parallel [(const_int 0) (const_int 1)
18633		       (const_int 2) (const_int 3)
18634		       (const_int 4) (const_int 5)
18635		       (const_int 6) (const_int 7)]))
18636	  (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")))]
18637  "TARGET_AVX"
18638  "@
18639   vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
18640   vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
18641  [(set_attr "type" "sselog")
18642   (set_attr "prefix_extra" "1")
18643   (set_attr "length_immediate" "1")
18644   (set_attr "prefix" "vex,evex")
18645   (set_attr "mode" "OI")])
18646
;; Replace the low half of a V32QI vector: the result is operand 2
;; concatenated with elements 16-31 of operand 1.  Alternative 0 is the VEX
;; form on x registers, alternative 1 the EVEX vinserti32x4 form on v
;; registers; both use immediate 0.
;; Operand 2 accepts memory in both alternatives ("xm,vm"), matching
;; vec_set_hi_v32qi and the V16HI patterns, which use the same vinserti32x4
;; template with a "vm" source.
18647(define_insn "vec_set_lo_v32qi"
18648  [(set (match_operand:V32QI 0 "register_operand" "=x,v")
18649	(vec_concat:V32QI
18650	  (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")
18651	  (vec_select:V16QI
18652	    (match_operand:V32QI 1 "register_operand" "x,v")
18653	    (parallel [(const_int 16) (const_int 17)
18654		       (const_int 18) (const_int 19)
18655		       (const_int 20) (const_int 21)
18656		       (const_int 22) (const_int 23)
18657		       (const_int 24) (const_int 25)
18658		       (const_int 26) (const_int 27)
18659		       (const_int 28) (const_int 29)
18660		       (const_int 30) (const_int 31)]))))]
18661  "TARGET_AVX"
18662  "@
18663   vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
18664   vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
18665  [(set_attr "type" "sselog")
18666   (set_attr "prefix_extra" "1")
18667   (set_attr "length_immediate" "1")
18668   (set_attr "prefix" "vex,evex")
18669   (set_attr "mode" "OI")])
18670
;; Build a V32QI value whose first half is elements 0..15 of operand 1
;; (vec_select) and whose second half is the V16QI operand 2, which may
;; be a register or memory in both alternatives.
;; Alternative 0 ("x" registers) emits vinsert%~128 and is tagged "vex";
;; alternative 1 ("v" registers) emits vinserti32x4 and is tagged "evex".
;; Both templates pass immediate 0x1.
18671(define_insn "vec_set_hi_v32qi"
18672  [(set (match_operand:V32QI 0 "register_operand" "=x,v")
18673	(vec_concat:V32QI
18674	  (vec_select:V16QI
18675	    (match_operand:V32QI 1 "register_operand" "x,v")
18676	    (parallel [(const_int 0) (const_int 1)
18677		       (const_int 2) (const_int 3)
18678		       (const_int 4) (const_int 5)
18679		       (const_int 6) (const_int 7)
18680		       (const_int 8) (const_int 9)
18681		       (const_int 10) (const_int 11)
18682		       (const_int 12) (const_int 13)
18683		       (const_int 14) (const_int 15)]))
18684	  (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))]
18685  "TARGET_AVX"
18686  "@
18687   vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
18688   vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
18689  [(set_attr "type" "sselog")
18690   (set_attr "prefix_extra" "1")
18691   (set_attr "length_immediate" "1")
18692   (set_attr "prefix" "vex,evex")
18693   (set_attr "mode" "OI")])
18694
;; Masked vector load.  Register operand 0 is set to an UNSPEC_MASKMOV of
;; the integer-vector mask in register operand 2 (mode <sseintvecmode>)
;; and the memory source operand 1.  Only "x" registers are allowed and
;; the insn is tagged with the "vex" prefix attribute.
;; Note the operand numbering: the mask is operand 2 and the memory is
;; operand 1, and the output template prints them in that role order.
18695(define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
18696  [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
18697	(unspec:V48_AVX2
18698	  [(match_operand:<sseintvecmode> 2 "register_operand" "x")
18699	   (match_operand:V48_AVX2 1 "memory_operand" "m")]
18700	  UNSPEC_MASKMOV))]
18701  "TARGET_AVX"
18702  "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
18703  [(set_attr "type" "sselog1")
18704   (set_attr "prefix_extra" "1")
18705   (set_attr "prefix" "vex")
18706   (set_attr "btver2_decode" "vector")
18707   (set_attr "mode" "<sseinsnmode>")])
18708
;; Masked vector store.  Memory operand 0 is set to an UNSPEC_MASKMOV of
;; the integer-vector mask (operand 1), the register source (operand 2)
;; and the previous contents of the destination (match_dup 0).  The
;; destination uses the "+m" constraint, i.e. it is both read and written,
;; which matches its appearance as an input inside the unspec.
18709(define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
18710  [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
18711	(unspec:V48_AVX2
18712	  [(match_operand:<sseintvecmode> 1 "register_operand" "x")
18713	   (match_operand:V48_AVX2 2 "register_operand" "x")
18714	   (match_dup 0)]
18715	  UNSPEC_MASKMOV))]
18716  "TARGET_AVX"
18717  "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18718  [(set_attr "type" "sselog1")
18719   (set_attr "prefix_extra" "1")
18720   (set_attr "prefix" "vex")
18721   (set_attr "btver2_decode" "vector")
18722   (set_attr "mode" "<sseinsnmode>")])
18723
;; Named expander "maskload<mode><sseintvecmodelower>" for the V48_AVX2
;; modes with an integer-vector mask.  It has no C preparation code, so
;; the RTL template is emitted as written: register operand 0 is set to an
;; UNSPEC_MASKMOV of the mask (operand 2) and the memory source
;; (operand 1).  Enabled under TARGET_AVX.
18724(define_expand "maskload<mode><sseintvecmodelower>"
18725  [(set (match_operand:V48_AVX2 0 "register_operand")
18726	(unspec:V48_AVX2
18727	  [(match_operand:<sseintvecmode> 2 "register_operand")
18728	   (match_operand:V48_AVX2 1 "memory_operand")]
18729	  UNSPEC_MASKMOV))]
18730  "TARGET_AVX")
18731
;; Named expander "maskload<mode><avx512fmaskmodelower>" for the
;; V48_AVX512VL modes with a mask of mode <avx512fmaskmode>.  The template
;; is a vec_merge of the memory source (operand 1) with the previous value
;; of the destination (match_dup 0), selected by the mask register
;; (operand 2).  No C preparation code; enabled under TARGET_AVX512F.
18732(define_expand "maskload<mode><avx512fmaskmodelower>"
18733  [(set (match_operand:V48_AVX512VL 0 "register_operand")
18734	(vec_merge:V48_AVX512VL
18735	  (match_operand:V48_AVX512VL 1 "memory_operand")
18736	  (match_dup 0)
18737	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18738  "TARGET_AVX512F")
18739
;; Same-named masked-load expander for the VI12_AVX512VL modes.  The
;; template has the same vec_merge shape (memory source, previous
;; destination, mask register) but is enabled under TARGET_AVX512BW.
18740(define_expand "maskload<mode><avx512fmaskmodelower>"
18741  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
18742	(vec_merge:VI12_AVX512VL
18743	  (match_operand:VI12_AVX512VL 1 "memory_operand")
18744	  (match_dup 0)
18745	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18746  "TARGET_AVX512BW")
18747
;; Named expander "maskstore<mode><sseintvecmodelower>" for the V48_AVX2
;; modes with an integer-vector mask.  Memory operand 0 is set to an
;; UNSPEC_MASKMOV of the mask (operand 2), the register source
;; (operand 1) and the previous memory contents (match_dup 0).
;; No C preparation code; enabled under TARGET_AVX.
18748(define_expand "maskstore<mode><sseintvecmodelower>"
18749  [(set (match_operand:V48_AVX2 0 "memory_operand")
18750	(unspec:V48_AVX2
18751	  [(match_operand:<sseintvecmode> 2 "register_operand")
18752	   (match_operand:V48_AVX2 1 "register_operand")
18753	   (match_dup 0)]
18754	  UNSPEC_MASKMOV))]
18755  "TARGET_AVX")
18756
;; Named expander "maskstore<mode><avx512fmaskmodelower>" for the
;; V48_AVX512VL modes.  Memory operand 0 is set to a vec_merge of the
;; register source (operand 1) with the previous memory contents
;; (match_dup 0), selected by the mask register (operand 2).
;; No C preparation code; enabled under TARGET_AVX512F.
18757(define_expand "maskstore<mode><avx512fmaskmodelower>"
18758  [(set (match_operand:V48_AVX512VL 0 "memory_operand")
18759	(vec_merge:V48_AVX512VL
18760	  (match_operand:V48_AVX512VL 1 "register_operand")
18761	  (match_dup 0)
18762	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18763  "TARGET_AVX512F")
18764
;; Same-named masked-store expander for the VI12_AVX512VL modes.  The
;; template has the same vec_merge shape (register source, previous memory
;; contents, mask register) but is enabled under TARGET_AVX512BW.
18765(define_expand "maskstore<mode><avx512fmaskmodelower>"
18766  [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
18767	(vec_merge:VI12_AVX512VL
18768	  (match_operand:VI12_AVX512VL 1 "register_operand")
18769	  (match_dup 0)
18770	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18771  "TARGET_AVX512BW")
18772
;; Conditional branch on a comparison of two VI48_AVX vectors.
;; Operand 0 is the comparison operator (predicate
;; "bt_comparison_operator"), operands 1 and 2 are the values compared
;; (operand 2 may be memory) and operand 3 is the branch target label.
;; The C body hands the comparison code and the three operands to
;; ix86_expand_branch and then ends with DONE, so the RTL template here
;; serves for operand matching only and is not emitted itself.
;; Enabled under TARGET_SSE4_1.
18773(define_expand "cbranch<mode>4"
18774  [(set (reg:CC FLAGS_REG)
18775	(compare:CC (match_operand:VI48_AVX 1 "register_operand")
18776		    (match_operand:VI48_AVX 2 "nonimmediate_operand")))
18777   (set (pc) (if_then_else
18778	       (match_operator 0 "bt_comparison_operator"
18779		[(reg:CC FLAGS_REG) (const_int 0)])
18780	       (label_ref (match_operand 3))
18781	       (pc)))]
18782  "TARGET_SSE4_1"
18783{
18784  ix86_expand_branch (GET_CODE (operands[0]),
18785		      operands[1], operands[2], operands[3]);
18786  DONE;
18787})
18788
18789
;; Cast from a half-width vector (operand 1, mode <ssehalfvecmode>) to
;; the full AVX256MODE2P mode, represented as UNSPEC_CAST.
;; The insn condition rejects the memory-to-memory combination.  The
;; output template is "#", so the insn is always split; the split happens
;; once reload has completed and turns the insn into a plain move
;; (set (match_dup 0) (match_dup 1)).
;; The preparation code makes both sides of that move the same mode:
;;   - register destination: operand 0 is narrowed to the half mode with
;;     gen_lowpart;
;;   - otherwise: operand 1 is re-expressed in the full mode with
;;     lowpart_subreg.
18790(define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
18791  [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
18792	(unspec:AVX256MODE2P
18793	  [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
18794	  UNSPEC_CAST))]
18795  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
18796  "#"
18797  "&& reload_completed"
18798  [(set (match_dup 0) (match_dup 1))]
18799{
18800  if (REG_P (operands[0]))
18801    operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
18802  else
18803    operands[1] = lowpart_subreg (<MODE>mode, operands[1],
18804				  <ssehalfvecmode>mode);
18805})
18806
18807;; Modes handled by vec_init expanders.
;; Each row lists the 512-bit, 256-bit and 128-bit mode for one element
;; type.  512-bit entries are conditional on TARGET_AVX512F and 256-bit
;; entries on TARGET_AVX; the 128-bit entries are unconditional except
;; V2DF, which is conditional on TARGET_SSE2.  The last row adds V4TI and
;; V2TI.
18808(define_mode_iterator VEC_INIT_MODE
18809  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
18810   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
18811   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
18812   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
18813   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
18814   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
18815   (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
18816
18817;; Likewise, but for initialization from half sized vectors.
18818;; Thus, these are all VEC_INIT_MODE modes except V2??.
;; Concretely, the two-element modes V2DI, V2DF and V2TI are absent from
;; this list; the per-mode target conditions are otherwise the same as in
;; the full-mode list.
18819(define_mode_iterator VEC_INIT_HALF_MODE
18820  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
18821   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
18822   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
18823   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
18824   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
18825   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
18826   (V4TI "TARGET_AVX512F")])
18827
;; Named expander "vec_init<mode><ssescalarmodelower>" for every mode in
;; VEC_INIT_MODE.  Operand 0 is the destination register; operand 1 has
;; no mode or predicate and is passed through unchanged.  All work is
;; delegated to ix86_expand_vector_init, then DONE ends the expansion.
;; NOTE(review): the meaning of the leading `false' argument is defined
;; by ix86_expand_vector_init, which is outside this chunk.
18828(define_expand "vec_init<mode><ssescalarmodelower>"
18829  [(match_operand:VEC_INIT_MODE 0 "register_operand")
18830   (match_operand 1)]
18831  "TARGET_SSE"
18832{
18833  ix86_expand_vector_init (false, operands[0], operands[1]);
18834  DONE;
18835})
18836
;; Named expander "vec_init<mode><ssehalfvecmodelower>" for every mode in
;; VEC_INIT_HALF_MODE (initialization from half-sized vectors).  The body
;; is the same as for the scalar-element variant: it forwards operands 0
;; and 1 to ix86_expand_vector_init and finishes with DONE.
18837(define_expand "vec_init<mode><ssehalfvecmodelower>"
18838  [(match_operand:VEC_INIT_HALF_MODE 0 "register_operand")
18839   (match_operand 1)]
18840  "TARGET_SSE"
18841{
18842  ix86_expand_vector_init (false, operands[0], operands[1]);
18843  DONE;
18844})
18845
;; Arithmetic right shift (ashiftrt) of vector operand 1 by the vector of
;; shift counts in operand 2; both have the same VI48_AVX512F_AVX512VL
;; mode, and operand 2 may be a register or memory.
;; The pattern name carries <mask_name> and the template prints
;; <mask_operand3> after the destination, so masked variants are
;; generated from the same definition.  Emits vpsrav<ssemodesuffix>.
18846(define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
18847  [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
18848	(ashiftrt:VI48_AVX512F_AVX512VL
18849	  (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
18850	  (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
18851  "TARGET_AVX2 && <mask_mode512bit_condition>"
18852  "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18853  [(set_attr "type" "sseishft")
18854   (set_attr "prefix" "maybe_evex")
18855   (set_attr "mode" "<sseinsnmode>")])
18856
;; Arithmetic right shift (ashiftrt) of vector operand 1 by the vector of
;; shift counts in operand 2, for the VI2_AVX512VL modes.  The mnemonic
;; is the fixed string vpsravw and the insn requires TARGET_AVX512BW.
;; <mask_name> / <mask_operand3> provide the masked variants.
18857(define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
18858  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18859	(ashiftrt:VI2_AVX512VL
18860	  (match_operand:VI2_AVX512VL 1 "register_operand" "v")
18861	  (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
18862  "TARGET_AVX512BW"
18863  "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18864  [(set_attr "type" "sseishft")
18865   (set_attr "prefix" "maybe_evex")
18866   (set_attr "mode" "<sseinsnmode>")])
18867
;; Variable shift using the any_lshift code iterator: vector operand 1 is
;; shifted by the vector of counts in operand 2 (register or memory),
;; both in the same VI48_AVX512F mode.  The mnemonic is assembled from
;; <vshift> and <ssemodesuffix>; <mask_name> / <mask_operand3> provide
;; the masked variants.
18868(define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
18869  [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
18870	(any_lshift:VI48_AVX512F
18871	  (match_operand:VI48_AVX512F 1 "register_operand" "v")
18872	  (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
18873  "TARGET_AVX2 && <mask_mode512bit_condition>"
18874  "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18875  [(set_attr "type" "sseishft")
18876   (set_attr "prefix" "maybe_evex")
18877   (set_attr "mode" "<sseinsnmode>")])
18878
;; Variable shift using the any_lshift code iterator for the VI2_AVX512VL
;; modes: vector operand 1 is shifted by the vector of counts in
;; operand 2 (register or memory).  Requires TARGET_AVX512BW;
;; <mask_name> / <mask_operand3> provide the masked variants.
18879(define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
18880  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18881	(any_lshift:VI2_AVX512VL
18882	  (match_operand:VI2_AVX512VL 1 "register_operand" "v")
18883	  (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
18884  "TARGET_AVX512BW"
18885  "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18886  [(set_attr "type" "sseishft")
18887   (set_attr "prefix" "maybe_evex")
18888   (set_attr "mode" "<sseinsnmode>")])
18889
;; Concatenate two half-width vectors (operands 1 and 2, mode
;; <ssehalfvecmode>) into one V_256_512 register (operand 0).
;;
;; Four constraint alternatives:
;;   0: "x" registers, operand 2 register or memory.
;;      Emits vinsert<i128> with immediate 0x1.
;;   1: "v" registers, operand 2 register or memory.
;;      Emits a vinsert<shuffletype>NNxM form chosen from the total mode
;;      size and the element size:
;;        64-byte mode: 32x8 if TARGET_AVX512DQ and 4-byte elements,
;;                      otherwise 64x4;
;;        other sizes:  64x2 if TARGET_AVX512DQ and 8-byte elements,
;;                      otherwise 32x4.
;;   2, 3: operand 2 satisfies constraint "C" (the constant accepted by
;;      vector_move_operand), so no insert is needed.  A plain move of
;;      operand 1 into the low part of operand 0 is emitted instead; the
;;      move mnemonic is chosen from the insn "mode" attribute.  For the
;;      integer modes (XI/OI), alternative 2 uses vmovdqa and
;;      alternative 3 uses vmovdqa64 or vmovdqa32 by element size.
;;      %t0 / %x0 print operand 0 with a size override.
;;      NOTE(review): presumably %t is the 256-bit and %x the 128-bit
;;      register name -- confirm against the i386 print-operand codes.
;;
;; The "type", "prefix_extra" and "length_immediate" attributes are given
;; per alternative (insert forms vs. move forms).
18890(define_insn "avx_vec_concat<mode>"
18891  [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
18892	(vec_concat:V_256_512
18893	  (match_operand:<ssehalfvecmode> 1 "register_operand" "x,v,x,v")
18894	  (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,vm,C,C")))]
18895  "TARGET_AVX"
18896{
18897  switch (which_alternative)
18898    {
18899    case 0:
18900      return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18901    case 1:
18902      if (<MODE_SIZE> == 64)
18903	{
18904	  if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 4)
18905	    return "vinsert<shuffletype>32x8\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18906	  else
18907	    return "vinsert<shuffletype>64x4\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18908	}
18909      else
18910	{
18911	  if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 8)
18912	    return "vinsert<shuffletype>64x2\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18913	  else
18914	    return "vinsert<shuffletype>32x4\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18915	}
18916    case 2:
18917    case 3:
18918      switch (get_attr_mode (insn))
18919	{
18920	case MODE_V16SF:
18921	  return "vmovaps\t{%1, %t0|%t0, %1}";
18922	case MODE_V8DF:
18923	  return "vmovapd\t{%1, %t0|%t0, %1}";
18924	case MODE_V8SF:
18925	  return "vmovaps\t{%1, %x0|%x0, %1}";
18926	case MODE_V4DF:
18927	  return "vmovapd\t{%1, %x0|%x0, %1}";
18928	case MODE_XI:
18929	  if (which_alternative == 2)
18930	    return "vmovdqa\t{%1, %t0|%t0, %1}";
18931	  else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
18932	    return "vmovdqa64\t{%1, %t0|%t0, %1}";
18933	  else
18934	    return "vmovdqa32\t{%1, %t0|%t0, %1}";
18935	case MODE_OI:
18936	  if (which_alternative == 2)
18937	    return "vmovdqa\t{%1, %x0|%x0, %1}";
18938	  else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
18939	    return "vmovdqa64\t{%1, %x0|%x0, %1}";
18940	  else
18941	    return "vmovdqa32\t{%1, %x0|%x0, %1}";
18942	default:
18943	  gcc_unreachable ();
18944	}
18945    default:
18946      gcc_unreachable ();
18947    }
18948}
18949  [(set_attr "type" "sselog,sselog,ssemov,ssemov")
18950   (set_attr "prefix_extra" "1,1,*,*")
18951   (set_attr "length_immediate" "1,1,*,*")
18952   (set_attr "prefix" "maybe_evex")
18953   (set_attr "mode" "<sseinsnmode>")])
18954
;; Half-to-single conversion, register source.  The RTL models the result
;; as elements 0..3 (vec_select) of a V8SF UNSPEC_VCVTPH2PS applied to the
;; V8HI register operand 1, giving a V4SF destination.
;; Enabled under TARGET_F16C or TARGET_AVX512VL; <mask_name> /
;; <mask_operand2> provide the masked variants.
18955(define_insn "vcvtph2ps<mask_name>"
18956  [(set (match_operand:V4SF 0 "register_operand" "=v")
18957	(vec_select:V4SF
18958	  (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
18959		       UNSPEC_VCVTPH2PS)
18960	  (parallel [(const_int 0) (const_int 1)
18961		     (const_int 2) (const_int 3)])))]
18962  "TARGET_F16C || TARGET_AVX512VL"
18963  "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18964  [(set_attr "type" "ssecvt")
18965   (set_attr "prefix" "maybe_evex")
18966   (set_attr "mode" "V4SF")])
18967
;; Half-to-single conversion with a memory source: a V4SF
;; UNSPEC_VCVTPH2PS of the V4HI memory operand 1.  The leading `*' keeps
;; the pattern from getting a generator function, so it is matched by
;; shape only.
;; NOTE(review): the attributes here are "prefix" "vex" and "mode" "V8SF"
;; although the destination is V4SF and the name takes <mask_name>;
;; confirm whether that is intentional.
18968(define_insn "*vcvtph2ps_load<mask_name>"
18969  [(set (match_operand:V4SF 0 "register_operand" "=v")
18970	(unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
18971		     UNSPEC_VCVTPH2PS))]
18972  "TARGET_F16C || TARGET_AVX512VL"
18973  "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18974  [(set_attr "type" "ssecvt")
18975   (set_attr "prefix" "vex")
18976   (set_attr "mode" "V8SF")])
18977
;; 256-bit half-to-single conversion: a V8SF UNSPEC_VCVTPH2PS of the V8HI
;; operand 1, which may be a register or memory.
;; Enabled under TARGET_F16C or TARGET_AVX512VL; <mask_name> /
;; <mask_operand2> provide the masked variants.
;; NOTE(review): "prefix" is "vex" even though the name takes <mask_name>
;; and the constraints are "v"; confirm whether that is intentional.
18978(define_insn "vcvtph2ps256<mask_name>"
18979  [(set (match_operand:V8SF 0 "register_operand" "=v")
18980	(unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
18981		     UNSPEC_VCVTPH2PS))]
18982  "TARGET_F16C || TARGET_AVX512VL"
18983  "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18984  [(set_attr "type" "ssecvt")
18985   (set_attr "prefix" "vex")
18986   (set_attr "btver2_decode" "double")
18987   (set_attr "mode" "V8SF")])
18988
;; 512-bit half-to-single conversion: a V16SF UNSPEC_VCVTPH2PS of the
;; V16HI operand 1.  The predicate and constraint of operand 1 come from
;; the <round_saeonly_*> attributes, and <round_saeonly_mask_op2> is
;; printed next to operand 1 in both assembler syntaxes.
;; Requires TARGET_AVX512F; always tagged with the "evex" prefix.
18989(define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
18990  [(set (match_operand:V16SF 0 "register_operand" "=v")
18991	(unspec:V16SF
18992	  [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18993	  UNSPEC_VCVTPH2PS))]
18994  "TARGET_AVX512F"
18995  "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18996  [(set_attr "type" "ssecvt")
18997   (set_attr "prefix" "evex")
18998   (set_attr "mode" "V16SF")])
18999
;; Masked single-to-half conversion expander.
;; The converted V4HI value (UNSPEC_VCVTPS2PH of the V4SF operand 1 and
;; the immediate operand 2, range 0..255) is concatenated with operand 5
;; to form a V8HI.  That V8HI is then merged (vec_merge) with operand 3
;; under the QI mask register operand 4.
;; Operand 5 is not supplied by the caller: the preparation statement
;; sets it to the V4HI zero constant.  Requires TARGET_AVX512VL.
19000(define_expand "vcvtps2ph_mask"
19001  [(set (match_operand:V8HI 0 "register_operand")
19002	(vec_merge:V8HI
19003	  (vec_concat:V8HI
19004	    (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
19005			  (match_operand:SI 2 "const_0_to_255_operand")]
19006			  UNSPEC_VCVTPS2PH)
19007	    (match_dup 5))
19008	   (match_operand:V8HI 3 "vector_move_operand")
19009	   (match_operand:QI 4 "register_operand")))]
19010  "TARGET_AVX512VL"
19011  "operands[5] = CONST0_RTX (V4HImode);")
19012
;; Unmasked single-to-half conversion expander.
;; The converted V4HI value (UNSPEC_VCVTPS2PH of the V4SF operand 1 and
;; the immediate operand 2, range 0..255) is concatenated with operand 3
;; to form the V8HI destination.  Operand 3 is not supplied by the
;; caller: the preparation statement sets it to the V4HI zero constant.
;; Requires TARGET_F16C.
19013(define_expand "vcvtps2ph"
19014  [(set (match_operand:V8HI 0 "register_operand")
19015	(vec_concat:V8HI
19016	  (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
19017			(match_operand:SI 2 "const_0_to_255_operand")]
19018		       UNSPEC_VCVTPS2PH)
19019	  (match_dup 3)))]
19020  "TARGET_F16C"
19021  "operands[3] = CONST0_RTX (V4HImode);")
19022
;; Single-to-half conversion into a register.  The V8HI destination is
;; the concatenation of the converted V4HI value (UNSPEC_VCVTPS2PH of
;; operand 1 and immediate operand 2) with operand 3, which must satisfy
;; const0_operand.
;; The condition also requires <mask_avx512vl_condition>, and the
;; template prints <mask_operand4> after the destination for the masked
;; variants.
19023(define_insn "*vcvtps2ph<mask_name>"
19024  [(set (match_operand:V8HI 0 "register_operand" "=v")
19025	(vec_concat:V8HI
19026	  (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
19027			(match_operand:SI 2 "const_0_to_255_operand" "N")]
19028		       UNSPEC_VCVTPS2PH)
19029	  (match_operand:V4HI 3 "const0_operand")))]
19030  "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
19031  "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
19032  [(set_attr "type" "ssecvt")
19033   (set_attr "prefix" "maybe_evex")
19034   (set_attr "mode" "V4SF")])
19035
;; Single-to-half conversion stored directly to memory: the V4HI memory
;; operand 0 is set to UNSPEC_VCVTPS2PH of the V4SF register operand 1
;; and the immediate operand 2 (range 0..255).  Since only the four
;; converted elements are written, there is no vec_concat with zero in
;; this form.
19036(define_insn "*vcvtps2ph_store<mask_name>"
19037  [(set (match_operand:V4HI 0 "memory_operand" "=m")
19038	(unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
19039		      (match_operand:SI 2 "const_0_to_255_operand" "N")]
19040		     UNSPEC_VCVTPS2PH))]
19041  "TARGET_F16C || TARGET_AVX512VL"
19042  "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
19043  [(set_attr "type" "ssecvt")
19044   (set_attr "prefix" "maybe_evex")
19045   (set_attr "mode" "V4SF")])
19046
;; 256-bit single-to-half conversion: the V8HI destination (register or
;; memory, constraint "=vm") is set to UNSPEC_VCVTPS2PH of the V8SF
;; register operand 1 and the immediate operand 2 (range 0..255).
;; <mask_name> / <mask_operand3> provide the masked variants.
19047(define_insn "vcvtps2ph256<mask_name>"
19048  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=vm")
19049	(unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
19050		      (match_operand:SI 2 "const_0_to_255_operand" "N")]
19051		     UNSPEC_VCVTPS2PH))]
19052  "TARGET_F16C || TARGET_AVX512VL"
19053  "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
19054  [(set_attr "type" "ssecvt")
19055   (set_attr "prefix" "maybe_evex")
19056   (set_attr "btver2_decode" "vector")
19057   (set_attr "mode" "V8SF")])
19058
;; 512-bit single-to-half conversion: the V16HI destination (register or
;; memory, constraint "=vm") is set to UNSPEC_VCVTPS2PH of the V16SF
;; register operand 1 and the immediate operand 2 (range 0..255).
;; Requires TARGET_AVX512F; always tagged with the "evex" prefix.
19059(define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
19060  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
19061	(unspec:V16HI
19062	  [(match_operand:V16SF 1 "register_operand" "v")
19063	   (match_operand:SI 2 "const_0_to_255_operand" "N")]
19064	  UNSPEC_VCVTPS2PH))]
19065  "TARGET_AVX512F"
19066  "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
19067  [(set_attr "type" "ssecvt")
19068   (set_attr "prefix" "evex")
19069   (set_attr "mode" "V16SF")])
19070
19071;; For gather* insn patterns
;; VEC_GATHER_MODE lists the 128-bit and 256-bit data modes with 4-byte
;; or 8-byte elements that the AVX2 gather patterns iterate over.
19072(define_mode_iterator VEC_GATHER_MODE
19073		      [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Mode of the index vector when the indices are SImode elements, keyed
;; by data mode.  The table also has entries for the 512-bit data modes,
;; which are not part of VEC_GATHER_MODE.
19074(define_mode_attr VEC_GATHER_IDXSI
19075		      [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
19076		       (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
19077		       (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
19078		       (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
19079
;; Mode of the index vector when the indices are DImode elements, keyed
;; by data mode.  For data modes with 4-byte elements the index vector
;; has half as many elements as the data mode (e.g. V8SI -> V4DI).
19080(define_mode_attr VEC_GATHER_IDXDI
19081		      [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
19082		       (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
19083		       (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
19084		       (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
19085
;; Mode used for the source / mask operands of the DImode-index patterns.
;; It equals the data mode except for the 256-bit and 512-bit modes with
;; 4-byte elements, which map to the mode with half the element count
;; (V8SI -> V4SI, V16SI -> V8SI, V8SF -> V4SF, V16SF -> V8SF).
19086(define_mode_attr VEC_GATHER_SRCDI
19087		      [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
19088		       (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
19089		       (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
19090		       (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
19091
;; AVX2 gather expander with SImode indices.
;; Operands supplied by the caller:
;;   0: destination register
;;   1: VEC_GATHER_MODE register, first element of the UNSPEC_GATHER
;;   2: base address (vsib_address_operand)
;;   3: index vector register, mode <VEC_GATHER_IDXSI>
;;   4: VEC_GATHER_MODE register, last element of the UNSPEC_GATHER
;;   5: SImode scale, restricted by const1248_operand
;; Operand 6 (the match_par_dup address) is built by the C body as an
;; UNSPEC_VSIBADDR of operands 2, 3 and 5 in Pmode.  Operand 7 is a
;; scratch register clobbered by the instruction.
;; The predicate of operand 5 must be spelled exactly "const1248_operand"
;; (no surrounding blanks) so that it names the same predicate as the
;; insn patterns this expander feeds.
19092(define_expand "avx2_gathersi<mode>"
19093  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
19094		   (unspec:VEC_GATHER_MODE
19095		     [(match_operand:VEC_GATHER_MODE 1 "register_operand")
19096		      (mem:<ssescalarmode>
19097			(match_par_dup 6
19098			  [(match_operand 2 "vsib_address_operand")
19099			   (match_operand:<VEC_GATHER_IDXSI>
19100			      3 "register_operand")
19101			   (match_operand:SI 5 "const1248_operand")]))
19102		      (mem:BLK (scratch))
19103		      (match_operand:VEC_GATHER_MODE 4 "register_operand")]
19104		     UNSPEC_GATHER))
19105	      (clobber (match_scratch:VEC_GATHER_MODE 7))])]
19106  "TARGET_AVX2"
19107{
19108  operands[6]
19109    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
19110					operands[5]), UNSPEC_VSIBADDR);
19111})
19112
;; AVX2 gather insn with SImode indices.
;; Operand roles:
;;   0: destination, early-clobber ("=&x")
;;   2: first UNSPEC_GATHER element, tied to operand 0 (constraint "0")
;;   7: the memory reference (vsib_mem_operator) wrapping an
;;      UNSPEC_VSIBADDR of base (3), index vector (4) and scale (6)
;;   5: last UNSPEC_GATHER element, tied to scratch operand 1 ("1")
;;   1: scratch register that the insn clobbers, early-clobber
;; The (mem:BLK (scratch)) element is part of the unspec's inputs.
;; NOTE(review): presumably it expresses a dependence on arbitrary memory
;; -- confirm against the gather builtin expansion.
;; The template prints %1 (the tied copy of operand 5), the memory
;; operand %7 and the destination %0.
19113(define_insn "*avx2_gathersi<mode>"
19114  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
19115	(unspec:VEC_GATHER_MODE
19116	  [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
19117	   (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
19118	     [(unspec:P
19119		[(match_operand:P 3 "vsib_address_operand" "Tv")
19120		 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
19121		 (match_operand:SI 6 "const1248_operand" "n")]
19122		UNSPEC_VSIBADDR)])
19123	   (mem:BLK (scratch))
19124	   (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
19125	  UNSPEC_GATHER))
19126   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
19127  "TARGET_AVX2"
19128  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
19129  [(set_attr "type" "ssemov")
19130   (set_attr "prefix" "vex")
19131   (set_attr "mode" "<sseinsnmode>")])
19132
;; Variant of the AVX2 SImode-index gather insn in which the first
;; UNSPEC_GATHER element is (pc) rather than a register tied to the
;; destination.
;; NOTE(review): presumably (pc) marks "no meaningful previous
;; destination value" -- confirm with the code that generates this form.
;; Operand roles:
;;   0: destination, early-clobber
;;   6: memory reference wrapping UNSPEC_VSIBADDR of base (2), index
;;      vector (3) and scale (5)
;;   4: last UNSPEC_GATHER element, tied to scratch operand 1
;;   1: scratch register clobbered by the insn
19133(define_insn "*avx2_gathersi<mode>_2"
19134  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
19135	(unspec:VEC_GATHER_MODE
19136	  [(pc)
19137	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19138	     [(unspec:P
19139		[(match_operand:P 2 "vsib_address_operand" "Tv")
19140		 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
19141		 (match_operand:SI 5 "const1248_operand" "n")]
19142		UNSPEC_VSIBADDR)])
19143	   (mem:BLK (scratch))
19144	   (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
19145	  UNSPEC_GATHER))
19146   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
19147  "TARGET_AVX2"
19148  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
19149  [(set_attr "type" "ssemov")
19150   (set_attr "prefix" "vex")
19151   (set_attr "mode" "<sseinsnmode>")])
19152
;; AVX2 gather expander with DImode indices.
;; Operands supplied by the caller:
;;   0: destination register, mode VEC_GATHER_MODE
;;   1: <VEC_GATHER_SRCDI> register, first element of the UNSPEC_GATHER
;;   2: base address (vsib_address_operand)
;;   3: index vector register, mode <VEC_GATHER_IDXDI>
;;   4: <VEC_GATHER_SRCDI> register, last element of the UNSPEC_GATHER
;;   5: SImode scale, restricted by const1248_operand
;; Operand 6 (the match_par_dup address) is built by the C body as an
;; UNSPEC_VSIBADDR of operands 2, 3 and 5 in Pmode.  Operand 7 is a
;; scratch register clobbered by the instruction.
;; The predicate of operand 5 must be spelled exactly "const1248_operand"
;; (no surrounding blanks) so that it names the same predicate as the
;; insn patterns this expander feeds.
19153(define_expand "avx2_gatherdi<mode>"
19154  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
19155		   (unspec:VEC_GATHER_MODE
19156		     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
19157		      (mem:<ssescalarmode>
19158			(match_par_dup 6
19159			  [(match_operand 2 "vsib_address_operand")
19160			   (match_operand:<VEC_GATHER_IDXDI>
19161			      3 "register_operand")
19162			   (match_operand:SI 5 "const1248_operand")]))
19163		      (mem:BLK (scratch))
19164		      (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
19165		     UNSPEC_GATHER))
19166	      (clobber (match_scratch:VEC_GATHER_MODE 7))])]
19167  "TARGET_AVX2"
19168{
19169  operands[6]
19170    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
19171					operands[5]), UNSPEC_VSIBADDR);
19172})
19173
;; AVX2 gather insn with DImode indices.
;; Operand roles:
;;   0: destination (VEC_GATHER_MODE), early-clobber
;;   2: first UNSPEC_GATHER element, mode <VEC_GATHER_SRCDI>, tied to
;;      operand 0
;;   7: memory reference wrapping UNSPEC_VSIBADDR of base (3), index
;;      vector (4) and scale (6)
;;   5: last UNSPEC_GATHER element, mode <VEC_GATHER_SRCDI>, tied to
;;      scratch operand 1
;;   1: scratch register clobbered by the insn
;; The template prints operands 5 and 2 rather than 1 and 0.  They are
;; the same hard registers (tied by the "1" and "0" constraints) but
;; carry the <VEC_GATHER_SRCDI> mode.
;; NOTE(review): presumably this is done so the register names are
;; printed at the source-vector width -- confirm.
19174(define_insn "*avx2_gatherdi<mode>"
19175  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
19176	(unspec:VEC_GATHER_MODE
19177	  [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
19178	   (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
19179	     [(unspec:P
19180		[(match_operand:P 3 "vsib_address_operand" "Tv")
19181		 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
19182		 (match_operand:SI 6 "const1248_operand" "n")]
19183		UNSPEC_VSIBADDR)])
19184	   (mem:BLK (scratch))
19185	   (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
19186	  UNSPEC_GATHER))
19187   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
19188  "TARGET_AVX2"
19189  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
19190  [(set_attr "type" "ssemov")
19191   (set_attr "prefix" "vex")
19192   (set_attr "mode" "<sseinsnmode>")])
19193
;; Variant of the AVX2 DImode-index gather insn in which the first
;; UNSPEC_GATHER element is (pc) instead of a register tied to the
;; destination.
;; Operand roles:
;;   0: destination, early-clobber
;;   6: memory reference wrapping UNSPEC_VSIBADDR of base (2), index
;;      vector (3) and scale (5)
;;   4: last UNSPEC_GATHER element, mode <VEC_GATHER_SRCDI>, tied to
;;      scratch operand 1
;;   1: scratch register clobbered by the insn
;; Output: when the data mode differs from <VEC_GATHER_SRCDI>, the
;; destination is printed with the %x modifier (%x0); otherwise it is
;; printed plainly as %0.
19194(define_insn "*avx2_gatherdi<mode>_2"
19195  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
19196	(unspec:VEC_GATHER_MODE
19197	  [(pc)
19198	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19199	     [(unspec:P
19200		[(match_operand:P 2 "vsib_address_operand" "Tv")
19201		 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
19202		 (match_operand:SI 5 "const1248_operand" "n")]
19203		UNSPEC_VSIBADDR)])
19204	   (mem:BLK (scratch))
19205	   (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
19206	  UNSPEC_GATHER))
19207   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
19208  "TARGET_AVX2"
19209{
19210  if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
19211    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
19212  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
19213}
19214  [(set_attr "type" "ssemov")
19215   (set_attr "prefix" "vex")
19216   (set_attr "mode" "<sseinsnmode>")])
19217
;; AVX2 DImode-index gather for the VI4F_256 data modes where only the
;; low four elements of the gathered vector are used: the destination has
;; mode <VEC_GATHER_SRCDI> and is a vec_select of elements 0..3 of the
;; UNSPEC_GATHER result.
;; Operand roles:
;;   0: destination, early-clobber
;;   2: first UNSPEC_GATHER element, tied to operand 0
;;   7: memory reference wrapping UNSPEC_VSIBADDR of base (3), index
;;      vector (4) and scale (6)
;;   5: last UNSPEC_GATHER element, tied to scratch operand 1
;;   1: VI4F_256 scratch register clobbered by the insn
19218(define_insn "*avx2_gatherdi<mode>_3"
19219  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
19220	(vec_select:<VEC_GATHER_SRCDI>
19221	  (unspec:VI4F_256
19222	    [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
19223	     (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
19224	       [(unspec:P
19225		  [(match_operand:P 3 "vsib_address_operand" "Tv")
19226		   (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
19227		   (match_operand:SI 6 "const1248_operand" "n")]
19228		  UNSPEC_VSIBADDR)])
19229	     (mem:BLK (scratch))
19230	     (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
19231	     UNSPEC_GATHER)
19232	  (parallel [(const_int 0) (const_int 1)
19233		     (const_int 2) (const_int 3)])))
19234   (clobber (match_scratch:VI4F_256 1 "=&x"))]
19235  "TARGET_AVX2"
19236  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
19237  [(set_attr "type" "ssemov")
19238   (set_attr "prefix" "vex")
19239   (set_attr "mode" "<sseinsnmode>")])
19240
;; Variant of the low-four-elements DImode-index gather (VI4F_256 data
;; modes) in which the first UNSPEC_GATHER element is (pc) instead of a
;; register tied to the destination.
;; Operand roles:
;;   0: destination (<VEC_GATHER_SRCDI>), early-clobber; set to elements
;;      0..3 of the UNSPEC_GATHER result
;;   6: memory reference wrapping UNSPEC_VSIBADDR of base (2), index
;;      vector (3) and scale (5)
;;   4: last UNSPEC_GATHER element, tied to scratch operand 1
;;   1: VI4F_256 scratch register clobbered by the insn
19241(define_insn "*avx2_gatherdi<mode>_4"
19242  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
19243	(vec_select:<VEC_GATHER_SRCDI>
19244	  (unspec:VI4F_256
19245	    [(pc)
19246	     (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19247	       [(unspec:P
19248		  [(match_operand:P 2 "vsib_address_operand" "Tv")
19249		   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
19250		   (match_operand:SI 5 "const1248_operand" "n")]
19251		  UNSPEC_VSIBADDR)])
19252	     (mem:BLK (scratch))
19253	     (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
19254	    UNSPEC_GATHER)
19255	  (parallel [(const_int 0) (const_int 1)
19256		     (const_int 2) (const_int 3)])))
19257   (clobber (match_scratch:VI4F_256 1 "=&x"))]
19258  "TARGET_AVX2"
19259  "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
19260  [(set_attr "type" "ssemov")
19261   (set_attr "prefix" "vex")
19262   (set_attr "mode" "<sseinsnmode>")])
19263
19264;; Memory operand override for -masm=intel of the v*gatherq* patterns.
;; Maps each data mode to a one-letter operand modifier ("q", "x", "t" or
;; "g").  The gatherq output templates use it as %<gatherq_mode>N on the
;; memory operand in the Intel-syntax half of the template.
19265(define_mode_attr gatherq_mode
19266  [(V4SI "q") (V2DI "x") (V4SF "q") (V2DF "x")
19267   (V8SI "x") (V4DI "t") (V8SF "x") (V4DF "t")
19268   (V16SI "t") (V8DI "g") (V16SF "t") (V8DF "g")])
19269
;; AVX-512 gather expander with SImode indices.
;; Operands supplied by the caller:
;;   0: destination register (VI48F)
;;   1: VI48F register, first element of the UNSPEC_GATHER
;;   4: mask register, mode <avx512fmaskmode>
;;   2: base address (vsib_address_operand)
;;   3: index vector register, mode <VEC_GATHER_IDXSI>
;;   5: SImode scale, restricted by const1248_operand
;; Operand 6 (the match_par_dup address) is built by the C body as an
;; UNSPEC_VSIBADDR of operands 2, 3 and 5 in Pmode.  Operand 7 is a
;; scratch of the mask mode clobbered by the instruction.
19270(define_expand "<avx512>_gathersi<mode>"
19271  [(parallel [(set (match_operand:VI48F 0 "register_operand")
19272		   (unspec:VI48F
19273		     [(match_operand:VI48F 1 "register_operand")
19274		      (match_operand:<avx512fmaskmode> 4 "register_operand")
19275		      (mem:<ssescalarmode>
19276			(match_par_dup 6
19277			  [(match_operand 2 "vsib_address_operand")
19278			   (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
19279			   (match_operand:SI 5 "const1248_operand")]))]
19280		     UNSPEC_GATHER))
19281	      (clobber (match_scratch:<avx512fmaskmode> 7))])]
19282  "TARGET_AVX512F"
19283{
19284  operands[6]
19285    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
19286					operands[5]), UNSPEC_VSIBADDR);
19287})
19288
;; AVX-512 gather insn with SImode indices.
;; Operand roles:
;;   0: destination, early-clobber ("=&v")
;;   1: first UNSPEC_GATHER element, tied to operand 0
;;   7: mask input, tied to scratch operand 2 (constraint "2")
;;   6: memory reference wrapping UNSPEC_VSIBADDR of base (4), index
;;      vector (3) and scale (5)
;;   2: mask-register scratch ("=&Yk") clobbered by the insn
;; The template prints the mask as %{%2%} after the destination; the
;; Intel-syntax memory operand gets the <xtg_mode> modifier.
19289(define_insn "*avx512f_gathersi<mode>"
19290  [(set (match_operand:VI48F 0 "register_operand" "=&v")
19291	(unspec:VI48F
19292	  [(match_operand:VI48F 1 "register_operand" "0")
19293	   (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
19294	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19295	     [(unspec:P
19296		[(match_operand:P 4 "vsib_address_operand" "Tv")
19297		 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
19298		 (match_operand:SI 5 "const1248_operand" "n")]
19299		UNSPEC_VSIBADDR)])]
19300	  UNSPEC_GATHER))
19301   (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
19302  "TARGET_AVX512F"
19303  "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %<xtg_mode>6}"
19304  [(set_attr "type" "ssemov")
19305   (set_attr "prefix" "evex")
19306   (set_attr "mode" "<sseinsnmode>")])
19307
;; Variant of the AVX-512 SImode-index gather insn in which the first
;; UNSPEC_GATHER element is (pc) instead of a register tied to the
;; destination.
;; Operand roles:
;;   0: destination, early-clobber
;;   6: mask input, tied to scratch operand 1
;;   5: memory reference wrapping UNSPEC_VSIBADDR of base (3), index
;;      vector (2) and scale (4)
;;   1: mask-register scratch ("=&Yk") clobbered by the insn
19308(define_insn "*avx512f_gathersi<mode>_2"
19309  [(set (match_operand:VI48F 0 "register_operand" "=&v")
19310	(unspec:VI48F
19311	  [(pc)
19312	   (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
19313	   (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
19314	     [(unspec:P
19315		[(match_operand:P 3 "vsib_address_operand" "Tv")
19316		 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
19317		 (match_operand:SI 4 "const1248_operand" "n")]
19318		UNSPEC_VSIBADDR)])]
19319	  UNSPEC_GATHER))
19320   (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
19321  "TARGET_AVX512F"
19322  "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<xtg_mode>5}"
19323  [(set_attr "type" "ssemov")
19324   (set_attr "prefix" "evex")
19325   (set_attr "mode" "<sseinsnmode>")])
19326
19327
;; AVX-512 gather expander with DImode indices.
;; Operands supplied by the caller:
;;   0: destination register (VI48F)
;;   1: <VEC_GATHER_SRCDI> register, first element of the UNSPEC_GATHER
;;   4: QImode mask register
;;   2: base address (vsib_address_operand)
;;   3: index vector register, mode <VEC_GATHER_IDXDI>
;;   5: SImode scale, restricted by const1248_operand
;; Operand 6 (the match_par_dup address) is built by the C body as an
;; UNSPEC_VSIBADDR of operands 2, 3 and 5 in Pmode.  Operand 7 is a
;; QImode scratch clobbered by the instruction.
19328(define_expand "<avx512>_gatherdi<mode>"
19329  [(parallel [(set (match_operand:VI48F 0 "register_operand")
19330		   (unspec:VI48F
19331		     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
19332		      (match_operand:QI 4 "register_operand")
19333		      (mem:<ssescalarmode>
19334			(match_par_dup 6
19335			  [(match_operand 2 "vsib_address_operand")
19336			   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
19337			   (match_operand:SI 5 "const1248_operand")]))]
19338		     UNSPEC_GATHER))
19339	      (clobber (match_scratch:QI 7))])]
19340  "TARGET_AVX512F"
19341{
19342  operands[6]
19343    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
19344					operands[5]), UNSPEC_VSIBADDR);
19345})
19346
;; AVX-512 gather insn with DImode indices.
;; Operand roles:
;;   0: destination (VI48F), early-clobber
;;   1: first UNSPEC_GATHER element, mode <VEC_GATHER_SRCDI>, tied to
;;      operand 0
;;   7: QImode mask input, tied to scratch operand 2
;;   6: memory reference wrapping UNSPEC_VSIBADDR of base (4), index
;;      vector (3) and scale (5)
;;   2: mask-register scratch ("=&Yk") clobbered by the insn
;; The template prints the destination as %1, i.e. the tied operand that
;; carries the <VEC_GATHER_SRCDI> mode, not %0.  The Intel-syntax memory
;; operand gets the <gatherq_mode> modifier.
19347(define_insn "*avx512f_gatherdi<mode>"
19348  [(set (match_operand:VI48F 0 "register_operand" "=&v")
19349	(unspec:VI48F
19350	  [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
19351	   (match_operand:QI 7 "register_operand" "2")
19352	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19353	     [(unspec:P
19354		[(match_operand:P 4 "vsib_address_operand" "Tv")
19355		 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
19356		 (match_operand:SI 5 "const1248_operand" "n")]
19357		UNSPEC_VSIBADDR)])]
19358	  UNSPEC_GATHER))
19359   (clobber (match_scratch:QI 2 "=&Yk"))]
19360  "TARGET_AVX512F"
19361{
19362  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %<gatherq_mode>6}";
19363}
19364  [(set_attr "type" "ssemov")
19365   (set_attr "prefix" "evex")
19366   (set_attr "mode" "<sseinsnmode>")])
19367
;; Variant of the AVX-512 DImode-index gather insn in which the first
;; UNSPEC_GATHER element is (pc) instead of a register tied to the
;; destination.
;; Operand roles:
;;   0: destination, early-clobber
;;   6: QImode mask input, tied to scratch operand 1
;;   5: memory reference wrapping UNSPEC_VSIBADDR of base (3), index
;;      vector (2) and scale (4)
;;   1: mask-register scratch ("=&Yk") clobbered by the insn
;; Output selection:
;;   - data mode differs from <VEC_GATHER_SRCDI> and the mode size is not
;;     64 bytes: destination printed as %x0, memory operand with
;;     <gatherq_mode>;
;;   - data mode differs and the mode size is 64 bytes: destination
;;     printed as %t0 and the Intel-syntax memory operand as %t5;
;;   - modes equal: destination printed as %0, memory operand with
;;     <gatherq_mode>.
19368(define_insn "*avx512f_gatherdi<mode>_2"
19369  [(set (match_operand:VI48F 0 "register_operand" "=&v")
19370	(unspec:VI48F
19371	  [(pc)
19372	   (match_operand:QI 6 "register_operand" "1")
19373	   (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
19374	     [(unspec:P
19375		[(match_operand:P 3 "vsib_address_operand" "Tv")
19376		 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
19377		 (match_operand:SI 4 "const1248_operand" "n")]
19378		UNSPEC_VSIBADDR)])]
19379	  UNSPEC_GATHER))
19380   (clobber (match_scratch:QI 1 "=&Yk"))]
19381  "TARGET_AVX512F"
19382{
19383  if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
19384    {
19385      if (<MODE_SIZE> != 64)
19386	return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %<gatherq_mode>5}";
19387      else
19388	return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %t5}";
19389    }
19390  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<gatherq_mode>5}";
19391}
19392  [(set_attr "type" "ssemov")
19393   (set_attr "prefix" "evex")
19394   (set_attr "mode" "<sseinsnmode>")])
19395
;; AVX-512 scatter expander with SImode indices.
;; Operands supplied by the caller:
;;   0: base address (vsib_address_operand)
;;   1: mask register, mode <avx512fmaskmode>
;;   2: index vector register, mode <VEC_GATHER_IDXSI>
;;   3: VI48F register holding the data to store
;;   4: SImode scale, restricted by const1248_operand
;; Operand 5 (the match_par_dup address of the destination mem) is built
;; by the C body as an UNSPEC_VSIBADDR of operands 0, 2 and 4 in Pmode.
;; Operand 6 is a scratch of the mask mode clobbered by the instruction.
19396(define_expand "<avx512>_scattersi<mode>"
19397  [(parallel [(set (mem:VI48F
19398		     (match_par_dup 5
19399		       [(match_operand 0 "vsib_address_operand")
19400			(match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
19401			(match_operand:SI 4 "const1248_operand")]))
19402		   (unspec:VI48F
19403		     [(match_operand:<avx512fmaskmode> 1 "register_operand")
19404		      (match_operand:VI48F 3 "register_operand")]
19405		     UNSPEC_SCATTER))
19406	      (clobber (match_scratch:<avx512fmaskmode> 6))])]
19407  "TARGET_AVX512F"
19408{
19409  operands[5]
19410    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
19411					operands[4]), UNSPEC_VSIBADDR);
19412})
19413
;; AVX-512 scatter insn with SImode indices.
;; Operand roles:
;;   5: destination memory reference (vsib_mem_operator) wrapping
;;      UNSPEC_VSIBADDR of base (0), index vector (2) and scale (4)
;;   6: mask input, tied to scratch operand 1 (constraint "1")
;;   3: VI48F register holding the data to store
;;   1: mask-register scratch ("=&Yk") clobbered by the insn
;; The template prints the data register %3, the memory operand %5 and
;; the mask as %{%1%}.
19414(define_insn "*avx512f_scattersi<mode>"
19415  [(set (match_operator:VI48F 5 "vsib_mem_operator"
19416	  [(unspec:P
19417	     [(match_operand:P 0 "vsib_address_operand" "Tv")
19418	      (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
19419	      (match_operand:SI 4 "const1248_operand" "n")]
19420	     UNSPEC_VSIBADDR)])
19421	(unspec:VI48F
19422	  [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
19423	   (match_operand:VI48F 3 "register_operand" "v")]
19424	  UNSPEC_SCATTER))
19425   (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
19426  "TARGET_AVX512F"
19427  "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
19428  [(set_attr "type" "ssemov")
19429   (set_attr "prefix" "evex")
19430   (set_attr "mode" "<sseinsnmode>")])
19431
;; Expander for AVX-512 scatter stores whose index vector has mode
;; <VEC_GATHER_IDXDI>.  Same operand layout as the SImode-index expander:
;; 0 = VSIB base address, 2 = index vector, 4 = scale constant,
;; 1 = write mask, 3 = data to store, 6 = clobbered mask scratch.
;; Here the mask and the scratch are always QImode and the stored data
;; has mode <VEC_GATHER_SRCDI> rather than the iterator mode itself.
;; The preparation code builds the UNSPEC_VSIBADDR address as operand 5.
19432(define_expand "<avx512>_scatterdi<mode>"
19433  [(parallel [(set (mem:VI48F
19434		     (match_par_dup 5
19435		       [(match_operand 0 "vsib_address_operand")
19436			(match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
19437			(match_operand:SI 4 "const1248_operand")]))
19438		   (unspec:VI48F
19439		     [(match_operand:QI 1 "register_operand")
19440		      (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
19441		     UNSPEC_SCATTER))
19442	      (clobber (match_scratch:QI 6))])]
19443  "TARGET_AVX512F"
19444{
19445  operands[5]
19446    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
19447					operands[4]), UNSPEC_VSIBADDR);
19448})
19449
;; Matcher for the scatter generated by the <avx512>_scatterdi<mode>
;; expander.  The QImode mask input (operand 6) is tied by constraint "1"
;; to the early-clobbered mask scratch (operand 1).  The C output code
;; picks between two templates on the element size of <MODE>mode: for
;; 8-byte elements the memory operand is printed plainly in both syntaxes;
;; otherwise the Intel-syntax alternative prints it with the %t modifier
;; (NOTE(review): presumably to force the 256-bit memory size keyword for
;; the narrower data vector -- confirm against ix86_print_operand).
19450(define_insn "*avx512f_scatterdi<mode>"
19451  [(set (match_operator:VI48F 5 "vsib_mem_operator"
19452	  [(unspec:P
19453	     [(match_operand:P 0 "vsib_address_operand" "Tv")
19454	      (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
19455	      (match_operand:SI 4 "const1248_operand" "n")]
19456	     UNSPEC_VSIBADDR)])
19457	(unspec:VI48F
19458	  [(match_operand:QI 6 "register_operand" "1")
19459	   (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
19460	  UNSPEC_SCATTER))
19461   (clobber (match_scratch:QI 1 "=&Yk"))]
19462  "TARGET_AVX512F"
19463{
19464  if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 8)
19465    return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}";
19466  return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%t5%{%1%}, %3}";
19467}
19468  [(set_attr "type" "ssemov")
19469   (set_attr "prefix" "evex")
19470   (set_attr "mode" "<sseinsnmode>")])
19471
;; Masked register-to-register compress for the VI48F modes.
;; Operand 1 is the source vector, operand 3 the mask register.
;; Operand 2 (constraint "0C") is either tied to the destination or a
;; constant accepted by "C"; it is printed through the %N modifier
;; (presumably emitting the zero-masking suffix for the constant case --
;; confirm against ix86_print_operand).
19472(define_insn "<avx512>_compress<mode>_mask"
19473  [(set (match_operand:VI48F 0 "register_operand" "=v")
19474	(unspec:VI48F
19475	  [(match_operand:VI48F 1 "register_operand" "v")
19476	   (match_operand:VI48F 2 "vector_move_operand" "0C")
19477	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
19478	  UNSPEC_COMPRESS))]
19479  "TARGET_AVX512F"
19480  "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
19481  [(set_attr "type" "ssemov")
19482   (set_attr "prefix" "evex")
19483   (set_attr "mode" "<sseinsnmode>")])
19484
;; Masked register-to-register compress for the VI12_AVX512VLBW modes,
;; enabled by TARGET_AVX512VBMI2 and emitted as vpcompress<suffix>.
;; Operand layout matches the VI48F compress pattern: 1 = source,
;; 2 = tied-to-destination or "C" constant, 3 = mask register.
19485(define_insn "compress<mode>_mask"
19486  [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v")
19487	(unspec:VI12_AVX512VLBW
19488	  [(match_operand:VI12_AVX512VLBW 1 "register_operand" "v")
19489	   (match_operand:VI12_AVX512VLBW 2 "vector_move_operand" "0C")
19490	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
19491	  UNSPEC_COMPRESS))]
19492  "TARGET_AVX512VBMI2"
19493  "vpcompress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
19494  [(set_attr "type" "ssemov")
19495   (set_attr "prefix" "evex")
19496   (set_attr "mode" "<sseinsnmode>")])
19497
;; Masked compress with a memory destination for the VI48F modes.
;; Operand 0 is the destination memory; it also appears as an input
;; (match_dup 0) of the unspec.  Operand 1 is the source register and
;; operand 2 the mask register.  The "memory" attribute is set to "store".
19498(define_insn "<avx512>_compressstore<mode>_mask"
19499  [(set (match_operand:VI48F 0 "memory_operand" "=m")
19500	(unspec:VI48F
19501	  [(match_operand:VI48F 1 "register_operand" "x")
19502	   (match_dup 0)
19503	   (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
19504	  UNSPEC_COMPRESS_STORE))]
19505  "TARGET_AVX512F"
19506  "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
19507  [(set_attr "type" "ssemov")
19508   (set_attr "prefix" "evex")
19509   (set_attr "memory" "store")
19510   (set_attr "mode" "<sseinsnmode>")])
19511
;; Masked compress with a memory destination for the VI12_AVX512VLBW
;; modes, enabled by TARGET_AVX512VBMI2.  Operand 0 is the destination
;; memory (also an unspec input via match_dup), operand 1 the source
;; register and operand 2 the mask register.
19512(define_insn "compressstore<mode>_mask"
19513  [(set (match_operand:VI12_AVX512VLBW 0 "memory_operand" "=m")
19514	(unspec:VI12_AVX512VLBW
19515	  [(match_operand:VI12_AVX512VLBW 1 "register_operand" "x")
19516	   (match_dup 0)
19517	   (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
19518	  UNSPEC_COMPRESS_STORE))]
19519  "TARGET_AVX512VBMI2"
19520  "vpcompress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
19521  [(set_attr "type" "ssemov")
19522   (set_attr "prefix" "evex")
19523   (set_attr "memory" "store")
19524   (set_attr "mode" "<sseinsnmode>")])
19525
;; Zero-masking expander for the VI48F expand operation.  The
;; preparation statement overwrites operand 2 with the all-zero constant
;; of <MODE>mode before the UNSPEC_EXPAND RTL is generated, so whatever
;; the caller passed as operand 2 is ignored.
19526(define_expand "<avx512>_expand<mode>_maskz"
19527  [(set (match_operand:VI48F 0 "register_operand")
19528	(unspec:VI48F
19529	  [(match_operand:VI48F 1 "nonimmediate_operand")
19530	   (match_operand:VI48F 2 "vector_move_operand")
19531	   (match_operand:<avx512fmaskmode> 3 "register_operand")]
19532	  UNSPEC_EXPAND))]
19533  "TARGET_AVX512F"
19534  "operands[2] = CONST0_RTX (<MODE>mode);")
19535
;; Masked expand for the VI48F modes.  Two alternatives: operand 1 in a
;; register ("v") or in memory ("m"); the "memory" attribute is
;; "none"/"load" accordingly.  Operand 2 is tied to the destination or a
;; "C" constant and is printed with the %N modifier; operand 3 is the
;; mask register.
19536(define_insn "<avx512>_expand<mode>_mask"
19537  [(set (match_operand:VI48F 0 "register_operand" "=v,v")
19538	(unspec:VI48F
19539	  [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
19540	   (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
19541	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
19542	  UNSPEC_EXPAND))]
19543  "TARGET_AVX512F"
19544  "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
19545  [(set_attr "type" "ssemov")
19546   (set_attr "prefix" "evex")
19547   (set_attr "memory" "none,load")
19548   (set_attr "mode" "<sseinsnmode>")])
19549
;; Masked expand for the VI12_AVX512VLBW modes, enabled by
;; TARGET_AVX512VBMI2.  Same shape as the VI48F expand pattern:
;; register or memory source (operand 1), tied-or-constant operand 2
;; printed via %N, mask register operand 3.
19550(define_insn "expand<mode>_mask"
19551  [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v,v")
19552	(unspec:VI12_AVX512VLBW
19553	  [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand" "v,m")
19554	   (match_operand:VI12_AVX512VLBW 2 "vector_move_operand" "0C,0C")
19555	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
19556	  UNSPEC_EXPAND))]
19557  "TARGET_AVX512VBMI2"
19558  "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
19559  [(set_attr "type" "ssemov")
19560   (set_attr "prefix" "evex")
19561   (set_attr "memory" "none,load")
19562   (set_attr "mode" "<sseinsnmode>")])
19563
;; Zero-masking expander for the VI12_AVX512VLBW expand operation.
;; The preparation statement replaces operand 2 with the all-zero
;; constant of <MODE>mode before the UNSPEC_EXPAND RTL is generated.
19564(define_expand "expand<mode>_maskz"
19565  [(set (match_operand:VI12_AVX512VLBW 0 "register_operand")
19566	(unspec:VI12_AVX512VLBW
19567	  [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand")
19568	   (match_operand:VI12_AVX512VLBW 2 "vector_move_operand")
19569	   (match_operand:<avx512fmaskmode> 3 "register_operand")]
19570	  UNSPEC_EXPAND))]
19571  "TARGET_AVX512VBMI2"
19572  "operands[2] = CONST0_RTX (<MODE>mode);")
19573
;; Packed vrange on the VF_AVX512VL modes.  Operands 1 and 2 are the two
;; vector inputs (operand 2's predicate and constraint come from the
;; round_saeonly subst attributes) and operand 3 is an SImode immediate
;; restricted by const_0_to_15_operand.  The pattern name carries the
;; <mask_name> and <round_saeonly_name> subst suffixes, and the insn
;; condition additionally requires <round_saeonly_mode512bit_condition>.
19574(define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
19575  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
19576	(unspec:VF_AVX512VL
19577	  [(match_operand:VF_AVX512VL 1 "register_operand" "v")
19578	   (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
19579	   (match_operand:SI 3 "const_0_to_15_operand")]
19580	  UNSPEC_RANGE))]
19581  "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
19582  "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
19583  [(set_attr "type" "sse")
19584   (set_attr "prefix" "evex")
19585   (set_attr "mode" "<MODE>")])
19586
;; Scalar vrange on the VF_128 modes.  The vec_merge with selector
;; (const_int 1) takes element 0 from the UNSPEC_RANGE result and all
;; remaining elements from operand 1 (match_dup 1).  Operand 3 is the
;; SImode immediate restricted by const_0_to_15_operand.  In Intel syntax
;; operand 2 is printed with the <iptr> modifier.
19587(define_insn "avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>"
19588  [(set (match_operand:VF_128 0 "register_operand" "=v")
19589	(vec_merge:VF_128
19590	  (unspec:VF_128
19591	    [(match_operand:VF_128 1 "register_operand" "v")
19592	     (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
19593	     (match_operand:SI 3 "const_0_to_15_operand")]
19594	    UNSPEC_RANGE)
19595	  (match_dup 1)
19596	  (const_int 1)))]
19597  "TARGET_AVX512DQ"
19598  "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
19599  [(set_attr "type" "sse")
19600   (set_attr "prefix" "evex")
19601   (set_attr "mode" "<MODE>")])
19602
;; Packed vfpclass: classifies the elements of vector operand 1 using
;; the QImode immediate operand 2 and writes the result to a mask
;; register (operand 0, constraint "=Yk").  The name carries the
;; <mask_scalar_merge_name> subst suffix.  The ';' following the output
;; template starts an (empty) md comment and has no effect.
19603(define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
19604  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
19605          (unspec:<avx512fmaskmode>
19606            [(match_operand:VF_AVX512VL 1 "register_operand" "v")
19607             (match_operand:QI 2 "const_0_to_255_operand" "n")]
19608             UNSPEC_FPCLASS))]
19609   "TARGET_AVX512DQ"
19610   "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
19611  [(set_attr "type" "sse")
19612   (set_attr "length_immediate" "1")
19613   (set_attr "prefix" "evex")
19614   (set_attr "mode" "<MODE>")])
19615
;; Scalar vfpclass on the VF_128 modes.  The UNSPEC_FPCLASS result is
;; ANDed with (const_int 1), so only bit 0 of the mask result is
;; significant.  Operand 2 is the QImode class-selection immediate.
;; The ';' after the output template is an empty md comment.
19616(define_insn "avx512dq_vmfpclass<mode>"
19617  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
19618	(and:<avx512fmaskmode>
19619	  (unspec:<avx512fmaskmode>
19620	    [(match_operand:VF_128 1 "register_operand" "v")
19621             (match_operand:QI 2 "const_0_to_255_operand" "n")]
19622	    UNSPEC_FPCLASS)
19623	  (const_int 1)))]
19624   "TARGET_AVX512DQ"
19625   "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
19626  [(set_attr "type" "sse")
19627   (set_attr "length_immediate" "1")
19628   (set_attr "prefix" "evex")
19629   (set_attr "mode" "<MODE>")])
19630
;; Packed vgetmant on the VF_AVX512VL modes: one vector input
;; (operand 1, constraint supplied by <round_saeonly_constraint>) and an
;; SImode immediate (operand 2, const_0_to_15_operand).  Only "prefix"
;; and "mode" attributes are set here; no "type" attribute is given.
;; The ';' after the output template is an empty md comment.
19631(define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
19632  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
19633	(unspec:VF_AVX512VL
19634	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
19635	   (match_operand:SI 2 "const_0_to_15_operand")]
19636	  UNSPEC_GETMANT))]
19637  "TARGET_AVX512F"
19638  "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
19639  [(set_attr "prefix" "evex")
19640   (set_attr "mode" "<MODE>")])
19641
;; Scalar vgetmant on the VF_128 modes.  The vec_merge with selector
;; (const_int 1) takes element 0 from the UNSPEC_GETMANT result and the
;; other elements from operand 1.  Operand 3 is the SImode immediate.
;; The "mode" attribute is the scalar mode <ssescalarmode>.
;; The ';' after the output template is an empty md comment.
19642(define_insn "avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>"
19643  [(set (match_operand:VF_128 0 "register_operand" "=v")
19644	(vec_merge:VF_128
19645	  (unspec:VF_128
19646	    [(match_operand:VF_128 1 "register_operand" "v")
19647	     (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
19648	     (match_operand:SI 3 "const_0_to_15_operand")]
19649	    UNSPEC_GETMANT)
19650	  (match_dup 1)
19651	  (const_int 1)))]
19652   "TARGET_AVX512F"
19653   "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
19654   [(set_attr "prefix" "evex")
19655   (set_attr "mode" "<ssescalarmode>")])
19656
19657;; The correct representation for this is absolutely enormous, and
19658;; surely not generally useful.
;; The operation is therefore modelled as an opaque UNSPEC_DBPSADBW.
;; The result has a VI2_AVX512VL mode while both vector inputs
;; (operand 1 in a register, operand 2 in register or memory) have the
;; corresponding <dbpsadbwmode>; operand 3 is an SImode immediate
;; restricted by const_0_to_255_operand.
19659(define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
19660  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
19661	(unspec:VI2_AVX512VL
19662	  [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
19663	   (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
19664	   (match_operand:SI 3 "const_0_to_255_operand")]
19665	  UNSPEC_DBPSADBW))]
19666   "TARGET_AVX512BW"
19667  "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
19668  [(set_attr "type" "sselog1")
19669   (set_attr "length_immediate" "1")
19670   (set_attr "prefix" "evex")
19671   (set_attr "mode" "<sseinsnmode>")])
19672
;; Vector count-leading-zeros for the VI48_AVX512VL modes, expressed
;; with the generic clz RTL code and emitted as vplzcnt<suffix> when
;; TARGET_AVX512CD is enabled.  The source may be a register or memory.
19673(define_insn "clz<mode>2<mask_name>"
19674  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
19675	(clz:VI48_AVX512VL
19676	  (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
19677  "TARGET_AVX512CD"
19678  "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19679  [(set_attr "type" "sse")
19680   (set_attr "prefix" "evex")
19681   (set_attr "mode" "<sseinsnmode>")])
19682
;; vpconflict for the VI48_AVX512VL modes under TARGET_AVX512CD,
;; modelled as a one-input UNSPEC_CONFLICT.  The source may be a
;; register or memory; the name carries the <mask_name> subst suffix.
19683(define_insn "<mask_codefor>conflict<mode><mask_name>"
19684  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
19685	(unspec:VI48_AVX512VL
19686	  [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
19687	  UNSPEC_CONFLICT))]
19688  "TARGET_AVX512CD"
19689  "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19690  [(set_attr "type" "sse")
19691   (set_attr "prefix" "evex")
19692   (set_attr "mode" "<sseinsnmode>")])
19693
;; sha1msg1 on V4SI under TARGET_SHA.  Two-operand instruction:
;; operand 1 is tied to the destination (constraint "0") and operand 2
;; may be a register or memory ("xBm").
19694(define_insn "sha1msg1"
19695  [(set (match_operand:V4SI 0 "register_operand" "=x")
19696	(unspec:V4SI
19697	  [(match_operand:V4SI 1 "register_operand" "0")
19698	   (match_operand:V4SI 2 "vector_operand" "xBm")]
19699	  UNSPEC_SHA1MSG1))]
19700  "TARGET_SHA"
19701  "sha1msg1\t{%2, %0|%0, %2}"
19702  [(set_attr "type" "sselog1")
19703   (set_attr "mode" "TI")])
19704
;; sha1msg2 on V4SI under TARGET_SHA.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
19705(define_insn "sha1msg2"
19706  [(set (match_operand:V4SI 0 "register_operand" "=x")
19707	(unspec:V4SI
19708	  [(match_operand:V4SI 1 "register_operand" "0")
19709	   (match_operand:V4SI 2 "vector_operand" "xBm")]
19710	  UNSPEC_SHA1MSG2))]
19711  "TARGET_SHA"
19712  "sha1msg2\t{%2, %0|%0, %2}"
19713  [(set_attr "type" "sselog1")
19714   (set_attr "mode" "TI")])
19715
;; sha1nexte on V4SI under TARGET_SHA.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
19716(define_insn "sha1nexte"
19717  [(set (match_operand:V4SI 0 "register_operand" "=x")
19718	(unspec:V4SI
19719	  [(match_operand:V4SI 1 "register_operand" "0")
19720	   (match_operand:V4SI 2 "vector_operand" "xBm")]
19721	  UNSPEC_SHA1NEXTE))]
19722  "TARGET_SHA"
19723  "sha1nexte\t{%2, %0|%0, %2}"
19724  [(set_attr "type" "sselog1")
19725   (set_attr "mode" "TI")])
19726
;; sha1rnds4 on V4SI under TARGET_SHA.  Operand 1 is tied to the
;; destination, operand 2 may be a register or memory, and operand 3 is
;; an SImode immediate restricted by const_0_to_3_operand, which is why
;; "length_immediate" is set to 1.
19727(define_insn "sha1rnds4"
19728  [(set (match_operand:V4SI 0 "register_operand" "=x")
19729	(unspec:V4SI
19730	  [(match_operand:V4SI 1 "register_operand" "0")
19731	   (match_operand:V4SI 2 "vector_operand" "xBm")
19732	   (match_operand:SI 3 "const_0_to_3_operand" "n")]
19733	  UNSPEC_SHA1RNDS4))]
19734  "TARGET_SHA"
19735  "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
19736  [(set_attr "type" "sselog1")
19737   (set_attr "length_immediate" "1")
19738   (set_attr "mode" "TI")])
19739
;; sha256msg1 on V4SI under TARGET_SHA.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
19740(define_insn "sha256msg1"
19741  [(set (match_operand:V4SI 0 "register_operand" "=x")
19742	(unspec:V4SI
19743	  [(match_operand:V4SI 1 "register_operand" "0")
19744	   (match_operand:V4SI 2 "vector_operand" "xBm")]
19745	  UNSPEC_SHA256MSG1))]
19746  "TARGET_SHA"
19747  "sha256msg1\t{%2, %0|%0, %2}"
19748  [(set_attr "type" "sselog1")
19749   (set_attr "mode" "TI")])
19750
;; sha256msg2 on V4SI under TARGET_SHA.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
19751(define_insn "sha256msg2"
19752  [(set (match_operand:V4SI 0 "register_operand" "=x")
19753	(unspec:V4SI
19754	  [(match_operand:V4SI 1 "register_operand" "0")
19755	   (match_operand:V4SI 2 "vector_operand" "xBm")]
19756	  UNSPEC_SHA256MSG2))]
19757  "TARGET_SHA"
19758  "sha256msg2\t{%2, %0|%0, %2}"
19759  [(set_attr "type" "sselog1")
19760   (set_attr "mode" "TI")])
19761
;; sha256rnds2 on V4SI under TARGET_SHA.  Operand 1 is tied to the
;; destination and operand 2 may be a register or memory.  Operand 3 is
;; a V4SI *register* restricted by the "Yz" constraint (the instruction's
;; implicit xmm0 operand); it is printed in the template only for
;; readability of the assembly.  Unlike sha1rnds4, this instruction has no
;; immediate byte, so no "length_immediate" attribute is set: declaring
;; one would make the computed insn length one byte too long.
19762(define_insn "sha256rnds2"
19763  [(set (match_operand:V4SI 0 "register_operand" "=x")
19764	(unspec:V4SI
19765	  [(match_operand:V4SI 1 "register_operand" "0")
19766	   (match_operand:V4SI 2 "vector_operand" "xBm")
19767	   (match_operand:V4SI 3 "register_operand" "Yz")]
19768	  UNSPEC_SHA256RNDS2))]
19769  "TARGET_SHA"
19770  "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
19771  [(set_attr "type" "sselog1")
19773   (set_attr "mode" "TI")])
19774
;; Cast from a quarter-width vector (<ssequartermode>) to a full
;; AVX512MODE2P vector, represented as UNSPEC_CAST.  The insn itself
;; is never output ("#"); after reload it is split into a plain move.
;; If the destination is a register, the destination is narrowed to
;; <ssequartermode> with gen_lowpart; otherwise (memory destination)
;; the source register is re-expressed in <MODE>mode via lowpart_subreg.
;; The insn condition rejects the memory-to-memory combination.
19775(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
19776  [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
19777	(unspec:AVX512MODE2P
19778	  [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
19779	  UNSPEC_CAST))]
19780  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
19781  "#"
19782  "&& reload_completed"
19783  [(set (match_dup 0) (match_dup 1))]
19784{
19785  if (REG_P (operands[0]))
19786    operands[0] = gen_lowpart (<ssequartermode>mode, operands[0]);
19787  else
19788    operands[1] = lowpart_subreg (<MODE>mode, operands[1],
19789				  <ssequartermode>mode);
19790})
19791
;; Cast from a half-width vector (<ssehalfvecmode>) to a full
;; AVX512MODE2P vector, represented as UNSPEC_CAST.  Output as "#" and
;; split after reload into a plain move: a register destination is
;; narrowed to <ssehalfvecmode> with gen_lowpart; for a memory
;; destination the source register is re-expressed in <MODE>mode via
;; lowpart_subreg.  Memory-to-memory is rejected by the insn condition.
19792(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
19793  [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
19794	(unspec:AVX512MODE2P
19795	  [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
19796	  UNSPEC_CAST))]
19797  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
19798  "#"
19799  "&& reload_completed"
19800  [(set (match_dup 0) (match_dup 1))]
19801{
19802  if (REG_P (operands[0]))
19803    operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
19804  else
19805    operands[1] = lowpart_subreg (<MODE>mode, operands[1],
19806				  <ssehalfvecmode>mode);
19807})
19808
;; Int iterator over the two unspec codes used by the vpmadd52
;; patterns (low and high variants).
19809(define_int_iterator VPMADD52
19810	[UNSPEC_VPMADD52LUQ
19811	 UNSPEC_VPMADD52HUQ])
19812
;; Maps each VPMADD52 unspec code to the mnemonic/pattern-name suffix
;; ("luq" or "huq").
19813(define_int_attr vpmadd52type
19814  [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
19815
;; Zero-masking wrapper for the "huq" variant.  It forwards operands
;; 0-3 to gen_vpamdd52huq<mode>_maskz_1, inserting the all-zero constant
;; of <MODE>mode ahead of the mask register (operand 4), then finishes
;; with DONE so no RTL from this template is emitted.  The "vpamdd52"
;; spelling is part of the generated function names and is kept as is.
19816(define_expand "vpamdd52huq<mode>_maskz"
19817  [(match_operand:VI8_AVX512VL 0 "register_operand")
19818   (match_operand:VI8_AVX512VL 1 "register_operand")
19819   (match_operand:VI8_AVX512VL 2 "register_operand")
19820   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
19821   (match_operand:<avx512fmaskmode> 4 "register_operand")]
19822  "TARGET_AVX512IFMA"
19823{
19824  emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
19825    operands[0], operands[1], operands[2], operands[3],
19826    CONST0_RTX (<MODE>mode), operands[4]));
19827  DONE;
19828})
19829
;; Zero-masking wrapper for the "luq" variant.  It forwards operands
;; 0-3 to gen_vpamdd52luq<mode>_maskz_1, inserting the all-zero constant
;; of <MODE>mode ahead of the mask register (operand 4), then finishes
;; with DONE so no RTL from this template is emitted.
19830(define_expand "vpamdd52luq<mode>_maskz"
19831  [(match_operand:VI8_AVX512VL 0 "register_operand")
19832   (match_operand:VI8_AVX512VL 1 "register_operand")
19833   (match_operand:VI8_AVX512VL 2 "register_operand")
19834   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
19835   (match_operand:<avx512fmaskmode> 4 "register_operand")]
19836  "TARGET_AVX512IFMA"
19837{
19838  emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
19839    operands[0], operands[1], operands[2], operands[3],
19840    CONST0_RTX (<MODE>mode), operands[4]));
19841  DONE;
19842})
19843
;; vpmadd52luq / vpmadd52huq on the VI8_AVX512VL modes, iterated over
;; the VPMADD52 unspec codes.  Operand 1 is tied to the destination
;; (constraint "0") and is therefore not printed separately; operand 2
;; is a register and operand 3 a register or memory.  The
;; <sd_maskz_name> subst suffix supplies the optional masking operand
;; printed through <sd_mask_op4>.
19844(define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
19845  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
19846	(unspec:VI8_AVX512VL
19847	  [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
19848	   (match_operand:VI8_AVX512VL 2 "register_operand" "v")
19849	   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
19850	  VPMADD52))]
19851  "TARGET_AVX512IFMA"
19852  "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
19853  [(set_attr "type" "ssemuladd")
19854   (set_attr "prefix" "evex")
19855   (set_attr "mode" "<sseinsnmode>")])
19856
;; Merge-masked vpmadd52luq / vpmadd52huq.  The vec_merge selects,
;; per element under mask operand 4, between the unspec result and
;; operand 1 (match_dup 1), which is also tied to the destination.
;; The mask register is printed as {%4} on the destination.
19857(define_insn "vpamdd52<vpmadd52type><mode>_mask"
19858  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
19859	(vec_merge:VI8_AVX512VL
19860	  (unspec:VI8_AVX512VL
19861	    [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
19862	     (match_operand:VI8_AVX512VL 2 "register_operand" "v")
19863	     (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
19864	    VPMADD52)
19865	  (match_dup 1)
19866	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
19867  "TARGET_AVX512IFMA"
19868  "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
19869  [(set_attr "type" "ssemuladd")
19870   (set_attr "prefix" "evex")
19871   (set_attr "mode" "<sseinsnmode>")])
19872
;; vpmultishiftqb on the VI1_AVX512VL modes under TARGET_AVX512VBMI,
;; modelled as a two-input UNSPEC_VPMULTISHIFT.  Operand 1 is a
;; register, operand 2 a register or memory; the name carries the
;; <mask_name> subst suffix.
19873(define_insn "vpmultishiftqb<mode><mask_name>"
19874  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
19875	(unspec:VI1_AVX512VL
19876	  [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
19877	   (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
19878	  UNSPEC_VPMULTISHIFT))]
19879  "TARGET_AVX512VBMI"
19880  "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
19881  [(set_attr "type" "sselog")
19882   (set_attr "prefix" "evex")
19883   (set_attr "mode" "<sseinsnmode>")])
19884
;; Mode iterator over the two 64-element vector modes: V64SF (enabled
;; by TARGET_AVX5124FMAPS) and V64SI (enabled by TARGET_AVX5124VNNIW).
19885(define_mode_iterator IMOD4
19886  [(V64SF "TARGET_AVX5124FMAPS") (V64SI "TARGET_AVX5124VNNIW")])
19887
;; Maps each IMOD4 mode to the 16-element vector mode with the same
;; element type (one quarter of the wide mode).
19888(define_mode_attr imod4_narrow
19889  [(V64SF "V16SF") (V64SI "V16SI")])
19890
;; Move expander for the IMOD4 modes.  All work is delegated to
;; ix86_expand_vector_move; DONE suppresses the template RTL.
19891(define_expand "mov<mode>"
19892  [(set (match_operand:IMOD4 0 "nonimmediate_operand")
19893	(match_operand:IMOD4 1 "vector_move_operand"))]
19894  "TARGET_AVX512F"
19895{
19896  ix86_expand_vector_move (<MODE>mode, operands);
19897  DONE;
19898})
19899
;; Move insn for the IMOD4 modes.  Alternatives: constant ("C") to
;; register, register/memory to register, register to memory; at least
;; one side must be a register.  The insn is output as "#" and is split
;; after reload into four moves of <imod4_narrow>mode, taking subregs of
;; both operands at byte offsets 0, 64, 128 and 192 (i * 64).
19900(define_insn_and_split "*mov<mode>_internal"
19901  [(set (match_operand:IMOD4 0 "nonimmediate_operand" "=v,v ,m")
19902	(match_operand:IMOD4 1 "vector_move_operand"  " C,vm,v"))]
19903  "TARGET_AVX512F
19904   && (register_operand (operands[0], <MODE>mode)
19905       || register_operand (operands[1], <MODE>mode))"
19906  "#"
19907  "&& reload_completed"
19908  [(const_int 0)]
19909{
19910  rtx op0, op1;
19911  int i;
19912
19913  for (i = 0; i < 4; i++)
19914    {
19915      op0 = simplify_subreg
19916	     (<imod4_narrow>mode, operands[0], <MODE>mode, i * 64);
19917      op1 = simplify_subreg
19918	     (<imod4_narrow>mode, operands[1], <MODE>mode, i * 64);
19919      emit_move_insn (op0, op1);
19920    }
19921  DONE;
19922})
19923
;; Unmasked v4fmaddps.  Operand 1 (V16SF) is tied to the destination,
;; operand 2 is a V64SF register restricted by the "Yh" constraint and
;; printed with the %g modifier, and operand 3 is a V4SF memory operand.
19924(define_insn "avx5124fmaddps_4fmaddps"
19925  [(set (match_operand:V16SF 0 "register_operand" "=v")
19926	(unspec:V16SF
19927	  [(match_operand:V16SF 1 "register_operand" "0")
19928	   (match_operand:V64SF 2 "register_operand" "Yh")
19929	   (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
19930  "TARGET_AVX5124FMAPS"
19931  "v4fmaddps\t{%3, %g2, %0|%0, %g2, %3}"
19932   [(set_attr ("type") ("ssemuladd"))
19933    (set_attr ("prefix") ("evex"))
19934    (set_attr ("mode") ("V16SF"))])
19935
;; Merge-masked v4fmaddps.  Operand numbering differs from the unmasked
;; pattern: the V64SF register group is operand 1 and the V4SF memory
;; is operand 2 inside the unspec, while the V16SF value tied to the
;; destination is operand 3, the second arm of the vec_merge.
;; Operand 4 is the HImode mask register, printed as {%4}.
19936(define_insn "avx5124fmaddps_4fmaddps_mask"
19937  [(set (match_operand:V16SF 0 "register_operand" "=v")
19938	(vec_merge:V16SF
19939	  (unspec:V16SF
19940	     [(match_operand:V64SF 1 "register_operand" "Yh")
19941	      (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
19942	  (match_operand:V16SF 3 "register_operand" "0")
19943	  (match_operand:HI 4 "register_operand" "Yk")))]
19944  "TARGET_AVX5124FMAPS"
19945  "v4fmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
19946   [(set_attr ("type") ("ssemuladd"))
19947    (set_attr ("prefix") ("evex"))
19948    (set_attr ("mode") ("V16SF"))])
19949
;; Zero-masked v4fmaddps.  The unspec has the same three inputs as the
;; unmasked pattern (operand 1 tied to the destination, V64SF register
;; group, V4SF memory); the vec_merge's second arm is the zero constant
;; (operand 4) and operand 5 is the HImode mask.  The template appends
;; {%5}{z} to the destination.
19950(define_insn "avx5124fmaddps_4fmaddps_maskz"
19951  [(set (match_operand:V16SF 0 "register_operand" "=v")
19952	(vec_merge:V16SF
19953	  (unspec:V16SF
19954	    [(match_operand:V16SF 1 "register_operand" "0")
19955	     (match_operand:V64SF 2 "register_operand" "Yh")
19956	     (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
19957	  (match_operand:V16SF 4 "const0_operand" "C")
19958	  (match_operand:HI 5 "register_operand" "Yk")))]
19959  "TARGET_AVX5124FMAPS"
19960  "v4fmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
19961   [(set_attr ("type") ("ssemuladd"))
19962    (set_attr ("prefix") ("evex"))
19963    (set_attr ("mode") ("V16SF"))])
19964
;; Unmasked v4fmaddss (scalar form; "mode" attribute is SF).
;; Operand 1 (V4SF) is tied to the destination, operand 2 is a V64SF
;; register restricted by "Yh" and printed with the %x modifier, and
;; operand 3 is a V4SF memory operand.
19965(define_insn "avx5124fmaddps_4fmaddss"
19966  [(set (match_operand:V4SF 0 "register_operand" "=v")
19967	(unspec:V4SF
19968	  [(match_operand:V4SF 1 "register_operand" "0")
19969	   (match_operand:V64SF 2 "register_operand" "Yh")
19970	   (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
19971  "TARGET_AVX5124FMAPS"
19972  "v4fmaddss\t{%3, %x2, %0|%0, %x2, %3}"
19973   [(set_attr ("type") ("ssemuladd"))
19974    (set_attr ("prefix") ("evex"))
19975    (set_attr ("mode") ("SF"))])
19976
;; Merge-masked v4fmaddss.  Inside the unspec, operand 1 is the V64SF
;; register group and operand 2 the V4SF memory; operand 3 (tied to the
;; destination) is the vec_merge's second arm and operand 4 is the
;; QImode mask register.
19977(define_insn "avx5124fmaddps_4fmaddss_mask"
19978  [(set (match_operand:V4SF 0 "register_operand" "=v")
19979	(vec_merge:V4SF
19980	  (unspec:V4SF
19981	    [(match_operand:V64SF 1 "register_operand" "Yh")
19982	     (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
19983	  (match_operand:V4SF 3 "register_operand" "0")
19984	  (match_operand:QI 4 "register_operand" "Yk")))]
19985  "TARGET_AVX5124FMAPS"
19986  "v4fmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
19987   [(set_attr ("type") ("ssemuladd"))
19988    (set_attr ("prefix") ("evex"))
19989    (set_attr ("mode") ("SF"))])
19990
;; Zero-masked v4fmaddss.  Unspec inputs: operand 1 tied to the
;; destination, V64SF register group (operand 2), V4SF memory
;; (operand 3).  Operand 4 is the zero constant arm of the vec_merge and
;; operand 5 the QImode mask; the template appends {%5}{z}.
19991(define_insn "avx5124fmaddps_4fmaddss_maskz"
19992  [(set (match_operand:V4SF 0 "register_operand" "=v")
19993	(vec_merge:V4SF
19994	  (unspec:V4SF
19995	    [(match_operand:V4SF 1 "register_operand" "0")
19996	     (match_operand:V64SF 2 "register_operand" "Yh")
19997	     (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
19998	  (match_operand:V4SF 4 "const0_operand" "C")
19999	  (match_operand:QI 5 "register_operand" "Yk")))]
20000  "TARGET_AVX5124FMAPS"
20001  "v4fmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
20002   [(set_attr ("type") ("ssemuladd"))
20003    (set_attr ("prefix") ("evex"))
20004    (set_attr ("mode") ("SF"))])
20005
;; Unmasked v4fnmaddps (UNSPEC_VP4FNMADD).  Operand 1 (V16SF) is tied
;; to the destination, operand 2 is a V64SF register restricted by "Yh"
;; and printed with %g, operand 3 is a V4SF memory operand.
20006(define_insn "avx5124fmaddps_4fnmaddps"
20007  [(set (match_operand:V16SF 0 "register_operand" "=v")
20008	(unspec:V16SF
20009	  [(match_operand:V16SF 1 "register_operand" "0")
20010	   (match_operand:V64SF 2 "register_operand" "Yh")
20011	   (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
20012  "TARGET_AVX5124FMAPS"
20013  "v4fnmaddps\t{%3, %g2, %0|%0, %g2, %3}"
20014   [(set_attr ("type") ("ssemuladd"))
20015    (set_attr ("prefix") ("evex"))
20016    (set_attr ("mode") ("V16SF"))])
20017
;; Merge-masked v4fnmaddps.  Inside the unspec, operand 1 is the V64SF
;; register group and operand 2 the V4SF memory; operand 3 (tied to the
;; destination) is the vec_merge's second arm and operand 4 the HImode
;; mask register.
20018(define_insn "avx5124fmaddps_4fnmaddps_mask"
20019  [(set (match_operand:V16SF 0 "register_operand" "=v")
20020	(vec_merge:V16SF
20021	  (unspec:V16SF
20022	     [(match_operand:V64SF 1 "register_operand" "Yh")
20023	      (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
20024	  (match_operand:V16SF 3 "register_operand" "0")
20025	  (match_operand:HI 4 "register_operand" "Yk")))]
20026  "TARGET_AVX5124FMAPS"
20027  "v4fnmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
20028   [(set_attr ("type") ("ssemuladd"))
20029    (set_attr ("prefix") ("evex"))
20030    (set_attr ("mode") ("V16SF"))])
20031
;; Zero-masked v4fnmaddps.  Unspec inputs: operand 1 tied to the
;; destination, V64SF register group (operand 2), V4SF memory
;; (operand 3).  Operand 4 is the zero constant arm of the vec_merge and
;; operand 5 the HImode mask; the template appends {%5}{z}.
20032(define_insn "avx5124fmaddps_4fnmaddps_maskz"
20033  [(set (match_operand:V16SF 0 "register_operand" "=v")
20034	(vec_merge:V16SF
20035	  (unspec:V16SF
20036	    [(match_operand:V16SF 1 "register_operand" "0")
20037	     (match_operand:V64SF 2 "register_operand" "Yh")
20038	     (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
20039	  (match_operand:V16SF 4 "const0_operand" "C")
20040	  (match_operand:HI 5 "register_operand" "Yk")))]
20041  "TARGET_AVX5124FMAPS"
20042  "v4fnmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
20043   [(set_attr ("type") ("ssemuladd"))
20044    (set_attr ("prefix") ("evex"))
20045    (set_attr ("mode") ("V16SF"))])
20046
;; Unmasked v4fnmaddss (scalar form; "mode" attribute is SF).
;; Operand 1 (V4SF) is tied to the destination, operand 2 is a V64SF
;; register restricted by "Yh" and printed with %x, operand 3 is a V4SF
;; memory operand.
20047(define_insn "avx5124fmaddps_4fnmaddss"
20048  [(set (match_operand:V4SF 0 "register_operand" "=v")
20049	(unspec:V4SF
20050	  [(match_operand:V4SF 1 "register_operand" "0")
20051	   (match_operand:V64SF 2 "register_operand" "Yh")
20052	   (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
20053  "TARGET_AVX5124FMAPS"
20054  "v4fnmaddss\t{%3, %x2, %0|%0, %x2, %3}"
20055   [(set_attr ("type") ("ssemuladd"))
20056    (set_attr ("prefix") ("evex"))
20057    (set_attr ("mode") ("SF"))])
20058
;; Merge-masked v4fnmaddss.  Inside the unspec, operand 1 is the V64SF
;; register group and operand 2 the V4SF memory; operand 3 (tied to the
;; destination) is the vec_merge's second arm and operand 4 the QImode
;; mask register.
20059(define_insn "avx5124fmaddps_4fnmaddss_mask"
20060  [(set (match_operand:V4SF 0 "register_operand" "=v")
20061	(vec_merge:V4SF
20062	  (unspec:V4SF
20063	    [(match_operand:V64SF 1 "register_operand" "Yh")
20064	     (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
20065	  (match_operand:V4SF 3 "register_operand" "0")
20066	  (match_operand:QI 4 "register_operand" "Yk")))]
20067  "TARGET_AVX5124FMAPS"
20068  "v4fnmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
20069   [(set_attr ("type") ("ssemuladd"))
20070    (set_attr ("prefix") ("evex"))
20071    (set_attr ("mode") ("SF"))])
20072
;; Zero-masked v4fnmaddss.  Unspec inputs: operand 1 tied to the
;; destination, V64SF register group (operand 2), V4SF memory
;; (operand 3).  Operand 4 is the zero constant arm of the vec_merge and
;; operand 5 the QImode mask; the template appends {%5}{z}.
20073(define_insn "avx5124fmaddps_4fnmaddss_maskz"
20074  [(set (match_operand:V4SF 0 "register_operand" "=v")
20075	(vec_merge:V4SF
20076	  (unspec:V4SF
20077	    [(match_operand:V4SF 1 "register_operand" "0")
20078	     (match_operand:V64SF 2 "register_operand" "Yh")
20079	     (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
20080	  (match_operand:V4SF 4 "const0_operand" "C")
20081	  (match_operand:QI 5 "register_operand" "Yk")))]
20082  "TARGET_AVX5124FMAPS"
20083  "v4fnmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
20084   [(set_attr ("type") ("ssemuladd"))
20085    (set_attr ("prefix") ("evex"))
20086    (set_attr ("mode") ("SF"))])
20087
;; Unmasked vp4dpwssd under TARGET_AVX5124VNNIW.  Operand 1 (V16SI) is
;; tied to the destination, operand 2 is a V64SI register restricted by
;; "Yh" and printed with %g, operand 3 is a V4SI memory operand.
20088(define_insn "avx5124vnniw_vp4dpwssd"
20089  [(set (match_operand:V16SI 0 "register_operand" "=v")
20090	(unspec:V16SI
20091	  [(match_operand:V16SI 1 "register_operand" "0")
20092	   (match_operand:V64SI 2 "register_operand" "Yh")
20093	   (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD))]
20094  "TARGET_AVX5124VNNIW"
20095  "vp4dpwssd\t{%3, %g2, %0|%0, %g2, %3}"
20096   [(set_attr ("type") ("ssemuladd"))
20097    (set_attr ("prefix") ("evex"))
20098    (set_attr ("mode") ("TI"))])
20099
;; Merge-masked vp4dpwssd.  Inside the unspec, operand 1 is the V64SI
;; register group and operand 2 the V4SI memory; operand 3 (tied to the
;; destination) is the vec_merge's second arm and operand 4 the HImode
;; mask register.
20100(define_insn "avx5124vnniw_vp4dpwssd_mask"
20101  [(set (match_operand:V16SI 0 "register_operand" "=v")
20102	(vec_merge:V16SI
20103	  (unspec:V16SI
20104	     [(match_operand:V64SI 1 "register_operand" "Yh")
20105	      (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
20106	  (match_operand:V16SI 3 "register_operand" "0")
20107	  (match_operand:HI 4 "register_operand" "Yk")))]
20108  "TARGET_AVX5124VNNIW"
20109  "vp4dpwssd\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
20110   [(set_attr ("type") ("ssemuladd"))
20111    (set_attr ("prefix") ("evex"))
20112    (set_attr ("mode") ("TI"))])
20113
;; Zero-masked vp4dpwssd.  Unspec inputs: operand 1 tied to the
;; destination, V64SI register group (operand 2), V4SI memory
;; (operand 3).  Operand 4 is the zero constant arm of the vec_merge and
;; operand 5 the HImode mask; the template appends {%5}{z}.
20114(define_insn "avx5124vnniw_vp4dpwssd_maskz"
20115  [(set (match_operand:V16SI 0 "register_operand" "=v")
20116	(vec_merge:V16SI
20117	  (unspec:V16SI
20118	    [(match_operand:V16SI 1 "register_operand" "0")
20119	     (match_operand:V64SI 2 "register_operand" "Yh")
20120	     (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
20121	  (match_operand:V16SI 4 "const0_operand" "C")
20122	  (match_operand:HI 5 "register_operand" "Yk")))]
20123  "TARGET_AVX5124VNNIW"
20124  "vp4dpwssd\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
20125   [(set_attr ("type") ("ssemuladd"))
20126    (set_attr ("prefix") ("evex"))
20127    (set_attr ("mode") ("TI"))])
20128
;; Unmasked vp4dpwssds (UNSPEC_VP4DPWSSDS).  Operand 1 (V16SI) is tied
;; to the destination, operand 2 is a V64SI register restricted by "Yh"
;; and printed with %g, operand 3 is a V4SI memory operand.
20129(define_insn "avx5124vnniw_vp4dpwssds"
20130  [(set (match_operand:V16SI 0 "register_operand" "=v")
20131	(unspec:V16SI
20132	  [(match_operand:V16SI 1 "register_operand" "0")
20133	   (match_operand:V64SI 2 "register_operand" "Yh")
20134	   (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS))]
20135  "TARGET_AVX5124VNNIW"
20136  "vp4dpwssds\t{%3, %g2, %0|%0, %g2, %3}"
20137   [(set_attr ("type") ("ssemuladd"))
20138    (set_attr ("prefix") ("evex"))
20139    (set_attr ("mode") ("TI"))])
20140
;; Merge-masked vp4dpwssds.  Inside the unspec, operand 1 is the V64SI
;; register group and operand 2 the V4SI memory; operand 3 (tied to the
;; destination) is the vec_merge's second arm and operand 4 the HImode
;; mask register.
20141(define_insn "avx5124vnniw_vp4dpwssds_mask"
20142  [(set (match_operand:V16SI 0 "register_operand" "=v")
20143	(vec_merge:V16SI
20144	  (unspec:V16SI
20145	     [(match_operand:V64SI 1 "register_operand" "Yh")
20146	      (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
20147	  (match_operand:V16SI 3 "register_operand" "0")
20148	  (match_operand:HI 4 "register_operand" "Yk")))]
20149  "TARGET_AVX5124VNNIW"
20150  "vp4dpwssds\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
20151   [(set_attr ("type") ("ssemuladd"))
20152    (set_attr ("prefix") ("evex"))
20153    (set_attr ("mode") ("TI"))])
20154
;; Zero-masked vp4dpwssds.  Unspec inputs: operand 1 tied to the
;; destination, V64SI register group (operand 2), V4SI memory
;; (operand 3).  Operand 4 is the zero constant arm of the vec_merge and
;; operand 5 the HImode mask; the template appends {%5}{z}.
20155(define_insn "avx5124vnniw_vp4dpwssds_maskz"
20156  [(set (match_operand:V16SI 0 "register_operand" "=v")
20157	(vec_merge:V16SI
20158	  (unspec:V16SI
20159	    [(match_operand:V16SI 1 "register_operand" "0")
20160	     (match_operand:V64SI 2 "register_operand" "Yh")
20161	     (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
20162	  (match_operand:V16SI 4 "const0_operand" "C")
20163	  (match_operand:HI 5 "register_operand" "Yk")))]
20164  "TARGET_AVX5124VNNIW"
20165  "vp4dpwssds\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
20166   [(set_attr ("type") ("ssemuladd"))
20167    (set_attr ("prefix") ("evex"))
20168    (set_attr ("mode") ("TI"))])
20169
;; Vector population count for the VI48_AVX512VL modes, expressed with
;; the generic popcount RTL code and emitted as vpopcnt<suffix> under
;; TARGET_AVX512VPOPCNTDQ.  No insn attributes are set on this pattern.
20170(define_insn "vpopcount<mode><mask_name>"
20171  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
20172	(popcount:VI48_AVX512VL
20173	  (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
20174  "TARGET_AVX512VPOPCNTDQ"
20175  "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
20176
20177;; Save multiple registers out-of-line.
;; The whole parallel is validated by the "save_multiple" predicate
;; (operand 0); only its first element, a use of the symbol operand 1,
;; is spelled out here.  The insn is emitted as a call to that symbol
;; and is only available for TARGET_SSE && TARGET_64BIT.
20178(define_insn "save_multiple<mode>"
20179  [(match_parallel 0 "save_multiple"
20180    [(use (match_operand:P 1 "symbol_operand"))])]
20181  "TARGET_SSE && TARGET_64BIT"
20182  "call\t%P1")
20183
;; Out-of-line restore of multiple registers.  The whole PARALLEL is checked
;; by the "restore_multiple" predicate; the insn is emitted as a call to the
;; symbol in operand 1.  Only available for 64-bit SSE targets.
(define_insn "restore_multiple<mode>"
  [(match_parallel 0 "restore_multiple"
     [(use (match_operand:P 1 "symbol_operand"))])]
  "TARGET_SSE && TARGET_64BIT"
  "call\t%P1")
20190
;; Out-of-line restore of multiple registers combined with the function
;; return.  The PARALLEL starts with (return), uses the symbol in operand 1
;; and sets SP from R10; the insn is emitted as a jmp to that symbol rather
;; than a call.
(define_insn "restore_multiple_and_return<mode>"
  [(match_parallel 0 "restore_multiple"
     [(return)
      (use (match_operand:P 1 "symbol_operand"))
      (set (reg:DI SP_REG)
           (reg:DI R10_REG))])]
  "TARGET_SSE && TARGET_64BIT"
  "jmp\t%P1")
20200
;; Out-of-line restore of multiple registers for functions that use the hard
;; frame pointer: the leave operation is performed before returning from
;; the function.  The PARALLEL records SP = BP + 8, BP = mem[BP] and a
;; clobber of all memory; the insn is emitted as a jmp to the symbol in
;; operand 1.
(define_insn "restore_multiple_leave_return<mode>"
  [(match_parallel 0 "restore_multiple"
     [(return)
      (use (match_operand:P 1 "symbol_operand"))
      (set (reg:DI SP_REG)
           (plus:DI (reg:DI BP_REG) (const_int 8)))
      (set (reg:DI BP_REG)
           (mem:DI (reg:DI BP_REG)))
      (clobber (mem:BLK (scratch)))])]
  "TARGET_SSE && TARGET_64BIT"
  "jmp\t%P1")
20213
;; Element-wise population count over the VI12_AVX512VL modes.  The source
;; may be a register or memory.  Enabled by TARGET_AVX512BITALG.
(define_insn "vpopcount<mode><mask_name>"
  [(set
     (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
     (popcount:VI12_AVX512VL
       (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512BITALG"
  "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
20220
;; GF(2^8) affine transformation of the inverse (gf2p8affineinvqb).
;; Three alternatives: legacy SSE encoding (operand 1 tied to the
;; destination), VEX encoding, and EVEX encoding.  Operand 3 is the 8-bit
;; immediate.
;;
;; Operand 1 (the data vector) and operand 2 (the transformation matrix)
;; play different roles in the instruction, so they must NOT be marked
;; commutative: with a '%' on operand 1 the register allocator would be
;; free to exchange them and silently compute a different result.
(define_insn "vgf2p8affineinvqb_<mode><mask_name>"
  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
	(unspec:VI1_AVX512F
	  [(match_operand:VI1_AVX512F 1 "register_operand" "0,x,v")
	   (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")
	   (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]
	  UNSPEC_GF2P8AFFINEINV))]
  "TARGET_GFNI"
  "@
   gf2p8affineinvqb\t{%3, %2, %0| %0, %2, %3}
   vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}
   vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx,avx512f")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex,evex")
   (set_attr "mode" "<sseinsnmode>")])
20238
;; GF(2^8) affine transformation (gf2p8affineqb).
;; Three alternatives: legacy SSE encoding (operand 1 tied to the
;; destination), VEX encoding, and EVEX encoding.  Operand 3 is the 8-bit
;; immediate.
;;
;; Operand 1 (the data vector) and operand 2 (the transformation matrix)
;; play different roles in the instruction, so they must NOT be marked
;; commutative: with a '%' on operand 1 the register allocator would be
;; free to exchange them and silently compute a different result.
(define_insn "vgf2p8affineqb_<mode><mask_name>"
  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
	(unspec:VI1_AVX512F
	  [(match_operand:VI1_AVX512F 1 "register_operand" "0,x,v")
	   (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")
	   (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]
	  UNSPEC_GF2P8AFFINE))]
  "TARGET_GFNI"
  "@
   gf2p8affineqb\t{%3, %2, %0| %0, %2, %3}
   vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}
   vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx,avx512f")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex,evex")
   (set_attr "mode" "<sseinsnmode>")])
20256
;; gf2p8mulb byte multiply.  Three alternatives: legacy SSE encoding
;; (operand 1 tied to the destination), VEX encoding, and EVEX encoding.
;; Operand 1 carries the '%' marker, i.e. operands 1 and 2 are declared
;; commutative.
(define_insn "vgf2p8mulb_<mode><mask_name>"
  [(set
     (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
     (unspec:VI1_AVX512F
       [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
        (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")]
       UNSPEC_GF2P8MUL))]
  "TARGET_GFNI"
  "@
   gf2p8mulb\t{%2, %0| %0, %2}
   vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}
   vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx,avx512f")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex,evex")
   (set_attr "mode" "<sseinsnmode>")])
20273
;; vpshrd with an immediate count.  Operand 1 is a register, operand 2 a
;; register or memory, operand 3 an immediate in 0..255.
(define_insn "vpshrd_<mode><mask_name>"
  [(set
     (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
     (unspec:VI248_AVX512VL
       [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
        (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
        (match_operand:SI 3 "const_0_to_255_operand" "n")]
       UNSPEC_VPSHRD))]
  "TARGET_AVX512VBMI2"
  "vpshrd<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
  [(set_attr "prefix" "evex")])
20284
;; vpshld with an immediate count.  Operand 1 is a register, operand 2 a
;; register or memory, operand 3 an immediate in 0..255.
(define_insn "vpshld_<mode><mask_name>"
  [(set
     (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
     (unspec:VI248_AVX512VL
       [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
        (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
        (match_operand:SI 3 "const_0_to_255_operand" "n")]
       UNSPEC_VPSHLD))]
  "TARGET_AVX512VBMI2"
  "vpshld<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
  [(set_attr "prefix" "evex")])
20295
;; vpshrdv, unmasked.  Operand 1 is tied to the destination, operand 2 is a
;; register and operand 3 may be a register or memory.
(define_insn "vpshrdv_<mode>"
  [(set
     (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
     (unspec:VI248_AVX512VL
       [(match_operand:VI248_AVX512VL 1 "register_operand" "0")   ; same reg as operand 0
        (match_operand:VI248_AVX512VL 2 "register_operand" "v")
        (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
       UNSPEC_VPSHRDV))]
  "TARGET_AVX512VBMI2"
  "vpshrdv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
20307
;; vpshrdv, merge-masked.  Operand 1 is tied to the destination and is also
;; reused (match_dup) as the second vec_merge input; operand 4 is the mask
;; register.
(define_insn "vpshrdv_<mode>_mask"
  [(set
     (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI248_AVX512VL
       (unspec:VI248_AVX512VL
         [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
          (match_operand:VI248_AVX512VL 2 "register_operand" "v")
          (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPSHRDV)
       (match_dup 1)
       (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
20322
;; vpshrdv, zero-masked expander.  Forwards its five operands to
;; vpshrdv_<mode>_maskz_1, inserting the zero vector of the current mode in
;; front of the mask operand.
(define_expand "vpshrdv_<mode>_maskz"
  [(match_operand:VI248_AVX512VL 0 "register_operand")
   (match_operand:VI248_AVX512VL 1 "register_operand")
   (match_operand:VI248_AVX512VL 2 "register_operand")
   (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VBMI2"
{
  /* The _maskz_1 insn takes the zero vector as its operand 4 and the
     mask as its operand 5.  */
  rtx zero = CONST0_RTX (<MODE>mode);
  rtx mask = operands[4];

  emit_insn (gen_vpshrdv_<mode>_maskz_1 (operands[0], operands[1],
					 operands[2], operands[3],
					 zero, mask));
  DONE;
})
20337
;; vpshrdv, zero-masked insn.  Operand 1 is tied to the destination,
;; operand 4 must be the zero vector (const0_operand) and operand 5 is the
;; mask register; the template appends the {z} marker.
(define_insn "vpshrdv_<mode>_maskz_1"
  [(set
     (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI248_AVX512VL
       (unspec:VI248_AVX512VL
         [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
          (match_operand:VI248_AVX512VL 2 "register_operand" "v")
          (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPSHRDV)
       (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
       (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
20352
;; vpshldv, unmasked.  Operand 1 is tied to the destination, operand 2 is a
;; register and operand 3 may be a register or memory.
(define_insn "vpshldv_<mode>"
  [(set
     (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
     (unspec:VI248_AVX512VL
       [(match_operand:VI248_AVX512VL 1 "register_operand" "0")   ; same reg as operand 0
        (match_operand:VI248_AVX512VL 2 "register_operand" "v")
        (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
       UNSPEC_VPSHLDV))]
  "TARGET_AVX512VBMI2"
  "vpshldv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
20364
;; vpshldv, merge-masked.  Operand 1 is tied to the destination and is also
;; reused (match_dup) as the second vec_merge input; operand 4 is the mask
;; register.
(define_insn "vpshldv_<mode>_mask"
  [(set
     (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI248_AVX512VL
       (unspec:VI248_AVX512VL
         [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
          (match_operand:VI248_AVX512VL 2 "register_operand" "v")
          (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPSHLDV)
       (match_dup 1)
       (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
20379
;; vpshldv, zero-masked expander.  Forwards its five operands to
;; vpshldv_<mode>_maskz_1, inserting the zero vector of the current mode in
;; front of the mask operand.
(define_expand "vpshldv_<mode>_maskz"
  [(match_operand:VI248_AVX512VL 0 "register_operand")
   (match_operand:VI248_AVX512VL 1 "register_operand")
   (match_operand:VI248_AVX512VL 2 "register_operand")
   (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VBMI2"
{
  /* The _maskz_1 insn takes the zero vector as its operand 4 and the
     mask as its operand 5.  */
  rtx zero = CONST0_RTX (<MODE>mode);
  rtx mask = operands[4];

  emit_insn (gen_vpshldv_<mode>_maskz_1 (operands[0], operands[1],
					 operands[2], operands[3],
					 zero, mask));
  DONE;
})
20394
;; vpshldv, zero-masked insn.  Operand 1 is tied to the destination,
;; operand 4 must be the zero vector (const0_operand) and operand 5 is the
;; mask register; the template appends the {z} marker.
(define_insn "vpshldv_<mode>_maskz_1"
  [(set
     (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI248_AVX512VL
       (unspec:VI248_AVX512VL
         [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
          (match_operand:VI248_AVX512VL 2 "register_operand" "v")
          (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPSHLDV)
       (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
       (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
20409
;; vpdpbusd, unmasked.  Operand 1 is tied to the destination, operand 2 is a
;; register and operand 3 may be a register or memory.
(define_insn "vpdpbusd_<mode>"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (unspec:VI4_AVX512VL
       [(match_operand:VI4_AVX512VL 1 "register_operand" "0")   ; same reg as operand 0
        (match_operand:VI4_AVX512VL 2 "register_operand" "v")
        (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
       UNSPEC_VPMADDUBSWACCD))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr "prefix" "evex")])
20420
;; vpdpbusd, merge-masked.  Operand 1 is tied to the destination and is also
;; reused (match_dup) as the second vec_merge input; operand 4 is the mask
;; register.
(define_insn "vpdpbusd_<mode>_mask"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI4_AVX512VL
       (unspec:VI4_AVX512VL
         [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
          (match_operand:VI4_AVX512VL 2 "register_operand" "v")
          (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPMADDUBSWACCD)
       (match_dup 1)
       (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
20434
;; vpdpbusd, zero-masked expander.  Forwards its five operands to
;; vpdpbusd_<mode>_maskz_1, inserting the zero vector of the current mode in
;; front of the mask operand.
(define_expand "vpdpbusd_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  /* The _maskz_1 insn takes the zero vector as its operand 4 and the
     mask as its operand 5.  */
  rtx zero = CONST0_RTX (<MODE>mode);
  rtx mask = operands[4];

  emit_insn (gen_vpdpbusd_<mode>_maskz_1 (operands[0], operands[1],
					  operands[2], operands[3],
					  zero, mask));
  DONE;
})
20449
;; vpdpbusd, zero-masked insn.  Operand 1 is tied to the destination,
;; operand 4 must be the zero vector (const0_operand) and operand 5 is the
;; mask register; the template appends the {z} marker.
(define_insn "vpdpbusd_<mode>_maskz_1"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI4_AVX512VL
       (unspec:VI4_AVX512VL
         [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
          (match_operand:VI4_AVX512VL 2 "register_operand" "v")
          (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPMADDUBSWACCD)
       (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
       (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
20463
20464
;; vpdpbusds, unmasked.  Operand 1 is tied to the destination, operand 2 is
;; a register and operand 3 may be a register or memory.
(define_insn "vpdpbusds_<mode>"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (unspec:VI4_AVX512VL
       [(match_operand:VI4_AVX512VL 1 "register_operand" "0")   ; same reg as operand 0
        (match_operand:VI4_AVX512VL 2 "register_operand" "v")
        (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
       UNSPEC_VPMADDUBSWACCSSD))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr "prefix" "evex")])
20475
;; vpdpbusds, merge-masked.  Operand 1 is tied to the destination and is
;; also reused (match_dup) as the second vec_merge input; operand 4 is the
;; mask register.
(define_insn "vpdpbusds_<mode>_mask"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI4_AVX512VL
       (unspec:VI4_AVX512VL
         [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
          (match_operand:VI4_AVX512VL 2 "register_operand" "v")
          (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPMADDUBSWACCSSD)
       (match_dup 1)
       (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
20489
;; vpdpbusds, zero-masked expander.  Forwards its five operands to
;; vpdpbusds_<mode>_maskz_1, inserting the zero vector of the current mode
;; in front of the mask operand.
(define_expand "vpdpbusds_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  /* The _maskz_1 insn takes the zero vector as its operand 4 and the
     mask as its operand 5.  */
  rtx zero = CONST0_RTX (<MODE>mode);
  rtx mask = operands[4];

  emit_insn (gen_vpdpbusds_<mode>_maskz_1 (operands[0], operands[1],
					   operands[2], operands[3],
					   zero, mask));
  DONE;
})
20504
;; vpdpbusds, zero-masked insn.  Operand 1 is tied to the destination,
;; operand 4 must be the zero vector (const0_operand) and operand 5 is the
;; mask register; the template appends the {z} marker.
(define_insn "vpdpbusds_<mode>_maskz_1"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI4_AVX512VL
       (unspec:VI4_AVX512VL
         [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
          (match_operand:VI4_AVX512VL 2 "register_operand" "v")
          (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPMADDUBSWACCSSD)
       (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
       (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
20518
20519
;; vpdpwssd, unmasked.  Operand 1 is tied to the destination, operand 2 is a
;; register and operand 3 may be a register or memory.
(define_insn "vpdpwssd_<mode>"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (unspec:VI4_AVX512VL
       [(match_operand:VI4_AVX512VL 1 "register_operand" "0")   ; same reg as operand 0
        (match_operand:VI4_AVX512VL 2 "register_operand" "v")
        (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
       UNSPEC_VPMADDWDACCD))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr "prefix" "evex")])
20530
;; vpdpwssd, merge-masked.  Operand 1 is tied to the destination and is also
;; reused (match_dup) as the second vec_merge input; operand 4 is the mask
;; register.
(define_insn "vpdpwssd_<mode>_mask"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI4_AVX512VL
       (unspec:VI4_AVX512VL
         [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
          (match_operand:VI4_AVX512VL 2 "register_operand" "v")
          (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPMADDWDACCD)
       (match_dup 1)
       (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
20544
;; vpdpwssd, zero-masked expander.  Forwards its five operands to
;; vpdpwssd_<mode>_maskz_1, inserting the zero vector of the current mode in
;; front of the mask operand.
(define_expand "vpdpwssd_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  /* The _maskz_1 insn takes the zero vector as its operand 4 and the
     mask as its operand 5.  */
  rtx zero = CONST0_RTX (<MODE>mode);
  rtx mask = operands[4];

  emit_insn (gen_vpdpwssd_<mode>_maskz_1 (operands[0], operands[1],
					  operands[2], operands[3],
					  zero, mask));
  DONE;
})
20559
;; vpdpwssd, zero-masked insn.  Operand 1 is tied to the destination,
;; operand 4 must be the zero vector (const0_operand) and operand 5 is the
;; mask register; the template appends the {z} marker.
(define_insn "vpdpwssd_<mode>_maskz_1"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI4_AVX512VL
       (unspec:VI4_AVX512VL
         [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
          (match_operand:VI4_AVX512VL 2 "register_operand" "v")
          (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPMADDWDACCD)
       (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
       (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
20573
20574
;; vpdpwssds, unmasked.  Operand 1 is tied to the destination, operand 2 is
;; a register and operand 3 may be a register or memory.
(define_insn "vpdpwssds_<mode>"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (unspec:VI4_AVX512VL
       [(match_operand:VI4_AVX512VL 1 "register_operand" "0")   ; same reg as operand 0
        (match_operand:VI4_AVX512VL 2 "register_operand" "v")
        (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
       UNSPEC_VPMADDWDACCSSD))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr "prefix" "evex")])
20585
;; vpdpwssds, merge-masked.  Operand 1 is tied to the destination and is
;; also reused (match_dup) as the second vec_merge input; operand 4 is the
;; mask register.
(define_insn "vpdpwssds_<mode>_mask"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI4_AVX512VL
       (unspec:VI4_AVX512VL
         [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
          (match_operand:VI4_AVX512VL 2 "register_operand" "v")
          (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPMADDWDACCSSD)
       (match_dup 1)
       (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
20599
;; vpdpwssds, zero-masked expander.  Forwards its five operands to
;; vpdpwssds_<mode>_maskz_1, inserting the zero vector of the current mode
;; in front of the mask operand.
(define_expand "vpdpwssds_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  /* The _maskz_1 insn takes the zero vector as its operand 4 and the
     mask as its operand 5.  */
  rtx zero = CONST0_RTX (<MODE>mode);
  rtx mask = operands[4];

  emit_insn (gen_vpdpwssds_<mode>_maskz_1 (operands[0], operands[1],
					   operands[2], operands[3],
					   zero, mask));
  DONE;
})
20614
;; vpdpwssds, zero-masked insn.  Operand 1 is tied to the destination,
;; operand 4 must be the zero vector (const0_operand) and operand 5 is the
;; mask register; the template appends the {z} marker.
(define_insn "vpdpwssds_<mode>_maskz_1"
  [(set
     (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI4_AVX512VL
       (unspec:VI4_AVX512VL
         [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
          (match_operand:VI4_AVX512VL 2 "register_operand" "v")
          (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
         UNSPEC_VPMADDWDACCSSD)
       (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
       (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
20628
;; vaesdec over the VI1_AVX512VL_F modes.  Operand 1 must be a register.
;; Operand 2 already has the "vector_operand" predicate, so it is given the
;; "vm" constraint (as in vaesenc_<mode>) to let a memory source be used
;; directly instead of being reloaded into a register first.
(define_insn "vaesdec_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL_F
	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
	  UNSPEC_VAESDEC))]
  "TARGET_VAES"
  "vaesdec\t{%2, %1, %0|%0, %1, %2}"
)
20638
;; vaesdeclast over the VI1_AVX512VL_F modes.  Operand 1 must be a register.
;; Operand 2 already has the "vector_operand" predicate, so it is given the
;; "vm" constraint (as in vaesenclast_<mode>) to let a memory source be used
;; directly instead of being reloaded into a register first.
(define_insn "vaesdeclast_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL_F
	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
	  UNSPEC_VAESDECLAST))]
  "TARGET_VAES"
  "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
)
20648
;; vaesenc over the VI1_AVX512VL_F modes.  Operand 1 must be a register;
;; operand 2 may be a register or memory.
(define_insn "vaesenc_<mode>"
  [(set
     (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
     (unspec:VI1_AVX512VL_F
       [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
        (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
       UNSPEC_VAESENC))]
  "TARGET_VAES"
  "vaesenc\t{%2, %1, %0|%0, %1, %2}")
20658
;; vaesenclast over the VI1_AVX512VL_F modes.  Operand 1 must be a register;
;; operand 2 may be a register or memory.
(define_insn "vaesenclast_<mode>"
  [(set
     (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
     (unspec:VI1_AVX512VL_F
       [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
        (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
       UNSPEC_VAESENCLAST))]
  "TARGET_VAES"
  "vaesenclast\t{%2, %1, %0|%0, %1, %2}")
20668
;; vpclmulqdq over the VI8_FVL modes.  Operand 1 must be a register,
;; operand 2 may be a register or memory, operand 3 is an immediate in
;; 0..255.
(define_insn "vpclmulqdq_<mode>"
  [(set
     (match_operand:VI8_FVL 0 "register_operand" "=v")
     (unspec:VI8_FVL
       [(match_operand:VI8_FVL 1 "register_operand" "v")
        (match_operand:VI8_FVL 2 "vector_operand" "vm")
        (match_operand:SI 3 "const_0_to_255_operand" "n")]
       UNSPEC_VPCLMULQDQ))]
  "TARGET_VPCLMULQDQ"
  "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "DI")])
20678
;; vpshufbitqmb: the result is written to a mask register (constraint Yk)
;; in the mask mode belonging to the vector mode.  Operand 1 must be a
;; register; operand 2 may be a register or memory.
(define_insn "avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>"
  [(set
     (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
     (unspec:<avx512fmaskmode>
       [(match_operand:VI1_AVX512VLBW 1 "register_operand" "v")
        (match_operand:VI1_AVX512VLBW 2 "nonimmediate_operand" "vm")]
       UNSPEC_VPSHUFBIT))]
  "TARGET_AVX512BITALG"
  "vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
20689