1;; ARM NEON coprocessor Machine Description
2;; Copyright (C) 2006-2018 Free Software Foundation, Inc.
3;; Written by CodeSourcery.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify it
8;; under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 3, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful, but
13;; WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15;; General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3.  If not see
19;; <http://www.gnu.org/licenses/>.
20
21
;; The "vqh_mnem" attribute exists so that type attribute definitions can
;; perform string comparisons against <VQH_mnem>.  Its possible values are
;; vadd, vmin and vmax; the default is vadd.
(define_attr "vqh_mnem"
  "vadd,vmin,vmax"
  (const_string "vadd"))
25
;; Move pattern for the VDX modes.  The insn condition requires TARGET_NEON
;; and that at least one of the two operands is a register.
;; Alternatives (destination <- source), in order:
;;   0: w <- w     1: Un <- w    2: w <- Dm    3: w <- Dn    4: w <- Uni
;;   5: r <- w     6: w <- r     7: r <- r     8: r <- Usi   9: Us <- r
;; Alternatives 2 and 3 are immediate moves whose template is built at
;; output time; 1 and 4 go through output_move_neon; 7-9 go through
;; output_move_double.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VDX 0 "nonimmediate_operand"
          "=w,Un,w, w, w,  ?r,?w,?r,?r, ?Us")
        (match_operand:VDX 1 "general_operand"
          " w,w, Dm,Dn,Uni, w, r, r, Usi,r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return "vmov\t%P0, %P1  @ <mode>";

    case 1:
    case 4:
      return output_move_neon (operands);

    case 2:
    case 3:
      {
        /* Immediate move.  neon_immediate_valid_for_move rewrites
           operands[1] and reports a width; a width of zero selects the
           fixed f32 template, anything else is printed into the integer
           template.  The buffer is static because the returned pointer
           must outlive this function.  */
        static char templ[40];
        int width;
        int is_valid
          = neon_immediate_valid_for_move (operands[1], <MODE>mode,
                                           &operands[1], &width);

        gcc_assert (is_valid != 0);

        if (width == 0)
          return "vmov.f32\t%P0, %1  @ <mode>";

        /* Bounded formatting: never write past the end of templ.  */
        snprintf (templ, sizeof (templ),
                  "vmov.i%d\t%%P0, %%x1  @ <mode>", width);
        return templ;
      }

    case 5:
      return "vmov\t%Q0, %R0, %P1  @ <mode>";

    case 6:
      return "vmov\t%P0, %Q1, %R1  @ <mode>";

    default:
      return output_move_double (operands, true, NULL);
    }
}
 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
                    neon_move<q>,neon_load1_1reg, neon_to_gp<q>,\
                    neon_from_gp<q>,mov_reg,neon_load1_2reg,\
                    neon_store1_2reg")
  (set_attr "length" "4,4,4,4,4,4,4,8,8,8")
  (set_attr "arm_pool_range"     "*,*,*,*,1020,*,*,*,1020,*")
  (set_attr "thumb2_pool_range"     "*,*,*,*,1018,*,*,*,1018,*")
  (set_attr "neg_pool_range" "*,*,*,*,1004,*,*,*,1004,*")])
71
;; Move pattern for the VQXMOV modes.  The insn condition requires
;; TARGET_NEON and that at least one of the two operands is a register.
;; Alternatives (destination <- source), in order:
;;   0: w <- w     1: Un <- w    2: w <- Dm    3: w <- DN    4: w <- Uni
;;   5: r <- w     6: w <- r     7: r <- r     8: r <- Usi   9: Us <- r
;; Alternatives 2 and 3 are immediate moves whose template is built at
;; output time; 1 and 4 go through output_move_neon; 5 and 6 each emit two
;; vmov instructions; 7-9 go through output_move_quad.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
          "=w,Un,w, w, w,  ?r,?w,?r,?r,  ?Us")
        (match_operand:VQXMOV 1 "general_operand"
          " w,w, Dm,DN,Uni, w, r, r, Usi, r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return "vmov\t%q0, %q1  @ <mode>";

    case 1:
    case 4:
      return output_move_neon (operands);

    case 2:
    case 3:
      {
        /* Immediate move.  neon_immediate_valid_for_move rewrites
           operands[1] and reports a width; a width of zero selects the
           fixed f32 template, anything else is printed into the integer
           template.  The buffer is static because the returned pointer
           must outlive this function.  */
        static char templ[40];
        int width;
        int is_valid
          = neon_immediate_valid_for_move (operands[1], <MODE>mode,
                                           &operands[1], &width);

        gcc_assert (is_valid != 0);

        if (width == 0)
          return "vmov.f32\t%q0, %1  @ <mode>";

        /* Bounded formatting: never write past the end of templ.  */
        snprintf (templ, sizeof (templ),
                  "vmov.i%d\t%%q0, %%1  @ <mode>", width);
        return templ;
      }

    case 5:
      return "vmov\t%Q0, %R0, %e1  @ <mode>\;vmov\t%J0, %K0, %f1";

    case 6:
      return "vmov\t%e0, %Q1, %R1  @ <mode>\;vmov\t%f0, %J1, %K1";

    default:
      return output_move_quad (operands);
    }
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
                     neon_move_q,neon_load2_2reg_q,neon_to_gp_q,\
                     neon_from_gp_q,mov_reg,neon_load1_4reg,neon_store1_4reg")
   (set_attr "length" "4,8,4,4,8,8,8,16,8,16")
   (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,*,1020,*")
   (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,1018,*")
   (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,996,*")])
116
/* We define these mov expanders to match the standard mov$a optab to prevent
   the mid-end from trying to do a subreg for these modes, which is the most
   inefficient way to expand the move.  Also, big-endian subregs aren't
   allowed for a subset of modes; see TARGET_CAN_CHANGE_MODE_CLASS.
   Without these RTL generation patterns the mid-end would attempt to take a
   subreg and may ICE if it can't.  */
123
;; Expander for TImode moves, enabled under TARGET_NEON.  While new pseudos
;; can still be created, a move whose destination is not a REG has its
;; source forced into a register first.
(define_expand "movti"
  [(set
     (match_operand:TI 0 "nonimmediate_operand" "")
     (match_operand:TI 1 "general_operand" ""))]
  "TARGET_NEON"
{
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (TImode, operands[1]);
})
135
;; Expander for moves in the VSTRUCT modes, enabled under TARGET_NEON.  While
;; new pseudos can still be created, a move whose destination is not a REG
;; has its source forced into a register first.
(define_expand "mov<mode>"
  [(set
     (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
     (match_operand:VSTRUCT 1 "general_operand" ""))]
  "TARGET_NEON"
{
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
147
;; Expander for moves in the VH modes, enabled under TARGET_NEON.  Both
;; operands use the s_register_operand predicate.  While new pseudos can
;; still be created, a destination that is not a REG has the source forced
;; into a register first.
(define_expand "mov<mode>"
  [(set
     (match_operand:VH 0 "s_register_operand")
     (match_operand:VH 1 "s_register_operand"))]
  "TARGET_NEON"
{
  if (can_create_pseudo_p () && !REG_P (operands[0]))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
159
;; Move pattern for the VSTRUCT modes; at least one operand must be a
;; register.  Alternative 0 (w <- w) returns "#" instead of assembly text;
;; alternatives 1 (Ut <- w) and 2 (w <- Ut) are printed by output_move_neon.
;; The length attribute is computed by arm_attr_length_move_neon.
(define_insn "*neon_mov<mode>"
  [(set
     (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
     (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  if (which_alternative == 0)
    return "#";

  if (which_alternative == 1 || which_alternative == 2)
    return output_move_neon (operands);

  gcc_unreachable ();
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
   (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
176
;; After reload, split an EImode register-to-register copy into two sets.
;; The pieces are a TImode register at the base register number and a DImode
;; register at base + 4; they are handed, together with the operands array,
;; to neon_disambiguate_copy.
(define_split
  [(set
     (match_operand:EI 0 "s_register_operand" "")
     (match_operand:EI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  const int base_dest = REGNO (operands[0]);
  const int base_src = REGNO (operands[1]);
  rtx dest[2], src[2];

  /* Leading TImode piece.  */
  dest[0] = gen_rtx_REG (TImode, base_dest);
  src[0] = gen_rtx_REG (TImode, base_src);

  /* Trailing DImode piece.  */
  dest[1] = gen_rtx_REG (DImode, base_dest + 4);
  src[1] = gen_rtx_REG (DImode, base_src + 4);

  neon_disambiguate_copy (operands, dest, src, 2);
})
195
;; After reload, split an OImode register-to-register copy into two sets.
;; The pieces are TImode registers at base + 0 and base + 4; they are handed,
;; together with the operands array, to neon_disambiguate_copy.
(define_split
  [(set
     (match_operand:OI 0 "s_register_operand" "")
     (match_operand:OI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  const int base_dest = REGNO (operands[0]);
  const int base_src = REGNO (operands[1]);
  rtx dest[2], src[2];

  for (int piece = 0; piece < 2; piece++)
    {
      dest[piece] = gen_rtx_REG (TImode, base_dest + 4 * piece);
      src[piece] = gen_rtx_REG (TImode, base_src + 4 * piece);
    }

  neon_disambiguate_copy (operands, dest, src, 2);
})
214
;; After reload, split a CImode register-to-register copy into three sets.
;; The pieces are TImode registers at base + 0, + 4 and + 8; they are handed,
;; together with the operands array, to neon_disambiguate_copy.
(define_split
  [(set
     (match_operand:CI 0 "s_register_operand" "")
     (match_operand:CI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))]
{
  const int base_dest = REGNO (operands[0]);
  const int base_src = REGNO (operands[1]);
  rtx dest[3], src[3];

  for (int piece = 0; piece < 3; piece++)
    {
      dest[piece] = gen_rtx_REG (TImode, base_dest + 4 * piece);
      src[piece] = gen_rtx_REG (TImode, base_src + 4 * piece);
    }

  neon_disambiguate_copy (operands, dest, src, 3);
})
236
;; After reload, split an XImode register-to-register copy into four sets.
;; The pieces are TImode registers at base + 0, + 4, + 8 and + 12; they are
;; handed, together with the operands array, to neon_disambiguate_copy.
(define_split
  [(set
     (match_operand:XI 0 "s_register_operand" "")
     (match_operand:XI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))
   (set (match_dup 6) (match_dup 7))]
{
  const int base_dest = REGNO (operands[0]);
  const int base_src = REGNO (operands[1]);
  rtx dest[4], src[4];

  for (int piece = 0; piece < 4; piece++)
    {
      dest[piece] = gen_rtx_REG (TImode, base_dest + 4 * piece);
      src[piece] = gen_rtx_REG (TImode, base_src + 4 * piece);
    }

  neon_disambiguate_copy (operands, dest, src, 4);
})
261
;; Expander for misaligned moves in the VDQX modes.  Enabled only for
;; TARGET_NEON on little-endian targets with unaligned_access set.  The move
;; is represented as an UNSPEC_MISALIGNED_ACCESS wrapped around the source.
(define_expand "movmisalign<mode>"
  [(set
     (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
     (unspec:VDQX
       [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
       UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
{
  const bool dest_is_reg = s_register_operand (operands[0], <MODE>mode);

  /* Expansion of this pattern must not fail.  When neither side is a
     register (for instance memory := constant, as the auto-vectorizer can
     produce), load operand 1 into a register first.  */
  if (!dest_is_reg && !s_register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);

  /* The operand whose address may need fixing is the source when the
     destination is a register, and the destination otherwise.  */
  rtx adjust_mem = dest_is_reg ? operands[1] : operands[0];

  /* Legitimize the address by moving it into a register when
     neon_vector_mem_operand rejects it.  */
  if (!neon_vector_mem_operand (adjust_mem, 2, true))
    XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
})
286
;; Misaligned store of a VDX-mode register, emitted as vst1.  Same enabling
;; condition as the movmisalign expander.
(define_insn "*movmisalign<mode>_neon_store"
  [(set
     (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
     (unspec:VDX
       [(match_operand:VDX 1 "s_register_operand" " w")]
       UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vst1.<V_sz_elem>\t{%P1}, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])
294
;; Misaligned load into a VDX-mode register, emitted as vld1.  Same enabling
;; condition as the movmisalign expander.
(define_insn "*movmisalign<mode>_neon_load"
  [(set
     (match_operand:VDX 0 "s_register_operand" "=w")
     (unspec:VDX
       [(match_operand:VDX 1 "neon_permissive_struct_operand" " Um")]
       UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vld1.<V_sz_elem>\t{%P0}, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")])
303
;; Misaligned store of a VQX-mode register, emitted as vst1.  Same enabling
;; condition as the movmisalign expander.
(define_insn "*movmisalign<mode>_neon_store"
  [(set
     (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
     (unspec:VQX
       [(match_operand:VQX 1 "s_register_operand" " w")]
       UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vst1.<V_sz_elem>\t{%q1}, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])
311
;; Misaligned load into a VQX-mode register, emitted as vld1.  Same enabling
;; condition as the movmisalign expander.
(define_insn "*movmisalign<mode>_neon_load"
  [(set
     (match_operand:VQX 0 "s_register_operand" "=w")
     (unspec:VQX
       [(match_operand:VQX 1 "neon_permissive_struct_operand" " Um")]
       UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vld1.<V_sz_elem>\t{%q0}, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")])
320
;; Insert a scalar (operand 1, from memory or a core register) into one lane
;; of a VD_LANE-mode vector.  Operand 3 is tied to the destination, and
;; operand 2 is the vec_merge selector; the lane number is the index of its
;; lowest set bit, mirrored when BYTES_BIG_ENDIAN.
(define_insn "vec_set<mode>_internal"
  [(set
     (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
     (vec_merge:VD_LANE
       (vec_duplicate:VD_LANE
         (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
       (match_operand:VD_LANE 3 "s_register_operand" "0,0")
       (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  /* Turn the selector mask into a lane number.  */
  int lane = ffs ((int) INTVAL (operands[2])) - 1;

  if (BYTES_BIG_ENDIAN)
    lane = GET_MODE_NUNITS (<MODE>mode) - 1 - lane;

  operands[2] = GEN_INT (lane);

  /* Alternative 0 loads the lane from memory, alternative 1 moves it from
     a core register.  */
  return (which_alternative == 0
          ? "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"
          : "vmov.<V_sz_elem>\t%P0[%c2], %1");
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
341
;; Insert a scalar (operand 1, from memory or a core register) into one lane
;; of a VQ2-mode vector.  The lane index, taken from the lowest set bit of
;; operand 2, is split into a half selector and a lane within that half; the
;; instruction is then printed against the corresponding V_HALF-mode register
;; (base register number + 0 or + 2).
(define_insn "vec_set<mode>_internal"
  [(set
     (match_operand:VQ2 0 "s_register_operand" "=w,w")
     (vec_merge:VQ2
       (vec_duplicate:VQ2
         (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
       (match_operand:VQ2 3 "s_register_operand" "0,0")
       (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  HOST_WIDE_INT index = ffs ((int) INTVAL (operands[2])) - 1;
  int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  int lane = index % lanes_per_half;
  int reg_offset = (index / lanes_per_half) * 2;
  int base_regno = REGNO (operands[0]);

  if (BYTES_BIG_ENDIAN)
    lane = lanes_per_half - 1 - lane;

  /* Retarget the destination at the half that holds the lane.  */
  operands[0] = gen_rtx_REG (<V_HALF>mode, base_regno + reg_offset);
  operands[2] = GEN_INT (lane);

  return (which_alternative == 0
          ? "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"
          : "vmov.<V_sz_elem>\t%P0[%c2], %1");
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
)
370
;; Insert a DImode value (operand 1, from memory or a core register pair)
;; into one element of a V2DI vector.  The element index is the lowest set
;; bit of operand 2; the instruction is printed against the DImode register
;; at base register number + 2 * index.
(define_insn "vec_setv2di_internal"
  [(set
     (match_operand:V2DI 0 "s_register_operand" "=w,w")
     (vec_merge:V2DI
       (vec_duplicate:V2DI
         (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
       (match_operand:V2DI 3 "s_register_operand" "0,0")
       (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  HOST_WIDE_INT index = ffs ((int) INTVAL (operands[2])) - 1;
  int target_regno = REGNO (operands[0]) + 2 * index;

  operands[0] = gen_rtx_REG (DImode, target_regno);

  return (which_alternative == 0
          ? "vld1.64\t%P0, %A1"
          : "vmov\t%P0, %Q1, %R1");
}
  [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
)
392
;; Standard-name expander for setting one element of a VDQ-mode vector.
;; Operand 2 is the element index; it is converted to a one-bit mask
;; (1 << index) and passed to the matching vec_set<mode>_internal pattern,
;; with operand 0 supplied both as destination and as the vector merged into.
(define_expand "vec_set<mode>"
  [(match_operand:VDQ 0 "s_register_operand" "")
   (match_operand:<V_elem> 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane_mask = HOST_WIDE_INT_1 << INTVAL (operands[2]);
  rtx vec = operands[0];

  emit_insn (gen_vec_set<mode>_internal (vec, operands[1],
                                         GEN_INT (lane_mask), vec));
  DONE;
})
404
;; Extract one lane of a VD_LANE-mode vector into memory (alternative 0,
;; vst1) or a core register (alternative 1, vmov).  Operand 2 is the lane
;; index, mirrored when BYTES_BIG_ENDIAN.
(define_insn "vec_extract<mode><V_elem_l>"
  [(set
     (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
     (vec_select:<V_elem>
       (match_operand:VD_LANE 1 "s_register_operand" "w,w")
       (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      const int nunits = GET_MODE_NUNITS (<MODE>mode);
      const int lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (nunits - 1 - lane);
    }

  return (which_alternative == 0
          ? "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"
          : "vmov.<V_uf_sclr>\t%0, %P1[%c2]");
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)
426
;; Extract one lane of a VQ2-mode vector into memory (alternative 0, vst1)
;; or a core register (alternative 1, vmov).  The lane index in operand 2 is
;; split into a half selector and a lane within that half; the instruction
;; is printed against the corresponding V_HALF-mode register (base register
;; number + 0 or + 2).
(define_insn "vec_extract<mode><V_elem_l>"
  [(set
     (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
     (vec_select:<V_elem>
       (match_operand:VQ2 1 "s_register_operand" "w,w")
       (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  HOST_WIDE_INT index = INTVAL (operands[2]);
  int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  int lane = index % lanes_per_half;
  int reg_offset = (index / lanes_per_half) * 2;
  int base_regno = REGNO (operands[1]);

  if (BYTES_BIG_ENDIAN)
    lane = lanes_per_half - 1 - lane;

  /* Retarget the source at the half that holds the lane.  */
  operands[1] = gen_rtx_REG (<V_HALF>mode, base_regno + reg_offset);
  operands[2] = GEN_INT (lane);

  return (which_alternative == 0
          ? "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"
          : "vmov.<V_uf_sclr>\t%0, %P1[%c2]");
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)
452
;; Extract one DImode element of a V2DI vector into memory (alternative 0,
;; vst1.64) or a core register pair (alternative 1, vmov).  The source is
;; rewritten to the DImode register at base register number + 2 * index.
(define_insn "vec_extractv2didi"
  [(set
     (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
     (vec_select:DI
       (match_operand:V2DI 1 "s_register_operand" "w,w")
       (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  int source_regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);

  operands[1] = gen_rtx_REG (DImode, source_regno);

  return (which_alternative == 0
          ? "vst1.64\t{%P1}, %A0  @ v2di"
          : "vmov\t%Q0, %R0, %P1  @ v2di");
}
  [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
)
471
;; Standard-name expander for initializing a VDQ-mode vector.  All of the
;; work is delegated to neon_expand_vector_init, which receives the
;; destination register and the initializer operand.
(define_expand "vec_init<mode><V_elem_l>"
  [(match_operand:VDQ 0 "s_register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_NEON"
{
  rtx target = operands[0];
  rtx initializer = operands[1];

  neon_expand_vector_init (target, initializer);
  DONE;
})
480
481;; Doubleword and quadword arithmetic.
482
483;; NOTE: some other instructions also support 64-bit integer
484;; element size, which we could potentially use for "long long" operations.
485
;; Vector addition on VDQ modes, emitted as vadd.  Float modes are matched
;; only when flag_unsafe_math_optimizations is set.  The type attribute
;; depends on whether the mode is a float mode.
(define_insn "*add<mode>3_neon"
  [(set
     (match_operand:VDQ 0 "s_register_operand" "=w")
     (plus:VDQ
       (match_operand:VDQ 1 "s_register_operand" "w")
       (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
       (const_string "neon_fp_addsub_s<q>")
       (const_string "neon_add<q>")))]
)
497
;; As with SFmode, full support for HFmode vector arithmetic is only available
;; when flag_unsafe_math_optimizations is enabled.
500
;; Standard-name vector addition on VH modes, emitted as vadd.  Requires
;; TARGET_NEON_FP16INST together with flag_unsafe_math_optimizations.
(define_insn "add<mode>3"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (plus:VH (match_operand:VH 1 "s_register_operand" "w")
                 (match_operand:VH 2 "s_register_operand" "w")))]
 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set (attr "type")
       (if_then_else (match_test "<Is_float_mode>")
                     (const_string "neon_fp_addsub_s<q>")
                     (const_string "neon_add<q>")))]
)
514
;; Vector addition on VH modes, emitted as vadd.  Unlike add<mode>3 this
;; variant needs only TARGET_NEON_FP16INST and does not test
;; flag_unsafe_math_optimizations.
(define_insn "add<mode>3_fp16"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (plus:VH (match_operand:VH 1 "s_register_operand" "w")
                 (match_operand:VH 2 "s_register_operand" "w")))]
 "TARGET_NEON_FP16INST"
 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set (attr "type")
       (if_then_else (match_test "<Is_float_mode>")
                     (const_string "neon_fp_addsub_s<q>")
                     (const_string "neon_add<q>")))]
)
528
;; DImode addition when NEON is available; the pattern clobbers the
;; condition-code register.  Alternatives 0 and 3 operate on NEON registers
;; and emit vadd.i64 (selected by the "arch" attribute values neon_for_64bits
;; and avoid_neon_for_64bits respectively).  Alternatives 1, 2, 4, 5 and 6
;; use core registers and return "#".
(define_insn "adddi3_neon"
  [(set
     (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
     (plus:DI
       (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
       (match_operand:DI 2 "arm_adddi_operand"     "w,r,0,w,r,Dd,Dd")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
{
  /* NEON register alternatives.  */
  if (which_alternative == 0 || which_alternative == 3)
    return "vadd.i64\t%P0, %P1, %P2";

  /* Every remaining valid alternative is a core-register form.  */
  if (which_alternative >= 1 && which_alternative <= 6)
    return "#";

  gcc_unreachable ();
}
  [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
                     multiple,multiple,multiple")
   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
   (set_attr "length" "*,8,8,*,8,8,8")
   (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
)
554
;; Vector subtraction on VDQ modes, emitted as vsub.  Float modes are matched
;; only when flag_unsafe_math_optimizations is set.  The type attribute
;; depends on whether the mode is a float mode.
(define_insn "*sub<mode>3_neon"
  [(set
     (match_operand:VDQ 0 "s_register_operand" "=w")
     (minus:VDQ
       (match_operand:VDQ 1 "s_register_operand" "w")
       (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
       (const_string "neon_fp_addsub_s<q>")
       (const_string "neon_sub<q>")))]
)
566
;; Standard-name vector subtraction on VH modes, emitted as vsub.  Requires
;; TARGET_NEON_FP16INST together with flag_unsafe_math_optimizations.
;; The VH modes are HFmode vectors, so the insn is given the floating-point
;; add/sub scheduling type (the same class the VH add patterns use) rather
;; than the integer neon_sub type.
(define_insn "sub<mode>3"
 [(set
   (match_operand:VH 0 "s_register_operand" "=w")
   (minus:VH
    (match_operand:VH 1 "s_register_operand" "w")
    (match_operand:VH 2 "s_register_operand" "w")))]
 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_addsub_s<q>")]
)
577
;; Vector subtraction on VH modes, emitted as vsub.  Unlike sub<mode>3 this
;; variant needs only TARGET_NEON_FP16INST and does not test
;; flag_unsafe_math_optimizations.
;; The VH modes are HFmode vectors, so the insn is given the floating-point
;; add/sub scheduling type (the same class the VH add patterns use) rather
;; than the integer neon_sub type.
(define_insn "sub<mode>3_fp16"
 [(set
   (match_operand:VH 0 "s_register_operand" "=w")
   (minus:VH
    (match_operand:VH 1 "s_register_operand" "w")
    (match_operand:VH 2 "s_register_operand" "w")))]
 "TARGET_NEON_FP16INST"
 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_addsub_s<q>")]
)
588
;; DImode subtraction when NEON is available; the pattern clobbers the
;; condition-code register.  Alternatives 0 and 4 operate on NEON registers
;; and emit vsub.i64; alternatives 1-3 use core registers and emit a
;; subs/sbc pair on the low and high words.
(define_insn "subdi3_neon"
  [(set
     (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
     (minus:DI
       (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
       (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_NEON"
{
  /* NEON register alternatives.  */
  if (which_alternative == 0 || which_alternative == 4)
    return "vsub.i64\t%P0, %P1, %P2";

  /* Core register alternatives.  */
  if (which_alternative >= 1 && which_alternative <= 3)
    return  "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";

  gcc_unreachable ();
}
  [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
   (set_attr "conds" "*,clob,clob,clob,*")
   (set_attr "length" "*,8,8,8,*")
   (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
)
611
;; Vector multiplication on VDQW modes, emitted as vmul.  Float modes are
;; matched only when flag_unsafe_math_optimizations is set.
(define_insn "*mul<mode>3_neon"
  [(set
     (match_operand:VDQW 0 "s_register_operand" "=w")
     (mult:VDQW
       (match_operand:VDQW 1 "s_register_operand" "w")
       (match_operand:VDQW 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
       (const_string "neon_fp_mul_s<q>")
       (const_string "neon_mul_<V_elem_ch><q>")))]
)
623
;; Multiply-accumulate on VDQW modes: operand 0 = operand 2 * operand 3 +
;; operand 1, with operand 1 tied to the destination.  Emitted as vmla.
;; Float modes are matched only when flag_unsafe_math_optimizations is set.
(define_insn "mul<mode>3add<mode>_neon"
  [(set
     (match_operand:VDQW 0 "s_register_operand" "=w")
     (plus:VDQW
       (mult:VDQW
         (match_operand:VDQW 2 "s_register_operand" "w")
         (match_operand:VDQW 3 "s_register_operand" "w"))
       (match_operand:VDQW 1 "s_register_operand" "0")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
       (const_string "neon_fp_mla_s<q>")
       (const_string "neon_mla_<V_elem_ch><q>")))]
)
636
;; Multiply-accumulate on VH modes: operand 0 = operand 2 * operand 3 +
;; operand 1, with operand 1 tied to the destination.  Emitted as vmla.f16
;; and gated on TARGET_NEON_FP16INST.
(define_insn "mul<mode>3add<mode>_neon"
  [(set
     (match_operand:VH 0 "s_register_operand" "=w")
     (plus:VH
       (mult:VH
         (match_operand:VH 2 "s_register_operand" "w")
         (match_operand:VH 3 "s_register_operand" "w"))
       (match_operand:VH 1 "s_register_operand" "0")))]
  "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
646
;; Multiply-subtract on VDQW modes: operand 0 = operand 1 - operand 2 *
;; operand 3, with operand 1 tied to the destination.  Emitted as vmls.
;; Float modes are matched only when flag_unsafe_math_optimizations is set.
(define_insn "mul<mode>3neg<mode>add<mode>_neon"
  [(set
     (match_operand:VDQW 0 "s_register_operand" "=w")
     (minus:VDQW
       (match_operand:VDQW 1 "s_register_operand" "0")
       (mult:VDQW
         (match_operand:VDQW 2 "s_register_operand" "w")
         (match_operand:VDQW 3 "s_register_operand" "w"))))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
       (const_string "neon_fp_mla_s<q>")
       (const_string "neon_mla_<V_elem_ch><q>")))]
)
659
660;; Fused multiply-accumulate
661;; We define each insn twice here:
662;;    1: with flag_unsafe_math_optimizations for the widening multiply phase
663;;       to be able to use when converting to FMA.
664;;    2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add on VCVTF modes (operand 1 * operand 2 + operand 3,
;; with operand 3 tied to the destination), emitted as vfma.  This
;; standard-name variant additionally requires
;; flag_unsafe_math_optimizations.
(define_insn "fma<VCVTF:mode>4"
  [(set
     (match_operand:VCVTF 0 "register_operand" "=w")
     (fma:VCVTF
       (match_operand:VCVTF 1 "register_operand" "w")
       (match_operand:VCVTF 2 "register_operand" "w")
       (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
674
;; Fused multiply-add on VCVTF modes (operand 1 * operand 2 + operand 3,
;; with operand 3 tied to the destination), emitted as vfma.  This variant
;; does not test flag_unsafe_math_optimizations.
(define_insn "fma<VCVTF:mode>4_intrinsic"
  [(set
     (match_operand:VCVTF 0 "register_operand" "=w")
     (fma:VCVTF
       (match_operand:VCVTF 1 "register_operand" "w")
       (match_operand:VCVTF 2 "register_operand" "w")
       (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA"
  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
684
;; Fused multiply-add on VH modes (operand 1 * operand 2 + operand 3, with
;; operand 3 tied to the destination), emitted as vfma.  Requires
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
(define_insn "fma<VH:mode>4"
  [(set (match_operand:VH 0 "register_operand" "=w")
        (fma:VH (match_operand:VH 1 "register_operand" "w")
                (match_operand:VH 2 "register_operand" "w")
                (match_operand:VH 3 "register_operand" "0")))]
 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_mla_s<q>")]
)
695
;; Fused multiply-add on VH modes (operand 1 * operand 2 + operand 3, with
;; operand 3 tied to the destination), emitted as vfma.  Requires only
;; TARGET_NEON_FP16INST.
(define_insn "fma<VH:mode>4_intrinsic"
  [(set (match_operand:VH 0 "register_operand" "=w")
        (fma:VH (match_operand:VH 1 "register_operand" "w")
                (match_operand:VH 2 "register_operand" "w")
                (match_operand:VH 3 "register_operand" "0")))]
 "TARGET_NEON_FP16INST"
 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_mla_s<q>")]
)
706
;; Fused multiply-subtract on VCVTF modes: (-operand 1) * operand 2 +
;; operand 3, with operand 3 tied to the destination.  Emitted as vfms.
;; Requires flag_unsafe_math_optimizations in addition to TARGET_NEON and
;; TARGET_FMA.
(define_insn "*fmsub<VCVTF:mode>4"
  [(set
     (match_operand:VCVTF 0 "register_operand" "=w")
     (fma:VCVTF
       (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
       (match_operand:VCVTF 2 "register_operand" "w")
       (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
716
;; Fused multiply-subtract on VCVTF modes: (-operand 1) * operand 2 +
;; operand 3, with operand 3 tied to the destination.  Emitted as vfms.
;; This variant does not test flag_unsafe_math_optimizations.
(define_insn "fmsub<VCVTF:mode>4_intrinsic"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
 "TARGET_NEON && TARGET_FMA"
 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_mla_s<q>")]
)
727
;; Fused multiply-subtract on VH modes: (-operand 1) * operand 2 +
;; operand 3, with operand 3 tied to the destination.  Emitted as vfms and
;; gated only on TARGET_NEON_FP16INST.
(define_insn "fmsub<VH:mode>4_intrinsic"
  [(set (match_operand:VH 0 "register_operand" "=w")
        (fma:VH (neg:VH (match_operand:VH 1 "register_operand" "w"))
                (match_operand:VH 2 "register_operand" "w")
                (match_operand:VH 3 "register_operand" "0")))]
 "TARGET_NEON_FP16INST"
 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_mla_s<q>")]
)
738
;; Vector round on VCVTF modes, one pattern per NEON_VRINT unspec.  The
;; variant suffix of the vrint mnemonic comes from nvrint_variant.  Requires
;; TARGET_NEON and TARGET_VFP5.
(define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
  [(set
     (match_operand:VCVTF 0 "s_register_operand" "=w")
     (unspec:VCVTF
       [(match_operand:VCVTF 1 "s_register_operand" "w")]
       NEON_VRINT))]
  "TARGET_NEON && TARGET_VFP5"
  "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
)
748
;; Float-to-integer vector conversion with an explicit rounding variant:
;; a FIXUORS code applied to a NEON_VCVT unspec of a VCVTF-mode register,
;; producing a V_cmp_result-mode value.  Emitted as vcvt with the variant
;; suffix from nvrint_variant.  Requires TARGET_NEON and TARGET_VFP5; the
;; insn is marked as not predicable.
(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
  [(set
     (match_operand:<V_cmp_result> 0 "register_operand" "=w")
     (FIXUORS:<V_cmp_result>
       (unspec:VCVTF
         [(match_operand:VCVTF 1 "register_operand" "w")]
         NEON_VCVT)))]
  "TARGET_NEON && TARGET_VFP5"
  "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
   (set_attr "predicable" "no")]
)
759
;; Bitwise inclusive-or on VDQ modes.  Alternative 0 ors two registers with
;; vorr; alternative 1 (operand 1 tied to the destination, operand 2
;; matching the Dl constraint) is printed by neon_output_logic_immediate.
(define_insn "ior<mode>3"
  [(set
     (match_operand:VDQ 0 "s_register_operand" "=w,w")
     (ior:VDQ
       (match_operand:VDQ 1 "s_register_operand" "w,0")
       (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
  "TARGET_NEON"
{
  if (which_alternative == 0)
    return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";

  if (which_alternative == 1)
    return neon_output_logic_immediate ("vorr", &operands[2],
                                        <MODE>mode, 0,
                                        VALID_NEON_QREG_MODE (<MODE>mode));

  gcc_unreachable ();
}
  [(set_attr "type" "neon_logic<q>")]
)
776
777;; The concrete forms of the Neon immediate-logic instructions are vbic and
778;; vorr. We support the pseudo-instruction vand instead, because that
779;; corresponds to the canonical form the middle-end expects to use for
780;; immediate bitwise-ANDs.
781
;; Bitwise and on VDQ modes.  Alternative 0 ands two registers with vand;
;; alternative 1 (operand 1 tied to the destination, operand 2 matching the
;; DL constraint) is printed by neon_output_logic_immediate, called with the
;; vand mnemonic and its fourth argument set to 1.
(define_insn "and<mode>3"
  [(set
     (match_operand:VDQ 0 "s_register_operand" "=w,w")
     (and:VDQ
       (match_operand:VDQ 1 "s_register_operand" "w,0")
       (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
  "TARGET_NEON"
{
  if (which_alternative == 0)
    return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";

  if (which_alternative == 1)
    return neon_output_logic_immediate ("vand", &operands[2],
                                        <MODE>mode, 1,
                                        VALID_NEON_QREG_MODE (<MODE>mode));

  gcc_unreachable ();
}
  [(set_attr "type" "neon_logic<q>")]
)
798
;; Or-not on VDQ modes: operand 0 = (~operand 2) | operand 1, emitted as
;; vorn.
(define_insn "orn<mode>3_neon"
  [(set
     (match_operand:VDQ 0 "s_register_operand" "=w")
     (ior:VDQ
       (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
       (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)
807
808;; TODO: investigate whether we should disable
809;; this and bicdi3_neon for the A8 in line with the other
810;; changes above.
;; DImode or-not: operand 0 = (~operand 2) | operand 1.  Alternative 0 uses
;; NEON registers and emits vorn.  Alternatives 1-3 use core registers and
;; are split after reload whenever the destination is not a VFP register:
;; for Thumb-2 into two SImode or-not sets on the low and high words,
;; otherwise into a DImode one's complement followed by a DImode or.
(define_insn_and_split "orndi3_neon"
  [(set
     (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
     (ior:DI
       (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
       (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
  "TARGET_NEON"
  "@
   vorn\t%P0, %P1, %P2
   #
   #
   #"
  "reload_completed &&
   (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
  [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
   (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
{
  if (!TARGET_THUMB2)
    {
      /* Emit the complement and the or as two separate DImode insns and
         skip the replacement pattern.  */
      emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
      emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
      DONE;
    }

  /* Thumb-2: hand the word halves to the replacement pattern.  Each high
     part is taken before the operand is narrowed to its low part.  */
  operands[3] = gen_highpart (SImode, operands[0]);
  operands[0] = gen_lowpart (SImode, operands[0]);
  operands[4] = gen_highpart (SImode, operands[2]);
  operands[2] = gen_lowpart (SImode, operands[2]);
  operands[5] = gen_highpart (SImode, operands[1]);
  operands[1] = gen_lowpart (SImode, operands[1]);
}
  [(set_attr "type" "neon_logic,multiple,multiple,multiple")
   (set_attr "length" "*,16,8,8")
   (set_attr "arch" "any,a,t2,t2")]
)
847
;; Bit clear on VDQ modes: operand 0 = (~operand 2) & operand 1, emitted as
;; vbic.
(define_insn "bic<mode>3_neon"
  [(set
     (match_operand:VDQ 0 "s_register_operand" "=w")
     (and:VDQ
       (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
       (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)
856
857;; Compare to *anddi_notdi_di.
;; DImode bit-clear: operand 0 = operand 1 AND (NOT operand 2).
;; Alternative 0 is a NEON vbic on D registers.  The two core-register
;; alternatives output "#"; no splitter is attached to this pattern, so
;; they rely on a split defined elsewhere.
858(define_insn "bicdi3_neon"
859  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
860        (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
861		(match_operand:DI 1 "s_register_operand" "w,0,r")))]
862  "TARGET_NEON"
863  "@
864   vbic\t%P0, %P1, %P2
865   #
866   #"
867  [(set_attr "type" "neon_logic,multiple,multiple")
868   (set_attr "length" "*,8,8")]
869)
870
;; Vector exclusive-or for every mode in the VDQ iterator, emitted as veor.
871(define_insn "xor<mode>3"
872  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
873	(xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
874		 (match_operand:VDQ 2 "s_register_operand" "w")))]
875  "TARGET_NEON"
876  "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
877  [(set_attr "type" "neon_logic<q>")]
878)
879
;; Vector bitwise complement for every mode in the VDQ iterator, emitted
;; as vmvn.  Note the scheduling type is neon_move<q>, not neon_logic<q>.
880(define_insn "one_cmpl<mode>2"
881  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
882        (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
883  "TARGET_NEON"
884  "vmvn\t%<V_reg>0, %<V_reg>1"
885  [(set_attr "type" "neon_move<q>")]
886)
887
;; Element-wise absolute value for the VDQW modes, emitted as
;; vabs.<V_s_elem>.  The "type" attribute is neon_fp_abs_s<q> when
;; <Is_float_mode> holds and neon_abs<q> otherwise.
888(define_insn "abs<mode>2"
889  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
890	(abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
891  "TARGET_NEON"
892  "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
893  [(set (attr "type")
894      (if_then_else (match_test "<Is_float_mode>")
895                    (const_string "neon_fp_abs_s<q>")
896                    (const_string "neon_abs<q>")))]
897)
898
;; Element-wise negation for the VDQW modes, emitted as vneg.<V_s_elem>.
;; The "type" attribute is neon_fp_neg_s<q> when <Is_float_mode> holds and
;; neon_neg<q> otherwise.  The vector right-shift expanders in this file
;; use gen_neg<mode>2 to negate a register shift count.
899(define_insn "neg<mode>2"
900  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
901	(neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
902  "TARGET_NEON"
903  "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
904  [(set (attr "type")
905      (if_then_else (match_test "<Is_float_mode>")
906                    (const_string "neon_fp_neg_s<q>")
907                    (const_string "neon_neg<q>")))]
908)
909
;; DImode negation placeholder: every alternative outputs "#", so the
;; insn is always replaced by one of the two define_splits that follow.
;; Alternatives 0 and 1 use NEON ("w") registers, with alternative 1
;; requesting an early-clobber "w" scratch; alternatives 2 and 3 use core
;; registers.  The condition-code register is clobbered in all cases.
910(define_insn "negdi2_neon"
911  [(set (match_operand:DI 0 "s_register_operand"	 "=&w, w,r,&r")
912	(neg:DI (match_operand:DI 1 "s_register_operand" "  w, w,0, r")))
913   (clobber (match_scratch:DI 2				 "= X,&w,X, X"))
914   (clobber (reg:CC CC_REGNUM))]
915  "TARGET_NEON"
916  "#"
917  [(set_attr "length" "8")
918   (set_attr "type" "multiple")]
919)
920
921; Split negdi2_neon for vfp registers
;; After reload, when operand 0 is a VFP register, rewrite the negation as
;; "temp = 0; operand 0 = temp - operand 1" (the subtraction keeps the CC
;; clobber).  The temporary is scratch operand 2 when a register was
;; allocated for it; otherwise operand 0 itself is used as the temporary.
922(define_split
923  [(set (match_operand:DI 0 "s_register_operand" "")
924	(neg:DI (match_operand:DI 1 "s_register_operand" "")))
925   (clobber (match_scratch:DI 2 ""))
926   (clobber (reg:CC CC_REGNUM))]
927  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
928  [(set (match_dup 2) (const_int 0))
929   (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
930	      (clobber (reg:CC CC_REGNUM))])]
931  {
932    if (!REG_P (operands[2]))
933      operands[2] = operands[0];
934  }
935)
936
937; Split negdi2_neon for core registers
;; After reload, when operand 0 satisfies arm_general_register_operand,
;; drop the scratch clobber and re-emit the plain (neg:DI ...) with only
;; the CC clobber, so that another pattern can match it.
938(define_split
939  [(set (match_operand:DI 0 "s_register_operand" "")
940	(neg:DI (match_operand:DI 1 "s_register_operand" "")))
941   (clobber (match_scratch:DI 2 ""))
942   (clobber (reg:CC CC_REGNUM))]
943  "TARGET_32BIT && reload_completed
944   && arm_general_register_operand (operands[0], DImode)"
945  [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
946	      (clobber (reg:CC CC_REGNUM))])]
947  ""
948)
949
;; Absolute value / negation (ABSNEG code iterator) for the VH modes,
;; available only with TARGET_NEON_FP16INST.  Emits
;; v<absneg_str>.<V_s_elem>.
;; NOTE(review): the "type" is neon_abs<q> for both codes, unlike
;; abs<mode>2/neg<mode>2 above which pick an fp type for float modes --
;; confirm this is intended.
950(define_insn "<absneg_str><mode>2"
951  [(set (match_operand:VH 0 "s_register_operand" "=w")
952    (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
953 "TARGET_NEON_FP16INST"
954 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
955 [(set_attr "type" "neon_abs<q>")]
956)
957
;; Named expander for the VH abs/neg operations; it simply forwards both
;; operands to the <absneg_str><mode>2 insn.
958(define_expand "neon_v<absneg_str><mode>"
959 [(set
960   (match_operand:VH 0 "s_register_operand")
961   (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
962 "TARGET_NEON_FP16INST"
963{
964  emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
965  DONE;
966})
967
;; Rounding operations on the VH modes, one pattern per member of the
;; FP16_RND unspec iterator; the mnemonic comes from <fp16_rnd_insn>.
;; Requires TARGET_NEON_FP16INST.
968(define_insn "neon_v<fp16_rnd_str><mode>"
969  [(set (match_operand:VH 0 "s_register_operand" "=w")
970    (unspec:VH
971     [(match_operand:VH 1 "s_register_operand" "w")]
972     FP16_RND))]
973 "TARGET_NEON_FP16INST"
974 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
975 [(set_attr "type" "neon_fp_round_s<q>")]
976)
977
;; UNSPEC_VRSQRTE on the VH modes, emitted as vrsqrte.f16 (the element
;; suffix is hard-coded rather than taken from a mode attribute).
;; Requires TARGET_NEON_FP16INST.
978(define_insn "neon_vrsqrte<mode>"
979  [(set (match_operand:VH 0 "s_register_operand" "=w")
980    (unspec:VH
981     [(match_operand:VH 1 "s_register_operand" "w")]
982     UNSPEC_VRSQRTE))]
983  "TARGET_NEON_FP16INST"
984  "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
985 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
986)
987
;; Element-wise unsigned minimum for the VDQIW modes, emitted as
;; vmin.<V_u_elem>.
988(define_insn "*umin<mode>3_neon"
989  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
990	(umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
991		    (match_operand:VDQIW 2 "s_register_operand" "w")))]
992  "TARGET_NEON"
993  "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
994  [(set_attr "type" "neon_minmax<q>")]
995)
996
;; Element-wise unsigned maximum for the VDQIW modes, emitted as
;; vmax.<V_u_elem>.
997(define_insn "*umax<mode>3_neon"
998  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
999	(umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1000		    (match_operand:VDQIW 2 "s_register_operand" "w")))]
1001  "TARGET_NEON"
1002  "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1003  [(set_attr "type" "neon_minmax<q>")]
1004)
1005
;; Element-wise signed minimum for the VDQW modes, emitted as
;; vmin.<V_s_elem>.  The "type" attribute is neon_fp_minmax_s<q> when
;; <Is_float_mode> holds and neon_minmax<q> otherwise.
1006(define_insn "*smin<mode>3_neon"
1007  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1008	(smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1009		   (match_operand:VDQW 2 "s_register_operand" "w")))]
1010  "TARGET_NEON"
1011  "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1012  [(set (attr "type")
1013      (if_then_else (match_test "<Is_float_mode>")
1014                    (const_string "neon_fp_minmax_s<q>")
1015                    (const_string "neon_minmax<q>")))]
1016)
1017
;; Element-wise signed maximum for the VDQW modes, emitted as
;; vmax.<V_s_elem>.  The "type" attribute is neon_fp_minmax_s<q> when
;; <Is_float_mode> holds and neon_minmax<q> otherwise.
1018(define_insn "*smax<mode>3_neon"
1019  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1020	(smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1021		   (match_operand:VDQW 2 "s_register_operand" "w")))]
1022  "TARGET_NEON"
1023  "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1024  [(set (attr "type")
1025      (if_then_else (match_test "<Is_float_mode>")
1026                    (const_string "neon_fp_minmax_s<q>")
1027                    (const_string "neon_minmax<q>")))]
1028)
1029
1030; TODO: V2DI shifts are currently disabled because there are bugs in the
1031; generic vectorizer code.  It ends up creating a V2DI constructor with
1032; SImode elements.
1033
;; Vector left shift for the VDQIW modes.  Alternative 0 shifts by a
;; vector register and emits vshl.<V_s_elem>; alternative 1 shifts by a
;; "Dm" constant and obtains its template from
;; neon_output_shift_immediate ("vshl", 'i', ..., true).
1034(define_insn "vashl<mode>3"
1035  [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
1036	(ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
1037		      (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dm")))]
1038  "TARGET_NEON"
1039  {
1040    switch (which_alternative)
1041      {
1042        case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
1043        case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
1044                         			    <MODE>mode,
1045						    VALID_NEON_QREG_MODE (<MODE>mode),
1046						    true);
1047        default: gcc_unreachable ();
1048      }
1049  }
1050  [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
1051)
1052
;; Vector arithmetic right shift by a constant that satisfies
;; imm_for_neon_rshift_operand.  The template comes from
;; neon_output_shift_immediate ("vshr", 's', ..., false).  Used by the
;; vashr<mode>3 expander for non-register shift counts.
1053(define_insn "vashr<mode>3_imm"
1054  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1055	(ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1056			(match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
1057  "TARGET_NEON"
1058  {
1059    return neon_output_shift_immediate ("vshr", 's', &operands[2],
1060					<MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1061					false);
1062  }
1063  [(set_attr "type" "neon_shift_imm<q>")]
1064)
1065
;; Vector logical right shift by a constant that satisfies
;; imm_for_neon_rshift_operand.  The template comes from
;; neon_output_shift_immediate ("vshr", 'u', ..., false).  Used by the
;; vlshr<mode>3 expander for non-register shift counts.
1066(define_insn "vlshr<mode>3_imm"
1067  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1068	(lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1069			(match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
1070  "TARGET_NEON"
1071  {
1072    return neon_output_shift_immediate ("vshr", 'u', &operands[2],
1073					<MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1074					false);
1075  }
1076  [(set_attr "type" "neon_shift_imm<q>")]
1077)
1078
1079; Used for implementing arithmetic shift-right, which is a left-shift by a
1080; negative amount, with signed operands. This is essentially the same as
1081; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky
1082; with negative shift amounts.
1083
;; Register-count vshl with a signed element type (vshl.<V_s_elem>),
;; wrapped in UNSPEC_ASHIFT_SIGNED.  The vashr<mode>3 expander passes it a
;; negated shift count.
1084(define_insn "ashl<mode>3_signed"
1085  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1086	(unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1087		      (match_operand:VDQI 2 "s_register_operand" "w")]
1088		     UNSPEC_ASHIFT_SIGNED))]
1089  "TARGET_NEON"
1090  "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1091  [(set_attr "type" "neon_shift_reg<q>")]
1092)
1093
1094; Used for implementing logical shift-right, which is a left-shift by a negative
1095; amount, with unsigned operands.
1096
;; Register-count vshl with an unsigned element type (vshl.<V_u_elem>),
;; wrapped in UNSPEC_ASHIFT_UNSIGNED.  The vlshr<mode>3 expander passes it
;; a negated shift count.
1097(define_insn "ashl<mode>3_unsigned"
1098  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1099	(unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1100		      (match_operand:VDQI 2 "s_register_operand" "w")]
1101		     UNSPEC_ASHIFT_UNSIGNED))]
1102  "TARGET_NEON"
1103  "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1104  [(set_attr "type" "neon_shift_reg<q>")]
1105)
1106
;; Vector arithmetic right shift expander.  When the shift count is a
;; register, it is negated into a fresh pseudo and the shift is emitted
;; as ashl<mode>3_signed with that negated count; any other count goes to
;; vashr<mode>3_imm.
1107(define_expand "vashr<mode>3"
1108  [(set (match_operand:VDQIW 0 "s_register_operand" "")
1109	(ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1110			(match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1111  "TARGET_NEON"
1112{
1113  if (s_register_operand (operands[2], <MODE>mode))
1114    {
1115      rtx neg = gen_reg_rtx (<MODE>mode);
1116      emit_insn (gen_neg<mode>2 (neg, operands[2]));
1117      emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1118    }
1119  else
1120    emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
1121  DONE;
1122})
1123
;; Vector logical right shift expander.  When the shift count is a
;; register, it is negated into a fresh pseudo and the shift is emitted
;; as ashl<mode>3_unsigned with that negated count; any other count goes
;; to vlshr<mode>3_imm.
1124(define_expand "vlshr<mode>3"
1125  [(set (match_operand:VDQIW 0 "s_register_operand" "")
1126	(lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1127			(match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1128  "TARGET_NEON"
1129{
1130  if (s_register_operand (operands[2], <MODE>mode))
1131    {
1132      rtx neg = gen_reg_rtx (<MODE>mode);
1133      emit_insn (gen_neg<mode>2 (neg, operands[2]));
1134      emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1135    }
1136  else
1137    emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1138  DONE;
1139})
1140
1141;; 64-bit shifts
1142
1143;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1144;; leaving the upper half uninitialized.  This is OK since the shift
1145;; instruction only looks at the low 8 bits anyway.  To avoid confusing
1146;; data flow analysis however, we pretend the full register is set
1147;; using an unspec.
;; Alternative 0 takes the SImode count from memory ("Um") with a vld1.32
;; into lane 0 of the destination; alternative 1 takes it from a core
;; register with vmov.32 into lane 0.
1148(define_insn "neon_load_count"
1149  [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1150        (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1151                   UNSPEC_LOAD_COUNT))]
1152  "TARGET_NEON"
1153  "@
1154   vld1.32\t{%P0[0]}, %A1
1155   vmov.32\t%P0[0], %1"
1156  [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1157)
1158
;; DImode left shift in a NEON D register without any clobbers; it is
;; valid only after reload.  A constant count must lie in [0, 64);
;; alternative 0 prints it as an immediate and alternative 1 takes the
;; count from a D register.  Both alternatives emit vshl.u64.
1159(define_insn "ashldi3_neon_noclobber"
1160  [(set (match_operand:DI 0 "s_register_operand"	    "=w,w")
1161	(ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1162		   (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1163  "TARGET_NEON && reload_completed
1164   && (!CONST_INT_P (operands[2])
1165       || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1166  "@
1167   vshl.u64\t%P0, %P1, %2
1168   vshl.u64\t%P0, %P1, %P2"
1169  [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1170)
1171
;; DImode left shift that can be allocated to either NEON or core
;; registers; it always outputs "#" and is split after reload.
;;
;; If operand 0 is a VFP register:
;;   - a constant count below 1 becomes a plain movdi;
;;   - a constant count above 63 is replaced by 63;
;;   - a non-constant count is first loaded into scratch operand 5 with
;;     neon_load_count;
;;   and the shift itself is emitted through ashldi3_neon_noclobber.
;; Otherwise the operands must either not overlap or be the same register
;; (asserted), and the work is delegated to
;; arm_emit_coreregs_64bit_shift with scratch operands 3 and 4.
;;
;; The "arch" and "opt" attributes select, per alternative, between the
;; neon_for_64bits / avoid_neon_for_64bits variants and the speed-only
;; core-register alternatives.
1172(define_insn_and_split "ashldi3_neon"
1173  [(set (match_operand:DI 0 "s_register_operand"	    "= w, w, &r, r, &r, ?w,?w")
1174	(ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  r, 0w, w")
1175		   (match_operand:SI 2 "general_operand"    "rUm, i,  r, i,  i,rUm, i")))
1176   (clobber (match_scratch:SI 3				    "= X, X, &r, X,  X,  X, X"))
1177   (clobber (match_scratch:SI 4				    "= X, X, &r, X,  X,  X, X"))
1178   (clobber (match_scratch:DI 5				    "=&w, X,  X, X,  X, &w, X"))
1179   (clobber (reg:CC_C CC_REGNUM))]
1180  "TARGET_NEON"
1181  "#"
1182  "TARGET_NEON && reload_completed"
1183  [(const_int 0)]
1184  "
1185  {
1186    if (IS_VFP_REGNUM (REGNO (operands[0])))
1187      {
1188        if (CONST_INT_P (operands[2]))
1189	  {
1190	    if (INTVAL (operands[2]) < 1)
1191	      {
1192	        emit_insn (gen_movdi (operands[0], operands[1]));
1193		DONE;
1194	      }
1195	    else if (INTVAL (operands[2]) > 63)
1196	      operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1197	  }
1198	else
1199	  {
1200	    emit_insn (gen_neon_load_count (operands[5], operands[2]));
1201	    operands[2] = operands[5];
1202	  }
1203
1204	/* Ditch the unnecessary clobbers.  */
1205	emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1206					       operands[2]));
1207      }
1208    else
1209      {
1210	/* The shift expanders support either full overlap or no overlap.  */
1211	gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1212		    || REGNO (operands[0]) == REGNO (operands[1]));
1213
1214	arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1215				       operands[2], operands[3], operands[4]);
1216      }
1217    DONE;
1218  }"
1219  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1220   (set_attr "opt" "*,*,speed,speed,speed,*,*")
1221   (set_attr "type" "multiple")]
1222)
1223
1224; The shift amount needs to be negated for right-shifts
;; DImode vshl.s64 by a count held in a D register, wrapped in
;; UNSPEC_ASHIFT_SIGNED; valid only after reload.
1225(define_insn "signed_shift_di3_neon"
1226  [(set (match_operand:DI 0 "s_register_operand"	     "=w")
1227	(unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1228		    (match_operand:DI 2 "s_register_operand" " w")]
1229		   UNSPEC_ASHIFT_SIGNED))]
1230  "TARGET_NEON && reload_completed"
1231  "vshl.s64\t%P0, %P1, %P2"
1232  [(set_attr "type" "neon_shift_reg")]
1233)
1234
1235; The shift amount needs to be negated for right-shifts
;; DImode vshl.u64 by a count held in a D register, wrapped in
;; UNSPEC_ASHIFT_UNSIGNED; valid only after reload.
1236(define_insn "unsigned_shift_di3_neon"
1237  [(set (match_operand:DI 0 "s_register_operand"	     "=w")
1238	(unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1239		    (match_operand:DI 2 "s_register_operand" " w")]
1240		   UNSPEC_ASHIFT_UNSIGNED))]
1241  "TARGET_NEON && reload_completed"
1242  "vshl.u64\t%P0, %P1, %P2"
1243  [(set_attr "type" "neon_shift_reg")]
1244)
1245
;; DImode arithmetic right shift by a constant in a NEON D register,
;; without clobbers; valid only after reload and only for counts in
;; (0, 64].  Emits vshr.s64.
1246(define_insn "ashrdi3_neon_imm_noclobber"
1247  [(set (match_operand:DI 0 "s_register_operand"	      "=w")
1248	(ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1249		     (match_operand:DI 2 "const_int_operand"  " i")))]
1250  "TARGET_NEON && reload_completed
1251   && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1252  "vshr.s64\t%P0, %P1, %2"
1253  [(set_attr "type" "neon_shift_imm")]
1254)
1255
;; DImode logical right shift by a constant in a NEON D register,
;; without clobbers; valid only after reload and only for counts in
;; (0, 64].  Emits vshr.u64.
1256(define_insn "lshrdi3_neon_imm_noclobber"
1257  [(set (match_operand:DI 0 "s_register_operand"	      "=w")
1258	(lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1259		     (match_operand:DI 2 "const_int_operand"  " i")))]
1260  "TARGET_NEON && reload_completed
1261   && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1262  "vshr.u64\t%P0, %P1, %2"
1263  [(set_attr "type" "neon_shift_imm")]
1264)
1265
1266;; ashrdi3_neon
1267;; lshrdi3_neon
;; DImode right shifts (RSHIFTS code iterator) that can be allocated to
;; either NEON or core registers; always outputs "#" and is split after
;; reload.
;;
;; If operand 0 is a VFP register:
;;   - a constant count below 1 becomes a plain movdi;
;;   - a constant count above 64 is replaced by 64, and the shift is
;;     emitted through <shift>di3_neon_imm_noclobber;
;;   - a register count is negated into scratch operand 3 (negsi2),
;;     loaded into scratch operand 5 (neon_load_count) and applied with
;;     <shifttype>_shift_di3_neon, i.e. as a left shift by a negative
;;     amount.
;; Otherwise the operands must either not overlap or be the same register
;; (asserted), and the work is delegated to
;; arm_emit_coreregs_64bit_shift with scratch operands 3 and 4.
1268(define_insn_and_split "<shift>di3_neon"
1269  [(set (match_operand:DI 0 "s_register_operand"	     "= w, w, &r, r, &r,?w,?w")
1270	(RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  r,0w, w")
1271		    (match_operand:SI 2 "reg_or_int_operand" "  r, i,  r, i,  i, r, i")))
1272   (clobber (match_scratch:SI 3				     "=2r, X, &r, X,  X,2r, X"))
1273   (clobber (match_scratch:SI 4				     "= X, X, &r, X,  X, X, X"))
1274   (clobber (match_scratch:DI 5				     "=&w, X,  X, X, X,&w, X"))
1275   (clobber (reg:CC CC_REGNUM))]
1276  "TARGET_NEON"
1277  "#"
1278  "TARGET_NEON && reload_completed"
1279  [(const_int 0)]
1280  "
1281  {
1282    if (IS_VFP_REGNUM (REGNO (operands[0])))
1283      {
1284	if (CONST_INT_P (operands[2]))
1285	  {
1286	    if (INTVAL (operands[2]) < 1)
1287	      {
1288	        emit_insn (gen_movdi (operands[0], operands[1]));
1289		DONE;
1290	      }
1291	    else if (INTVAL (operands[2]) > 64)
1292	      operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1293
1294	    /* Ditch the unnecessary clobbers.  */
1295	    emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1296							  operands[1],
1297							  operands[2]));
1298	  }
1299	else
1300	  {
1301	    /* We must use a negative left-shift.  */
1302	    emit_insn (gen_negsi2 (operands[3], operands[2]));
1303	    emit_insn (gen_neon_load_count (operands[5], operands[3]));
1304	    emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1305						       operands[5]));
1306	  }
1307      }
1308    else
1309      {
1310	/* The shift expanders support either full overlap or no overlap.  */
1311	gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1312		    || REGNO (operands[0]) == REGNO (operands[1]));
1313
1314	/* This clobbers CC (ASHIFTRT by register only).  */
1315	arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1316				       operands[2], operands[3], operands[4]);
1317      }
1318
1319    DONE;
1320  }"
1321  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1322   (set_attr "opt" "*,*,speed,speed,speed,*,*")
1323   (set_attr "type" "multiple")]
1324)
1325
1326;; Widening operations
1327
;; Signed widening sum of a quad-word (VQI) vector into a
;; <V_double_width> accumulator.  The accumulator input (operand 2) is
;; copied into operand 0 when they are different rtxes; then the two
;; halves of operand 1 are accumulated into operand 0 by the
;; vec_sel_widen_ssum_lo/hi insns, using the selector parallels built by
;; arm_simd_vect_par_cnst_half (mode, false) and (mode, true).
1328(define_expand "widen_ssum<mode>3"
1329  [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1330	(plus:<V_double_width>
1331	 (sign_extend:<V_double_width>
1332	  (match_operand:VQI 1 "s_register_operand" ""))
1333	 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1334  "TARGET_NEON"
1335  {
1336    machine_mode mode = GET_MODE (operands[1]);
1337    rtx p1, p2;
1338
1339    p1  = arm_simd_vect_par_cnst_half (mode, false);
1340    p2  = arm_simd_vect_par_cnst_half (mode, true);
1341
1342    if (operands[0] != operands[2])
1343      emit_move_insn (operands[0], operands[2]);
1344
1345    emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1346							 operands[1],
1347							 p1,
1348							 operands[0]));
1349    emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
1350							 operands[1],
1351							 p2,
1352							 operands[0]));
1353    DONE;
1354  }
1355)
1356
;; Sign-extend the half of operand 1 selected by a vect_par_constant_low
;; parallel and add it to the accumulator (operand 3, tied to operand 0).
;; Emits vaddw.<V_s_elem>; the source half is printed with %e1, or with
;; %f1 when BYTES_BIG_ENDIAN.
1357(define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
1358  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1359	(plus:<V_double_width>
1360	 (sign_extend:<V_double_width>
1361	  (vec_select:<V_HALF>
1362	   (match_operand:VQI 1 "s_register_operand" "%w")
1363	   (match_operand:VQI 2 "vect_par_constant_low" "")))
1364	 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1365  "TARGET_NEON"
1366{
1367  return BYTES_BIG_ENDIAN ?  "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1368    "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1369}
1370  [(set_attr "type" "neon_add_widen")])
1371
;; Sign-extend the half of operand 1 selected by a vect_par_constant_high
;; parallel and add it to the accumulator (operand 3, tied to operand 0).
;; Emits vaddw.<V_s_elem>; the source half is printed with %f1, or with
;; %e1 when BYTES_BIG_ENDIAN.
1372(define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
1373  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1374	(plus:<V_double_width>
1375	 (sign_extend:<V_double_width>
1376	  (vec_select:<V_HALF>
1377			 (match_operand:VQI 1 "s_register_operand" "%w")
1378			 (match_operand:VQI 2 "vect_par_constant_high" "")))
1379	 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1380  "TARGET_NEON"
1381{
1382  return BYTES_BIG_ENDIAN ?  "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1383    "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1384}
1385  [(set_attr "type" "neon_add_widen")])
1386
;; Signed widening sum for the double-word VW modes: sign-extend operand 1
;; to <V_widen> and add it to operand 2 with a single vaddw.<V_s_elem>.
1387(define_insn "widen_ssum<mode>3"
1388  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1389	(plus:<V_widen>
1390	 (sign_extend:<V_widen>
1391	  (match_operand:VW 1 "s_register_operand" "%w"))
1392	 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1393  "TARGET_NEON"
1394  "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1395  [(set_attr "type" "neon_add_widen")]
1396)
1397
;; Unsigned widening sum of a quad-word (VQI) vector into a
;; <V_double_width> accumulator.  The accumulator input (operand 2) is
;; copied into operand 0 when they are different rtxes; then the two
;; halves of operand 1 are accumulated into operand 0 by the
;; vec_sel_widen_usum_lo/hi insns, using the selector parallels built by
;; arm_simd_vect_par_cnst_half (mode, false) and (mode, true).
1398(define_expand "widen_usum<mode>3"
1399  [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1400	(plus:<V_double_width>
1401	 (zero_extend:<V_double_width>
1402	  (match_operand:VQI 1 "s_register_operand" ""))
1403	 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1404  "TARGET_NEON"
1405  {
1406    machine_mode mode = GET_MODE (operands[1]);
1407    rtx p1, p2;
1408
1409    p1  = arm_simd_vect_par_cnst_half (mode, false);
1410    p2  = arm_simd_vect_par_cnst_half (mode, true);
1411
1412    if (operands[0] != operands[2])
1413      emit_move_insn (operands[0], operands[2]);
1414
1415    emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1416							 operands[1],
1417							 p1,
1418							 operands[0]));
1419    emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
1420							 operands[1],
1421							 p2,
1422							 operands[0]));
1423    DONE;
1424  }
1425)
1426
;; Zero-extend the half of operand 1 selected by a vect_par_constant_low
;; parallel and add it to the accumulator (operand 3, tied to operand 0).
;; Emits vaddw.<V_u_elem>; the source half is printed with %e1, or with
;; %f1 when BYTES_BIG_ENDIAN.
1427(define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1428  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1429	(plus:<V_double_width>
1430	 (zero_extend:<V_double_width>
1431	  (vec_select:<V_HALF>
1432	   (match_operand:VQI 1 "s_register_operand" "%w")
1433	   (match_operand:VQI 2 "vect_par_constant_low" "")))
1434	 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1435  "TARGET_NEON"
1436{
1437  return BYTES_BIG_ENDIAN ?  "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1438    "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1439}
1440  [(set_attr "type" "neon_add_widen")])
1441
;; Zero-extend the half of operand 1 selected by a vect_par_constant_high
;; parallel and add it to the accumulator (operand 3, tied to operand 0).
;; Emits vaddw.<V_u_elem>; the source half is printed with %f1, or with
;; %e1 when BYTES_BIG_ENDIAN.
1442(define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1443  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1444	(plus:<V_double_width>
1445	 (zero_extend:<V_double_width>
1446	  (vec_select:<V_HALF>
1447			 (match_operand:VQI 1 "s_register_operand" "%w")
1448			 (match_operand:VQI 2 "vect_par_constant_high" "")))
1449	 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1450  "TARGET_NEON"
1451{
1452 return BYTES_BIG_ENDIAN ?  "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1453    "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1454}
1455  [(set_attr "type" "neon_add_widen")])
1456
;; Unsigned widening sum for the double-word VW modes: zero-extend
;; operand 1 to <V_widen> and add it to operand 2 with a single
;; vaddw.<V_u_elem>.
1457(define_insn "widen_usum<mode>3"
1458  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1459	(plus:<V_widen> (zero_extend:<V_widen>
1460			  (match_operand:VW 1 "s_register_operand" "%w"))
1461		        (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1462  "TARGET_NEON"
1463  "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1464  [(set_attr "type" "neon_add_widen")]
1465)
1466
1467;; Helpers for quad-word reduction operations
1468
1469; Add (or smin, smax...) the low N/2 elements of the N-element vector
1470; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1471; N/2-element vector.
1472
;; V4SI instance: combine elements 0-1 of operand 1 with elements 2-3
;; using each operation in VQH_OPS, producing a V2SI result.  The template
;; prints the two source halves as %e1 and %f1.
1473(define_insn "quad_halves_<code>v4si"
1474  [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1475        (VQH_OPS:V2SI
1476          (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1477                           (parallel [(const_int 0) (const_int 1)]))
1478          (vec_select:V2SI (match_dup 1)
1479                           (parallel [(const_int 2) (const_int 3)]))))]
1480  "TARGET_NEON"
1481  "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1482  [(set_attr "vqh_mnem" "<VQH_mnem>")
1483   (set_attr "type" "neon_reduc_<VQH_type>_q")]
1484)
1485
;; V4SF instance: combine elements 0-1 of operand 1 with elements 2-3
;; using each operation in VQHS_OPS, producing a V2SF result.  Only
;; enabled under flag_unsafe_math_optimizations.
1486(define_insn "quad_halves_<code>v4sf"
1487  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1488        (VQHS_OPS:V2SF
1489          (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1490                           (parallel [(const_int 0) (const_int 1)]))
1491          (vec_select:V2SF (match_dup 1)
1492                           (parallel [(const_int 2) (const_int 3)]))))]
1493  "TARGET_NEON && flag_unsafe_math_optimizations"
1494  "<VQH_mnem>.f32\t%P0, %e1, %f1"
1495  [(set_attr "vqh_mnem" "<VQH_mnem>")
1496   (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1497)
1498
;; V8HI instance: combine elements 0-3 of operand 1 with elements 4-7
;; using each operation in VQH_OPS, producing a V4HI result.  The template
;; prints the two source halves as %e1 and %f1.
;; Operand 0 is only written by this pattern, so it is an output-only
;; ("=w") operand, matching the V4SI and V4SF variants.
1499(define_insn "quad_halves_<code>v8hi"
1500  [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1501        (VQH_OPS:V4HI
1502          (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1503                           (parallel [(const_int 0) (const_int 1)
1504				      (const_int 2) (const_int 3)]))
1505          (vec_select:V4HI (match_dup 1)
1506                           (parallel [(const_int 4) (const_int 5)
1507				      (const_int 6) (const_int 7)]))))]
1508  "TARGET_NEON"
1509  "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1510  [(set_attr "vqh_mnem" "<VQH_mnem>")
1511   (set_attr "type" "neon_reduc_<VQH_type>_q")]
1512)
1513
;; V16QI instance: combine elements 0-7 of operand 1 with elements 8-15
;; using each operation in VQH_OPS, producing a V8QI result.  The template
;; prints the two source halves as %e1 and %f1.
;; Operand 0 is only written by this pattern, so it is an output-only
;; ("=w") operand, matching the V4SI and V4SF variants.
1514(define_insn "quad_halves_<code>v16qi"
1515  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1516        (VQH_OPS:V8QI
1517          (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1518                           (parallel [(const_int 0) (const_int 1)
1519				      (const_int 2) (const_int 3)
1520				      (const_int 4) (const_int 5)
1521				      (const_int 6) (const_int 7)]))
1522          (vec_select:V8QI (match_dup 1)
1523                           (parallel [(const_int 8) (const_int 9)
1524				      (const_int 10) (const_int 11)
1525				      (const_int 12) (const_int 13)
1526				      (const_int 14) (const_int 15)]))))]
1527  "TARGET_NEON"
1528  "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1529  [(set_attr "vqh_mnem" "<VQH_mnem>")
1530   (set_attr "type" "neon_reduc_<VQH_type>_q")]
1531)
1532
;; Store the <V_HALF> value in operand 1 into the part of the 128-bit
;; operand 0 that starts at byte offset GET_MODE_SIZE (<V_HALF>mode),
;; using a subreg move.
1533(define_expand "move_hi_quad_<mode>"
1534 [(match_operand:ANY128 0 "s_register_operand" "")
1535  (match_operand:<V_HALF> 1 "s_register_operand" "")]
1536 "TARGET_NEON"
1537{
1538  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1539				       GET_MODE_SIZE (<V_HALF>mode)),
1540		  operands[1]);
1541  DONE;
1542})
1543
;; Store the <V_HALF> value in operand 1 into the part of the 128-bit
;; operand 0 that starts at byte offset 0, using a subreg move.
1544(define_expand "move_lo_quad_<mode>"
1545 [(match_operand:ANY128 0 "s_register_operand" "")
1546  (match_operand:<V_HALF> 1 "s_register_operand" "")]
1547 "TARGET_NEON"
1548{
1549  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1550				       <MODE>mode, 0),
1551		  operands[1]);
1552  DONE;
1553})
1554
1555;; Reduction operations
1556
;; Sum-reduce a double-word (VD) vector to a scalar.  The vector is
;; reduced with neon_pairwise_reduce using neon_vpadd_internal<mode>, and
;; element 0 of the result is extracted into operand 0.  Float modes
;; additionally require flag_unsafe_math_optimizations.
1557(define_expand "reduc_plus_scal_<mode>"
1558  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1559   (match_operand:VD 1 "s_register_operand" "")]
1560  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1561{
1562  rtx vec = gen_reg_rtx (<MODE>mode);
1563  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1564			&gen_neon_vpadd_internal<mode>);
1565  /* The same result is actually computed into every element.  */
1566  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1567  DONE;
1568})
1569
;; Sum-reduce a quad-word (VQ) vector to a scalar: first add the two
;; halves with quad_halves_plus<mode>, then reuse the <V_half>
;; reduc_plus_scal expander on the half-width result.  Not available for
;; BYTES_BIG_ENDIAN; float modes additionally require
;; flag_unsafe_math_optimizations.
1570(define_expand "reduc_plus_scal_<mode>"
1571  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1572   (match_operand:VQ 1 "s_register_operand" "")]
1573  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1574   && !BYTES_BIG_ENDIAN"
1575{
1576  rtx step1 = gen_reg_rtx (<V_HALF>mode);
1577
1578  emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1579  emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1580
1581  DONE;
1582})
1583
;; Sum-reduce a V2DI vector to a DImode scalar: compute the sum with
;; arm_reduc_plus_internal_v2di into a fresh V2DI pseudo, then extract
;; element 0 into operand 0.  Not available for BYTES_BIG_ENDIAN.
;; The operands carry empty constraint strings like every other expander
;; in this group.
1584(define_expand "reduc_plus_scal_v2di"
1585  [(match_operand:DI 0 "nonimmediate_operand" "")
1586   (match_operand:V2DI 1 "s_register_operand" "")]
1587  "TARGET_NEON && !BYTES_BIG_ENDIAN"
1588{
1589  rtx vec = gen_reg_rtx (V2DImode);
1590
1591  emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1592  emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
1593
1594  DONE;
1595})
1596
;; V2DI sum helper, represented as UNSPEC_VPADD.  The template is a single
;; vadd.i64 of the %e1 and %f1 halves of operand 1 whose destination is
;; %e0 only; reduc_plus_scal_v2di then reads element 0 of the result.
1597(define_insn "arm_reduc_plus_internal_v2di"
1598  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1599	(unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1600		     UNSPEC_VPADD))]
1601  "TARGET_NEON && !BYTES_BIG_ENDIAN"
1602  "vadd.i64\t%e0, %e1, %f1"
1603  [(set_attr "type" "neon_add_q")]
1604)
1605
;; Signed-minimum reduction of a double-word (VD) vector to a scalar via
;; neon_pairwise_reduce with neon_vpsmin<mode>, followed by extraction of
;; element 0.  Float modes require flag_unsafe_math_optimizations.
1606(define_expand "reduc_smin_scal_<mode>"
1607  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1608   (match_operand:VD 1 "s_register_operand" "")]
1609  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1610{
1611  rtx vec = gen_reg_rtx (<MODE>mode);
1612
1613  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1614			&gen_neon_vpsmin<mode>);
1615  /* The result is computed into every element of the vector.  */
1616  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1617  DONE;
1618})
1619
;; Signed-minimum reduction of a quad-word (VQ) vector: take the minimum
;; of the two halves with quad_halves_smin<mode>, then reuse the <V_half>
;; reduc_smin_scal expander.  Not available for BYTES_BIG_ENDIAN; float
;; modes require flag_unsafe_math_optimizations.
1620(define_expand "reduc_smin_scal_<mode>"
1621  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1622   (match_operand:VQ 1 "s_register_operand" "")]
1623  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1624   && !BYTES_BIG_ENDIAN"
1625{
1626  rtx step1 = gen_reg_rtx (<V_HALF>mode);
1627
1628  emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1629  emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1630
1631  DONE;
1632})
1633
;; Signed-maximum reduction of a double-word (VD) vector to a scalar via
;; neon_pairwise_reduce with neon_vpsmax<mode>, followed by extraction of
;; element 0.  Float modes require flag_unsafe_math_optimizations.
1634(define_expand "reduc_smax_scal_<mode>"
1635  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1636   (match_operand:VD 1 "s_register_operand" "")]
1637  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1638{
1639  rtx vec = gen_reg_rtx (<MODE>mode);
1640  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1641			&gen_neon_vpsmax<mode>);
1642  /* The result is computed into every element of the vector.  */
1643  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1644  DONE;
1645})
1646
;; Signed-maximum reduction of a quad-word (VQ) vector: take the maximum
;; of the two halves with quad_halves_smax<mode>, then reuse the <V_half>
;; reduc_smax_scal expander.  Not available for BYTES_BIG_ENDIAN; float
;; modes require flag_unsafe_math_optimizations.
1647(define_expand "reduc_smax_scal_<mode>"
1648  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1649   (match_operand:VQ 1 "s_register_operand" "")]
1650  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1651   && !BYTES_BIG_ENDIAN"
1652{
1653  rtx step1 = gen_reg_rtx (<V_HALF>mode);
1654
1655  emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1656  emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1657
1658  DONE;
1659})
1660
;; Unsigned-minimum reduction of a double-word integer (VDI) vector to a
;; scalar via neon_pairwise_reduce with neon_vpumin<mode>, followed by
;; extraction of element 0.
1661(define_expand "reduc_umin_scal_<mode>"
1662  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1663   (match_operand:VDI 1 "s_register_operand" "")]
1664  "TARGET_NEON"
1665{
1666  rtx vec = gen_reg_rtx (<MODE>mode);
1667  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1668			&gen_neon_vpumin<mode>);
1669  /* The result is computed into every element of the vector.  */
1670  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1671  DONE;
1672})
1673
;; Unsigned-minimum reduction of a quad-word integer (VQI) vector: take
;; the minimum of the two halves with quad_halves_umin<mode>, then reuse
;; the <V_half> reduc_umin_scal expander.  Not available for
;; BYTES_BIG_ENDIAN.
1674(define_expand "reduc_umin_scal_<mode>"
1675  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1676   (match_operand:VQI 1 "s_register_operand" "")]
1677  "TARGET_NEON && !BYTES_BIG_ENDIAN"
1678{
1679  rtx step1 = gen_reg_rtx (<V_HALF>mode);
1680
1681  emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1682  emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1683
1684  DONE;
1685})
1686
;; Unsigned-maximum reduction of a double-word integer (VDI) vector to a
;; scalar via neon_pairwise_reduce with neon_vpumax<mode>, followed by
;; extraction of element 0.
1687(define_expand "reduc_umax_scal_<mode>"
1688  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1689   (match_operand:VDI 1 "s_register_operand" "")]
1690  "TARGET_NEON"
1691{
1692  rtx vec = gen_reg_rtx (<MODE>mode);
1693  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1694			&gen_neon_vpumax<mode>);
1695  /* The result is computed into every element of the vector.  */
1696  emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1697  DONE;
1698})
1699
;; Unsigned-maximum reduction of a quad-word integer (VQI) vector: take
;; the maximum of the two halves with quad_halves_umax<mode>, then reuse
;; the <V_half> reduc_umax_scal expander.  Not available for
;; BYTES_BIG_ENDIAN.
1700(define_expand "reduc_umax_scal_<mode>"
1701  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1702   (match_operand:VQI 1 "s_register_operand" "")]
1703  "TARGET_NEON && !BYTES_BIG_ENDIAN"
1704{
1705  rtx step1 = gen_reg_rtx (<V_HALF>mode);
1706
1707  emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1708  emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1709
1710  DONE;
1711})
1712
;; Pairwise add of operands 1 and 2, represented as UNSPEC_VPADD, for each
;; of the VD modes.  Emitted as "vpadd.<V_if_elem>" with all three operands
;; printed through %P.  The "type" attribute is picked per mode: the FP
;; reduction type when <Is_float_mode> holds, the integer one otherwise.
(define_insn "neon_vpadd_internal<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
	(unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
		    (match_operand:VD 2 "s_register_operand" "w")]
                   UNSPEC_VPADD))]
  "TARGET_NEON"
  "vpadd.<V_if_elem>\t%P0, %P1, %P2"
  ;; Assume this schedules like vadd.
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_reduc_add_s<q>")
                    (const_string "neon_reduc_add<q>")))]
)
1726
;; V4HF form of the pairwise add (also UNSPEC_VPADD), emitted as
;; "vpadd.f16".  Only enabled under TARGET_NEON_FP16INST.
(define_insn "neon_vpaddv4hf"
 [(set
   (match_operand:V4HF 0 "s_register_operand" "=w")
   (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
		 (match_operand:V4HF 2 "s_register_operand" "w")]
    UNSPEC_VPADD))]
 "TARGET_NEON_FP16INST"
 "vpadd.f16\t%P0, %P1, %P2"
 [(set_attr "type" "neon_reduc_add")]
)
1737
;; Pairwise minimum of operands 1 and 2 (UNSPEC_VPSMIN) for the VD modes,
;; emitted as "vpmin.<V_s_elem>".  The "type" attribute depends on
;; <Is_float_mode>.
(define_insn "neon_vpsmin<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
	(unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
		    (match_operand:VD 2 "s_register_operand" "w")]
                   UNSPEC_VPSMIN))]
  "TARGET_NEON"
  "vpmin.<V_s_elem>\t%P0, %P1, %P2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_reduc_minmax_s<q>")
                    (const_string "neon_reduc_minmax<q>")))]
)
1750
;; Pairwise maximum of operands 1 and 2 (UNSPEC_VPSMAX) for the VD modes,
;; emitted as "vpmax.<V_s_elem>".  The "type" attribute depends on
;; <Is_float_mode>.
(define_insn "neon_vpsmax<mode>"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
	(unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
		    (match_operand:VD 2 "s_register_operand" "w")]
                   UNSPEC_VPSMAX))]
  "TARGET_NEON"
  "vpmax.<V_s_elem>\t%P0, %P1, %P2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_reduc_minmax_s<q>")
                    (const_string "neon_reduc_minmax<q>")))]
)
1763
;; Pairwise minimum of operands 1 and 2 (UNSPEC_VPUMIN) for the VDI modes,
;; emitted as "vpmin.<V_u_elem>".  This is the generator the
;; reduc_umin_scal_<mode> expander passes to neon_pairwise_reduce.
(define_insn "neon_vpumin<mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
	(unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
		     (match_operand:VDI 2 "s_register_operand" "w")]
                   UNSPEC_VPUMIN))]
  "TARGET_NEON"
  "vpmin.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
1773
;; Pairwise maximum of operands 1 and 2 (UNSPEC_VPUMAX) for the VDI modes,
;; emitted as "vpmax.<V_u_elem>".  This is the generator the
;; reduc_umax_scal_<mode> expander passes to neon_pairwise_reduce.
(define_insn "neon_vpumax<mode>"
  [(set (match_operand:VDI 0 "s_register_operand" "=w")
	(unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
		     (match_operand:VDI 2 "s_register_operand" "w")]
                   UNSPEC_VPUMAX))]
  "TARGET_NEON"
  "vpmax.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
1783
1784;; Saturating arithmetic
1785
1786; NOTE: Neon supports many more saturating variants of instructions than the
1787; following, but these are all GCC currently understands.
1788; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1789; yet either, although these patterns may be used by intrinsics when they're
1790; added.
1791
;; Signed saturating add on the VD modes: (ss_plus op1 op2) is emitted as
;; "vqadd.<V_s_elem>".  The leading '*' in the name means no gen_ function
;; is generated; the insn is only reached by RTL matching.
(define_insn "*ss_add<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
       (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
                   (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqadd.<V_s_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qadd<q>")]
)
1800
;; Unsigned saturating add on the VD modes: (us_plus op1 op2) is emitted as
;; "vqadd.<V_u_elem>".  The leading '*' in the name means no gen_ function
;; is generated; the insn is only reached by RTL matching.
(define_insn "*us_add<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
       (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
                   (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqadd.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qadd<q>")]
)
1809
;; Signed saturating subtract on the VD modes: (ss_minus op1 op2) is
;; emitted as "vqsub.<V_s_elem>".  The leading '*' in the name means no
;; gen_ function is generated; the insn is only reached by RTL matching.
(define_insn "*ss_sub<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
       (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqsub.<V_s_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qsub<q>")]
)
1818
;; Unsigned saturating subtract on the VD modes: (us_minus op1 op2) is
;; emitted as "vqsub.<V_u_elem>".  The leading '*' in the name means no
;; gen_ function is generated; the insn is only reached by RTL matching.
(define_insn "*us_sub<mode>_neon"
  [(set (match_operand:VD 0 "s_register_operand" "=w")
       (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
                    (match_operand:VD 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vqsub.<V_u_elem>\t%P0, %P1, %P2"
  [(set_attr "type" "neon_qsub<q>")]
)
1827
1828;; Conditional instructions.  These are comparisons with conditional moves for
1829;; vectors.  They perform the assignment:
1830;;
1831;;     Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1832;;
1833;; where op3 is <, <=, ==, !=, >= or >.  Operations are performed
1834;; element-wise.
1835
;; Expander for the vector conditional move.  It builds a comparison mask
;; from operands 4 and 5 under comparison code operand 3, then uses
;; neon_vbsl with that mask to select between operands 1 and 2.  Float
;; modes are only handled when flag_unsafe_math_optimizations is set.
(define_expand "vcond<mode><mode>"
  [(set (match_operand:VDQW 0 "s_register_operand" "")
	(if_then_else:VDQW
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VDQW 4 "s_register_operand" "")
	     (match_operand:VDQW 5 "nonmemory_operand" "")])
	  (match_operand:VDQW 1 "s_register_operand" "")
	  (match_operand:VDQW 2 "s_register_operand" "")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
  /* INVERSE is set for the "less-than" family (LT, UNLT, LE, UNLE); how it
     is consumed depends on the emission switch further down.
     USE_ZERO_FORM records that operand 5 is the zero vector and the
     comparison code has a compare-against-zero form.
     SWAP_BSL_OPERANDS is set when the emitted mask is the negation of the
     requested condition, so the two selected values must be exchanged.  */
  int inverse = 0;
  int use_zero_form = 0;
  int swap_bsl_operands = 0;
  /* MASK holds the final comparison result; TMP is a scratch used when two
     comparisons have to be ORed together.  */
  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
  rtx tmp = gen_reg_rtx (<V_cmp_result>mode);

  /* Generators chosen by the second switch below.  */
  rtx (*base_comparison) (rtx, rtx, rtx);
  rtx (*complimentary_comparison) (rtx, rtx, rtx);

  /* Step 1: keep a zero operand 5 as-is for the codes that have a zero
     form; in every other case make sure operand 5 is a register.  */
  switch (GET_CODE (operands[3]))
    {
    case GE:
    case GT:
    case LE:
    case LT:
    case EQ:
      if (operands[5] == CONST0_RTX (<MODE>mode))
	{
	  use_zero_form = 1;
	  break;
	}
      /* Fall through.  */
    default:
      if (!REG_P (operands[5]))
	operands[5] = force_reg (<MODE>mode, operands[5]);
    }

  /* Step 2: pick the comparison generator and its counterpart, and note
     whether the code belongs to the "less-than" family.  */
  switch (GET_CODE (operands[3]))
    {
    case LT:
    case UNLT:
      inverse = 1;
      /* Fall through.  */
    case GE:
    case UNGE:
    case ORDERED:
    case UNORDERED:
      base_comparison = gen_neon_vcge<mode>;
      complimentary_comparison = gen_neon_vcgt<mode>;
      break;
    case LE:
    case UNLE:
      inverse = 1;
      /* Fall through.  */
    case GT:
    case UNGT:
      base_comparison = gen_neon_vcgt<mode>;
      complimentary_comparison = gen_neon_vcge<mode>;
      break;
    case EQ:
    case NE:
    case UNEQ:
      base_comparison = gen_neon_vceq<mode>;
      complimentary_comparison = gen_neon_vceq<mode>;
      break;
    default:
      gcc_unreachable ();
    }

  /* Step 3: emit the comparison(s) that compute MASK.  */
  switch (GET_CODE (operands[3]))
    {
    case LT:
    case LE:
    case GT:
    case GE:
    case EQ:
      /* The easy case.  Here we emit one of vcge, vcgt or vceq.
	 As a LT b <=> b GT a && a LE b <=> b GE a.  Our transformations are:
	 a GE b -> a GE b
	 a GT b -> a GT b
	 a LE b -> b GE a
	 a LT b -> b GT a
	 a EQ b -> a EQ b
	 Note that there also exist direct comparison against 0 forms,
	 so catch those as a special case.  */
      if (use_zero_form)
	{
	  /* The zero forms compare operand 4 against zero directly, so no
	     operand swap is wanted.  */
	  inverse = 0;
	  switch (GET_CODE (operands[3]))
	    {
	    case LT:
	      base_comparison = gen_neon_vclt<mode>;
	      break;
	    case LE:
	      base_comparison = gen_neon_vcle<mode>;
	      break;
	    default:
	      /* Do nothing, other zero form cases already have the correct
		 base_comparison.  */
	      break;
	    }
	}

      if (!inverse)
	emit_insn (base_comparison (mask, operands[4], operands[5]));
      else
	emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
      break;
    case UNLT:
    case UNLE:
    case UNGT:
    case UNGE:
    case NE:
      /* Vector compare returns false for lanes which are unordered, so if we use
	 the inverse of the comparison we actually want to emit, then
	 swap the operands to BSL, we will end up with the correct result.
	 Note that a NE NaN and NaN NE b are true for all a, b.

	 Our transformations are:
	 a UNGE b -> !(b GT a)
	 a UNGT b -> !(b GE a)
	 a UNLE b -> !(a GT b)
	 a UNLT b -> !(a GE b)
	 a NE b -> !(a EQ b)  */

      if (inverse)
	emit_insn (base_comparison (mask, operands[4], operands[5]));
      else
	emit_insn (complimentary_comparison (mask, operands[5], operands[4]));

      swap_bsl_operands = 1;
      break;
    case UNEQ:
      /* We check (a > b ||  b > a).  Combining these comparisons gives us
	 true iff (a != b && a ORDERED b); swapping the operands to BSL
	 will then give us (a == b ||  a UNORDERED b) as intended.  */

      emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
      emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
      swap_bsl_operands = 1;
      break;
    case UNORDERED:
       /* Operands are ORDERED iff (a > b || b >= a).
	 Swapping the operands to BSL will give the UNORDERED case.  */
     swap_bsl_operands = 1;
     /* Fall through.  */
    case ORDERED:
      emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
      emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
      break;
    default:
      gcc_unreachable ();
    }

  /* Step 4: select between operands 1 and 2 under MASK, exchanging them
     when MASK was computed as the negation of the requested condition.  */
  if (swap_bsl_operands)
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
				    operands[1]));
  else
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
				    operands[2]));
  DONE;
})
2000
;; Unsigned counterpart of vcond for the VDQIW modes.  A comparison mask is
;; built from operands 4 and 5 under comparison code operand 3 (GEU, GTU,
;; LEU, LTU, EQ or NE), and neon_vbsl then selects between operands 1 and 2.
(define_expand "vcondu<mode><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "")
	(if_then_else:VDQIW
	  (match_operator 3 "arm_comparison_operator"
	    [(match_operand:VDQIW 4 "s_register_operand" "")
	     (match_operand:VDQIW 5 "s_register_operand" "")])
	  (match_operand:VDQIW 1 "s_register_operand" "")
	  (match_operand:VDQIW 2 "s_register_operand" "")))]
  "TARGET_NEON"
{
  enum rtx_code code = GET_CODE (operands[3]);
  /* MASK receives the comparison result that drives neon_vbsl.  INVERSE is
     set when MASK is the negation of the requested condition (NE is
     computed through vceq), in which case the selected values are
     exchanged.  */
  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
  int inverse = 0;

  /* A zero vector in operand 5 is only passed through for the equality
     tests, exactly as before.  It must not be used for the ordering tests:
     the compare-against-zero generators neon_vcle/neon_vclt are the signed
     comparisons used by vcond, so using them for LEU/LTU would give the
     wrong answer for any element with its top bit set.  For those codes
     (and for anything else that is not a register) compare against a
     register instead.  */
  int keep_zero = (operands[5] == CONST0_RTX (<MODE>mode)
		   && (code == EQ || code == NE));

  if (!keep_zero && !REG_P (operands[5]))
    operands[5] = force_reg (<MODE>mode, operands[5]);

  switch (code)
    {
    case GEU:
      emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
      break;

    case GTU:
      emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
      break;

    case EQ:
      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
      break;

    case LEU:
      /* a LEU b <=> b GEU a.  */
      emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
      break;

    case LTU:
      /* a LTU b <=> b GTU a.  */
      emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
      break;

    case NE:
      /* a NE b <=> !(a EQ b); the negation is done by swapping the
	 values selected below.  */
      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
      inverse = 1;
      break;

    default:
      gcc_unreachable ();
    }

  if (inverse)
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
				    operands[1]));
  else
    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
				    operands[2]));

  DONE;
})
2067
2068;; Patterns for builtins.
2069
2070; good for plain vadd, vaddq.
2071
;; Builtin expander for vector add on the VCVTF modes.  It emits the
;; generic add<mode>3 pattern when that is permitted, and the opaque
;; neon_vadd<mode>_unspec form otherwise.
(define_expand "neon_vadd<mode>"
  [(match_operand:VCVTF 0 "s_register_operand" "=w")
   (match_operand:VCVTF 1 "s_register_operand" "w")
   (match_operand:VCVTF 2 "s_register_operand" "w")]
  "TARGET_NEON"
{
  /* A float add may only be exposed as canonical RTL under
     -funsafe-math-optimizations; without it the unspec form is used.  */
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
					   operands[2]));
  else
    emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
  DONE;
})
2085
;; Builtin expander for vector add on the VH modes; always forwards to
;; add<mode>3_fp16.  Requires TARGET_NEON_FP16INST.
(define_expand "neon_vadd<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx dest = operands[0];
  rtx lhs = operands[1];
  rtx rhs = operands[2];

  emit_insn (gen_add<mode>3_fp16 (dest, lhs, rhs));
  DONE;
})
2095
;; Builtin expander for vector subtract on the VH modes; always forwards
;; to sub<mode>3_fp16.  Requires TARGET_NEON_FP16INST.
(define_expand "neon_vsub<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx dest = operands[0];
  rtx lhs = operands[1];
  rtx rhs = operands[2];

  emit_insn (gen_sub<mode>3_fp16 (dest, lhs, rhs));
  DONE;
})
2105
2106; Note that NEON operations don't support the full IEEE 754 standard: in
2107; particular, denormal values are flushed to zero.  This means that GCC cannot
2108; use those instructions for autovectorization, etc. unless
2109; -funsafe-math-optimizations is in effect (in which case flush-to-zero
2110; behavior is permissible).  Intrinsic operations (provided by the arm_neon.h
2111; header) must work in either case: if -funsafe-math-optimizations is given,
2112; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
2113; expand to unspecs (which may potentially limit the extent to which they might
2114; be optimized by generic code).
2115
2116; Used for intrinsics when flag_unsafe_math_optimizations is false.
2117
;; Vector add kept opaque as UNSPEC_VADD on the VCVTF modes.  The
;; neon_vadd<mode> expander emits this for float modes when
;; flag_unsafe_math_optimizations is off.  Output is "vadd.<V_if_elem>".
(define_insn "neon_vadd<mode>_unspec"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
		      (match_operand:VCVTF 2 "s_register_operand" "w")]
                     UNSPEC_VADD))]
  "TARGET_NEON"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_addsub_s<q>")
                    (const_string "neon_add<q>")))]
)
2130
;; Long add: two VDI operands produce a <V_widen> result (printed with %q).
;; One insn is generated per unspec code in VADDL; <sup> appears in both the
;; pattern name and the mnemonic's type suffix (presumably the signedness
;; letter; both iterators are defined elsewhere).
(define_insn "neon_vaddl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
		           (match_operand:VDI 2 "s_register_operand" "w")]
                          VADDL))]
  "TARGET_NEON"
  "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_add_long")]
)
2140
;; Wide add: a <V_widen> operand 1 and a VDI operand 2 produce a <V_widen>
;; result.  One insn is generated per unspec code in VADDW; <sup> appears in
;; both the pattern name and the mnemonic's type suffix.
(define_insn "neon_vaddw<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
		           (match_operand:VDI 2 "s_register_operand" "w")]
                          VADDW))]
  "TARGET_NEON"
  "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
  [(set_attr "type" "neon_add_widen")]
)
2150
2151; vhadd and vrhadd.
2152
;; vhadd / vrhadd on the VDQIW modes.  One insn is generated per unspec
;; code in VHADD; <r> and <sup> are substituted into both the pattern name
;; and the emitted mnemonic.
(define_insn "neon_v<r>hadd<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
		       (match_operand:VDQIW 2 "s_register_operand" "w")]
		      VHADD))]
  "TARGET_NEON"
  "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_add_halve_q")]
)
2162
;; Builtin form of vqadd on the VDQIX modes, expressed through the VQADD
;; unspec codes rather than ss_plus/us_plus.  <sup> is substituted into both
;; the pattern name and the mnemonic's type suffix.
(define_insn "neon_vqadd<sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
		       (match_operand:VDQIX 2 "s_register_operand" "w")]
                     VQADD))]
  "TARGET_NEON"
  "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_qadd<q>")]
)
2172
;; vaddhn / vraddhn: two VN operands (printed with %q) produce a
;; <V_narrow> result (printed with %P).  One insn is generated per unspec
;; code in VADDHN.
(define_insn "neon_v<r>addhn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
		            (match_operand:VN 2 "s_register_operand" "w")]
                           VADDHN))]
  "TARGET_NEON"
  "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
  [(set_attr "type" "neon_add_halve_narrow_q")]
)
2182
2183;; Polynomial and Float multiplication.
;; Multiply expressed as UNSPEC_VMUL on the VPF modes.  <pf> is substituted
;; into the pattern name and into the mnemonic's type suffix; the "type"
;; attribute depends on <Is_float_mode>.
(define_insn "neon_vmul<pf><mode>"
  [(set (match_operand:VPF 0 "s_register_operand" "=w")
        (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
		      (match_operand:VPF 2 "s_register_operand" "w")]
		     UNSPEC_VMUL))]
  "TARGET_NEON"
  "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_mul_s<q>")
                    (const_string "neon_mul_<V_elem_ch><q>")))]
)
2196
;; Named multiply pattern mul<mode>3 for the VH modes, emitted as
;; "vmul.f16".  Requires flag_unsafe_math_optimizations in addition to
;; TARGET_NEON_FP16INST.
(define_insn "mul<mode>3"
 [(set
   (match_operand:VH 0 "s_register_operand" "=w")
   (mult:VH
    (match_operand:VH 1 "s_register_operand" "w")
    (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
  "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
)
2207
;; Same RTL and output as the VH mul<mode>3 pattern, but enabled by
;; TARGET_NEON_FP16INST alone, with no flag_unsafe_math_optimizations
;; requirement.
(define_insn "neon_vmulf<mode>"
 [(set
   (match_operand:VH 0 "s_register_operand" "=w")
   (mult:VH
    (match_operand:VH 1 "s_register_operand" "w")
    (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST"
  "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
)
2218
;; Builtin expander for multiply-accumulate on the VDQW modes.  It emits
;; mul<mode>3add<mode>_neon when that is permitted, and the opaque
;; neon_vmla<mode>_unspec form otherwise.
(define_expand "neon_vmla<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:VDQW 1 "s_register_operand" "0")
   (match_operand:VDQW 2 "s_register_operand" "w")
   (match_operand:VDQW 3 "s_register_operand" "w")]
  "TARGET_NEON"
{
  /* A float multiply-accumulate may only be exposed as canonical RTL under
     -funsafe-math-optimizations; without it the unspec form is used.  */
  if (<Is_float_mode> && !flag_unsafe_math_optimizations)
    emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
					   operands[2], operands[3]));
  else
    emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
				             operands[2], operands[3]));
  DONE;
})
2234
;; Builtin expander for fused multiply-add on the VCVTF modes; forwards to
;; fma<mode>4_intrinsic with the operands reordered.
(define_expand "neon_vfma<VCVTF:mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")
   (match_operand:VCVTF 3 "s_register_operand")]
  "TARGET_NEON && TARGET_FMA"
{
  /* Operand 1 of the builtin goes last in the fma<mode>4_intrinsic call
     (presumably its addend slot); operands 2 and 3 move up.  */
  rtx acc = operands[1];
  rtx mul_a = operands[2];
  rtx mul_b = operands[3];

  emit_insn (gen_fma<mode>4_intrinsic (operands[0], mul_a, mul_b, acc));
  DONE;
})
2246
;; Builtin expander for fused multiply-add on the VH modes; forwards to
;; fma<mode>4_intrinsic with the operands reordered.
(define_expand "neon_vfma<VH:mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")
   (match_operand:VH 3 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  /* Operand 1 of the builtin goes last in the fma<mode>4_intrinsic call
     (presumably its addend slot); operands 2 and 3 move up.  */
  rtx acc = operands[1];
  rtx mul_a = operands[2];
  rtx mul_b = operands[3];

  emit_insn (gen_fma<mode>4_intrinsic (operands[0], mul_a, mul_b, acc));
  DONE;
})
2258
;; Builtin expander for fused multiply-subtract on the VCVTF modes;
;; forwards to fmsub<mode>4_intrinsic with the operands reordered.
(define_expand "neon_vfms<VCVTF:mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")
   (match_operand:VCVTF 3 "s_register_operand")]
  "TARGET_NEON && TARGET_FMA"
{
  /* Operand 1 of the builtin goes last in the fmsub<mode>4_intrinsic call
     (presumably its addend slot); operands 2 and 3 move up.  */
  rtx acc = operands[1];
  rtx mul_a = operands[2];
  rtx mul_b = operands[3];

  emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], mul_a, mul_b, acc));
  DONE;
})
2270
;; Builtin expander for fused multiply-subtract on the VH modes; forwards
;; to fmsub<mode>4_intrinsic with the operands reordered.
(define_expand "neon_vfms<VH:mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:VH 2 "s_register_operand")
   (match_operand:VH 3 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  /* Operand 1 of the builtin goes last in the fmsub<mode>4_intrinsic call
     (presumably its addend slot); operands 2 and 3 move up.  */
  rtx acc = operands[1];
  rtx mul_a = operands[2];
  rtx mul_b = operands[3];

  emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], mul_a, mul_b, acc));
  DONE;
})
2282
2283;; The expand RTL structure here is not important.
2284;; We use the gen_* functions anyway.
2285;; We just need something to wrap the iterators around.
2286
(define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand")
     (unspec:VCVTF
	[(match_operand:VCVTF 1 "s_register_operand")
	   (PLUSMINUS:<VFML>
	     (match_operand:<VFML> 2 "s_register_operand")
	     (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
  "TARGET_FP16FML"
{
  /* Build the constant that selects the wanted half of a <VFML> vector.
     The same rtx is handed to the _intrinsic pattern twice, once for each
     of the two <VFML> inputs.  */
  rtx half_sel
    = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);

  emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     half_sel,
							     half_sel));
  DONE;
})
2304
;; vfmal.f16 on the low halves: the result is
;;   fma (extend (select op2, op4), extend (select op3, op5), op1)
;; with operand 1 tied to the destination ("0" constraint).  Operands 4 and
;; 5 must satisfy vect_par_constant_low, and the inputs are printed with
;; %<V_lo>.
(define_insn "vfmal_low<mode>_intrinsic"
 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(fma:VCVTF
	 (float_extend:VCVTF
	  (vec_select:<VFMLSEL>
	   (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
	   (match_operand:<VFML> 4 "vect_par_constant_low" "")))
	 (float_extend:VCVTF
	  (vec_select:<VFMLSEL>
	   (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
	   (match_operand:<VFML> 5 "vect_par_constant_low" "")))
	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
 "TARGET_FP16FML"
 "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
 [(set_attr "type" "neon_fp_mla_s<q>")]
)
2321
;; vfmsl.f16 on the high halves: the result is
;;   fma (extend (neg (select op2, op4)), extend (select op3, op5), op1)
;; with operand 1 tied to the destination ("0" constraint).  Operands 4 and
;; 5 must satisfy vect_par_constant_high, and the inputs are printed with
;; %<V_hi>.
(define_insn "vfmsl_high<mode>_intrinsic"
 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(fma:VCVTF
	 (float_extend:VCVTF
	  (neg:<VFMLSEL>
	    (vec_select:<VFMLSEL>
	      (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
	      (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
	 (float_extend:VCVTF
	  (vec_select:<VFMLSEL>
	   (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
	   (match_operand:<VFML> 5 "vect_par_constant_high" "")))
	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
 "TARGET_FP16FML"
 "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
 [(set_attr "type" "neon_fp_mla_s<q>")]
)
2339
;; vfmal.f16 on the high halves: the result is
;;   fma (extend (select op2, op4), extend (select op3, op5), op1)
;; with operand 1 tied to the destination ("0" constraint).  Operands 4 and
;; 5 must satisfy vect_par_constant_high, and the inputs are printed with
;; %<V_hi>.
(define_insn "vfmal_high<mode>_intrinsic"
 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(fma:VCVTF
	 (float_extend:VCVTF
	  (vec_select:<VFMLSEL>
	   (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
	   (match_operand:<VFML> 4 "vect_par_constant_high" "")))
	 (float_extend:VCVTF
	  (vec_select:<VFMLSEL>
	   (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
	   (match_operand:<VFML> 5 "vect_par_constant_high" "")))
	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
 "TARGET_FP16FML"
 "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
 [(set_attr "type" "neon_fp_mla_s<q>")]
)
2356
;; vfmsl.f16 on the low halves: the result is
;;   fma (extend (neg (select op2, op4)), extend (select op3, op5), op1)
;; with operand 1 tied to the destination ("0" constraint).  Operands 4 and
;; 5 must satisfy vect_par_constant_low, and the inputs are printed with
;; %<V_lo>.
(define_insn "vfmsl_low<mode>_intrinsic"
 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(fma:VCVTF
	 (float_extend:VCVTF
	  (neg:<VFMLSEL>
	    (vec_select:<VFMLSEL>
	      (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
	      (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
	 (float_extend:VCVTF
	  (vec_select:<VFMLSEL>
	   (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
	   (match_operand:<VFML> 5 "vect_par_constant_low" "")))
	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
 "TARGET_FP16FML"
 "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
 [(set_attr "type" "neon_fp_mla_s<q>")]
)
2374
;; Builtin expander for the by-lane forms in which operands 2 and 3 are
;; both <VFML> vectors.  Operand 4 is the lane number given to the builtin.
(define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
  [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
     (unspec:VCVTF
	[(match_operand:VCVTF 1 "s_register_operand")
	 (PLUSMINUS:<VFML>
	   (match_operand:<VFML> 2 "s_register_operand")
	   (match_operand:<VFML> 3 "s_register_operand"))
	 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
  "TARGET_FP16FML"
{
  /* Map the builtin's lane number through NEON_ENDIAN_LANE_N.  */
  HOST_WIDE_INT user_lane = INTVAL (operands[4]);
  rtx lane_rtx = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, user_lane));

  /* Constant selecting the wanted half of operand 2.  */
  rtx half_sel
    = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);

  emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
					       (operands[0], operands[1],
						operands[2], operands[3],
						half_sel, lane_rtx));
  DONE;
})
2393
(define_insn "vfmal_lane_low<mode>_intrinsic"
 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(fma:VCVTF
	 (float_extend:VCVTF
	  (vec_select:<VFMLSEL>
	   (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
	   (match_operand:<VFML> 4 "vect_par_constant_low" "")))
	 (float_extend:VCVTF
	   (vec_duplicate:<VFMLSEL>
	     (vec_select:HF
	       (match_operand:<VFML> 3 "s_register_operand" "x")
	       (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
 "TARGET_FP16FML"
 {
    /* Number of elements in one <VFMLSEL> half of operand 3.  */
    const int half_nunits = GET_MODE_NUNITS (<VFMLSEL>mode);
    int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

    /* A lane inside the first half is printed against the low register
       as-is.  */
    if (lane <= half_nunits - 1)
      {
	operands[5] = GEN_INT (lane);
	return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
      }

    /* Otherwise the lane lives in the high register; rebase its index.  */
    operands[5] = GEN_INT (lane - half_nunits);
    return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
  }
 [(set_attr "type" "neon_fp_mla_s<q>")]
)
2423
;; Builtin expander for the by-lane forms in which operand 3 has the
;; <VFMLSEL2> mode rather than <VFML>.  Operand 4 is the lane number given
;; to the builtin.
(define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
  [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
     (unspec:VCVTF
	[(match_operand:VCVTF 1 "s_register_operand")
	 (PLUSMINUS:<VFML>
	   (match_operand:<VFML> 2 "s_register_operand")
	   (match_operand:<VFMLSEL2> 3 "s_register_operand"))
	 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
  "TARGET_FP16FML"
{
  /* The lane indexes operand 3, so it is mapped through
     NEON_ENDIAN_LANE_N in the <VFMLSEL2> mode.  */
  HOST_WIDE_INT user_lane = INTVAL (operands[4]);
  rtx lane_rtx = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, user_lane));

  /* Constant selecting the wanted half of operand 2.  */
  rtx half_sel
    = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);

  emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
		(operands[0], operands[1], operands[2], operands[3],
		 half_sel, lane_rtx));
  DONE;
})
2442
2443;; Used to implement the intrinsics:
2444;; float32x4_t vfmlalq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2445;; float32x2_t vfmlal_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2446;; Needs a bit of care to get the modes of the different sub-expressions right
2447;; due to 'a' and 'b' having different sizes and make sure we use the right
2448;; S or D subregister to select the appropriate lane from.
2449
(define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(fma:VCVTF
	 (float_extend:VCVTF
	  (vec_select:<VFMLSEL>
	   (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
	   (match_operand:<VFML> 4 "vect_par_constant_low" "")))
	 (float_extend:VCVTF
	   (vec_duplicate:<VFMLSEL>
	     (vec_select:HF
	       (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
	       (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
 "TARGET_FP16FML"
 {
   /* Split the lane of operand 3 into a register offset (which
      <VFMLSEL>-sized piece it falls in) and an index within that piece.  */
   const int per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
   const int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
   const int reg_offset = lane / per_reg;

   operands[5] = GEN_INT (lane % per_reg);

   /* Operands 2 and 3 are rebuilt as <VFMLSEL>-mode hard registers so that
      print_operand emits the matching S or D register name; operand 3 is
      additionally moved on to the piece that holds the lane.  */
   operands[3] = gen_rtx_REG (<VFMLSEL>mode,
			      REGNO (operands[3]) + reg_offset);
   operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));

   return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
 }
 [(set_attr "type" "neon_fp_mla_s<q>")]
)
2479
2480;; Used to implement the intrinsics:
2481;; float32x4_t vfmlalq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2482;; float32x2_t vfmlal_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2483;; Needs a bit of care to get the modes of the different sub-expressions right
2484;; due to 'a' and 'b' having different sizes and make sure we use the right
2485;; S or D subregister to select the appropriate lane from.
2486
(define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(fma:VCVTF
	 (float_extend:VCVTF
	  (vec_select:<VFMLSEL>
	   (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
	   (match_operand:<VFML> 4 "vect_par_constant_high" "")))
	 (float_extend:VCVTF
	   (vec_duplicate:<VFMLSEL>
	     (vec_select:HF
	       (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
	       (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
 "TARGET_FP16FML"
 {
   /* Split the lane of operand 3 into a register offset (which
      <VFMLSEL>-sized piece it falls in) and an index within that piece.  */
   const int per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
   const int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
   const int reg_offset = lane / per_reg;

   operands[5] = GEN_INT (lane % per_reg);

   /* Only operand 3 is rebuilt here: it becomes the <VFMLSEL>-mode hard
      register holding the lane, so that this sub-register is the one that
      gets printed.  Operand 2 is printed through %<V_hi>.  */
   operands[3] = gen_rtx_REG (<VFMLSEL>mode,
			      REGNO (operands[3]) + reg_offset);

   return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
 }
 [(set_attr "type" "neon_fp_mla_s<q>")]
)
2515
(define_insn "vfmal_lane_high<mode>_intrinsic"
 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(fma:VCVTF
	 (float_extend:VCVTF
	  (vec_select:<VFMLSEL>
	   (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
	   (match_operand:<VFML> 4 "vect_par_constant_high" "")))
	 (float_extend:VCVTF
	   (vec_duplicate:<VFMLSEL>
	     (vec_select:HF
	       (match_operand:<VFML> 3 "s_register_operand" "x")
	       (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
 "TARGET_FP16FML"
  {
    /* Number of elements in one <VFMLSEL> half of operand 3.  */
    const int half_nunits = GET_MODE_NUNITS (<VFMLSEL>mode);
    int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

    /* A lane inside the first half is printed against the low register
       as-is.  */
    if (lane <= half_nunits - 1)
      {
	operands[5] = GEN_INT (lane);
	return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
      }

    /* Otherwise the lane lives in the high register; rebase its index.  */
    operands[5] = GEN_INT (lane - half_nunits);
    return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
  }
 [(set_attr "type" "neon_fp_mla_s<q>")]
)
2545
(define_insn "vfmsl_lane_low<mode>_intrinsic"
 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(fma:VCVTF
	 (float_extend:VCVTF
	  (neg:<VFMLSEL>
	    (vec_select:<VFMLSEL>
	      (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
	      (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
	 (float_extend:VCVTF
	   (vec_duplicate:<VFMLSEL>
	     (vec_select:HF
	       (match_operand:<VFML> 3 "s_register_operand" "x")
	       (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
 "TARGET_FP16FML"
 {
    /* Number of elements in one <VFMLSEL> half of operand 3.  */
    const int half_nunits = GET_MODE_NUNITS (<VFMLSEL>mode);
    int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

    /* A lane inside the first half is printed against the low register
       as-is.  */
    if (lane <= half_nunits - 1)
      {
	operands[5] = GEN_INT (lane);
	return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
      }

    /* Otherwise the lane lives in the high register; rebase its index.  */
    operands[5] = GEN_INT (lane - half_nunits);
    return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
  }
 [(set_attr "type" "neon_fp_mla_s<q>")]
)
2576
2577;; Used to implement the intrinsics:
2578;; float32x4_t vfmlslq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2579;; float32x2_t vfmlsl_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2580;; Care is needed to get the modes of the different sub-expressions right,
2581;; because 'a' and 'b' have different sizes, and to make sure we use the right
2582;; S or D subregister to select the appropriate lane from.
2583
;; vfmsl.f16 by lane, "low" form, where the lane source (operand 3) has mode
;; <VFMLSEL2> rather than the <VFML> mode of operand 2.  The negated half of
;; operand 2 selected by operand 4 is multiplied by the duplicated lane and
;; accumulated into operand 1, which is tied to operand 0.
2584(define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
2585 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2586	(fma:VCVTF
2587	 (float_extend:VCVTF
2588	  (neg:<VFMLSEL>
2589	    (vec_select:<VFMLSEL>
2590	      (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2591	      (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2592	 (float_extend:VCVTF
2593	   (vec_duplicate:<VFMLSEL>
2594	     (vec_select:HF
2595	       (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2596	       (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2597	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2598 "TARGET_FP16FML"
 {
   /* Split the endian-adjusted lane into an index within one half-width
      register (new_lane) and an offset (regdiff) that is added to operand
      3's register number below.  */
2600   int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2601   int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2602   int new_lane = lane % elts_per_reg;
2603   int regdiff = lane / elts_per_reg;
2604   operands[5] = GEN_INT (new_lane);
2605   /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2606      because we want the print_operand code to print the appropriate
2607      S or D register prefix.  */
2608   operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2609   operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2610   return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2611 }
2612 [(set_attr "type" "neon_fp_mla_s<q>")]
2613)
2614
2615;; Used to implement the intrinsics:
2616;; float32x4_t vfmlslq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2617;; float32x2_t vfmlsl_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2618;; Care is needed to get the modes of the different sub-expressions right,
2619;; because 'a' and 'b' have different sizes, and to make sure we use the right
2620;; S or D subregister to select the appropriate lane from.
2621
;; vfmsl.f16 by lane, "high" form, where the lane source (operand 3) has
;; mode <VFMLSEL2> rather than the <VFML> mode of operand 2.  The negated
;; half of operand 2 selected by the vect_par_constant_high parallel in
;; operand 4 is multiplied by the duplicated lane and accumulated into
;; operand 1, which is tied to operand 0.
2622(define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
2623 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2624	(fma:VCVTF
2625	 (float_extend:VCVTF
2626	  (neg:<VFMLSEL>
2627	    (vec_select:<VFMLSEL>
2628	     (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2629	     (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2630	 (float_extend:VCVTF
2631	   (vec_duplicate:<VFMLSEL>
2632	     (vec_select:HF
2633	       (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2634	       (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2635	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2636 "TARGET_FP16FML"
2637 {
   /* Split the endian-adjusted lane into an index within one half-width
      register (new_lane) and an offset (regdiff) that is added to operand
      3's register number below.  */
2638   int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2639   int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2640   int new_lane = lane % elts_per_reg;
2641   int regdiff = lane / elts_per_reg;
2642   operands[5] = GEN_INT (new_lane);
2643   /* We re-create operands[3] in the halved VFMLSEL mode
2644      because we've calculated the correct half-width subreg to extract
2645      the lane from and we want to print *that* subreg instead.  */
2646   operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2647   return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2648 }
2649 [(set_attr "type" "neon_fp_mla_s<q>")]
2650)
2651
;; FP16 widening fused multiply-subtract by lane (vfmsl.f16), "high" form.
;; The half of operand 2 selected by the vect_par_constant_high parallel in
;; operand 4 is negated, extended and multiplied by the duplicated lane
;; (operand 5) of operand 3, then accumulated into operand 1, which is tied
;; to operand 0.  Operands 2 and 3 both have mode <VFML>.
2652(define_insn "vfmsl_lane_high<mode>_intrinsic"
2653 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2654	(fma:VCVTF
2655	 (float_extend:VCVTF
2656	  (neg:<VFMLSEL>
2657	    (vec_select:<VFMLSEL>
2658	     (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2659	     (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2660	 (float_extend:VCVTF
2661	   (vec_duplicate:<VFMLSEL>
2662	     (vec_select:HF
2663	       (match_operand:<VFML> 3 "s_register_operand" "x")
2664	       (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2665	 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2666 "TARGET_FP16FML"
2667  {
    /* Map the requested lane through NEON_ENDIAN_LANE_N for the full-width
       mode of operand 3.  */
2668    int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
    /* Lanes past the first half-width vector are rebased and taken from the
       V_hi view of operand 3; lower lanes use the V_lo view.  Operand 2 is
       always printed with V_hi in this "high" variant.  */
2669    if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2670      {
2671	operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2672	return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2673      }
2674    else
2675      {
2676	operands[5] = GEN_INT (lane);
2677	return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2678      }
2679  }
2680 [(set_attr "type" "neon_fp_mla_s<q>")]
2681)
2682
2683; Used for intrinsics when flag_unsafe_math_optimizations is false.
2684
;; vmla expressed as an opaque UNSPEC_VMLA over VDQW modes.  Operand 1 is
;; the accumulator and is tied to the destination ("0"); operands 2 and 3
;; are the multiplicands.  The scheduling type depends on <Is_float_mode>:
;; neon_fp_mla_s<q> for float modes, neon_mla_<V_elem_ch><q> otherwise.
2685(define_insn "neon_vmla<mode>_unspec"
2686  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2687	(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2688		      (match_operand:VDQW 2 "s_register_operand" "w")
2689		      (match_operand:VDQW 3 "s_register_operand" "w")]
2690		    UNSPEC_VMLA))]
2691  "TARGET_NEON"
2692  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2693  [(set (attr "type")
2694      (if_then_else (match_test "<Is_float_mode>")
2695                    (const_string "neon_fp_mla_s<q>")
2696                    (const_string "neon_mla_<V_elem_ch><q>")))]
2697)
2698
;; vmlal: multiply-accumulate producing a <V_widen> result from VW-mode
;; inputs.  Operand 1 (<V_widen>) is the accumulator, tied to operand 0; the
;; VMLAL iterator together with <sup> selects the signedness letter printed
;; in the mnemonic.
2699(define_insn "neon_vmlal<sup><mode>"
2700  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2701        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2702		           (match_operand:VW 2 "s_register_operand" "w")
2703		           (match_operand:VW 3 "s_register_operand" "w")]
2704                          VMLAL))]
2705  "TARGET_NEON"
2706  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2707  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2708)
2709
;; Expander for vmls on VDQW modes.  For non-float modes, or when
;; flag_unsafe_math_optimizations is set, it emits the
;; mul<mode>3neg<mode>add<mode>_neon pattern; otherwise it emits
;; neon_vmls<mode>_unspec.  The body always ends with DONE.
2710(define_expand "neon_vmls<mode>"
2711  [(match_operand:VDQW 0 "s_register_operand" "=w")
2712   (match_operand:VDQW 1 "s_register_operand" "0")
2713   (match_operand:VDQW 2 "s_register_operand" "w")
2714   (match_operand:VDQW 3 "s_register_operand" "w")]
2715  "TARGET_NEON"
2716{
2717  if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2718    emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2719		 operands[1], operands[2], operands[3]));
2720  else
2721    emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2722					   operands[2], operands[3]));
2723  DONE;
2724})
2725
2726; Used for intrinsics when flag_unsafe_math_optimizations is false.
2727
;; vmls expressed as an opaque UNSPEC_VMLS over VDQW modes.  Operand 1 is
;; the accumulator and is tied to the destination ("0"); operands 2 and 3
;; are the multiplicands.  The scheduling type depends on <Is_float_mode>:
;; neon_fp_mla_s<q> for float modes, neon_mla_<V_elem_ch><q> otherwise.
2728(define_insn "neon_vmls<mode>_unspec"
2729  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2730	(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2731		      (match_operand:VDQW 2 "s_register_operand" "w")
2732		      (match_operand:VDQW 3 "s_register_operand" "w")]
2733		    UNSPEC_VMLS))]
2734  "TARGET_NEON"
2735  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2736  [(set (attr "type")
2737      (if_then_else (match_test "<Is_float_mode>")
2738                    (const_string "neon_fp_mla_s<q>")
2739                    (const_string "neon_mla_<V_elem_ch><q>")))]
2740)
2741
;; vmlsl: multiply-subtract producing a <V_widen> result from VW-mode
;; inputs.  Operand 1 (<V_widen>) is the accumulator, tied to operand 0; the
;; VMLSL iterator together with <sup> selects the signedness letter printed
;; in the mnemonic.
2742(define_insn "neon_vmlsl<sup><mode>"
2743  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2744        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2745		           (match_operand:VW 2 "s_register_operand" "w")
2746		           (match_operand:VW 3 "s_register_operand" "w")]
2747                          VMLSL))]
2748  "TARGET_NEON"
2749  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2750  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2751)
2752
2753;; vqdmulh, vqrdmulh
;; One pattern covers both mnemonics: the VQDMULH iterator supplies the
;; unspec and <r> the optional "r" in the name.  Two VMDQI register inputs,
;; VMDQI result, no tied operands.
2754(define_insn "neon_vq<r>dmulh<mode>"
2755  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2756        (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2757		       (match_operand:VMDQI 2 "s_register_operand" "w")]
2758                      VQDMULH))]
2759  "TARGET_NEON"
2760  "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2761  [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2762)
2763
2764;; vqrdmlah, vqrdmlsh
;; The VQRDMLH_AS iterator and its neon_rdma_as attribute select between the
;; two mnemonics.  Operand 1 is the accumulator, tied to the destination;
;; operands 2 and 3 are the multiplicands.  Only available under
;; TARGET_NEON_RDMA.
2765(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2766  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2767	(unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2768		       (match_operand:VMDQI 2 "s_register_operand" "w")
2769		       (match_operand:VMDQI 3 "s_register_operand" "w")]
2770		      VQRDMLH_AS))]
2771  "TARGET_NEON_RDMA"
2772  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2773  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2774)
2775
;; vqdmlal: UNSPEC_VQDMLAL on VMDI inputs (operands 2 and 3) with a
;; <V_widen> accumulator (operand 1) tied to the <V_widen> destination.
2776(define_insn "neon_vqdmlal<mode>"
2777  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2778        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2779		           (match_operand:VMDI 2 "s_register_operand" "w")
2780		           (match_operand:VMDI 3 "s_register_operand" "w")]
2781                          UNSPEC_VQDMLAL))]
2782  "TARGET_NEON"
2783  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2784  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2785)
2786
;; vqdmlsl: UNSPEC_VQDMLSL on VMDI inputs (operands 2 and 3) with a
;; <V_widen> accumulator (operand 1) tied to the <V_widen> destination.
2787(define_insn "neon_vqdmlsl<mode>"
2788  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2789        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2790		           (match_operand:VMDI 2 "s_register_operand" "w")
2791		           (match_operand:VMDI 3 "s_register_operand" "w")]
2792                          UNSPEC_VQDMLSL))]
2793  "TARGET_NEON"
2794  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2795  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2796)
2797
;; vmull: multiply of two VW-mode registers giving a <V_widen> result.  The
;; VMULL iterator together with <sup> selects the signedness letter printed
;; in the mnemonic.
2798(define_insn "neon_vmull<sup><mode>"
2799  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2800        (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2801		           (match_operand:VW 2 "s_register_operand" "w")]
2802                          VMULL))]
2803  "TARGET_NEON"
2804  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2805  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
2806)
2807
;; vqdmull: UNSPEC_VQDMULL of two VMDI-mode registers giving a <V_widen>
;; result.
2808(define_insn "neon_vqdmull<mode>"
2809  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2810        (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2811		           (match_operand:VMDI 2 "s_register_operand" "w")]
2812                          UNSPEC_VQDMULL))]
2813  "TARGET_NEON"
2814  "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2815  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
2816)
2817
;; Expander for vsub on VCVTF modes.  It emits the generic sub<mode>3
;; pattern when the mode is not a float mode or when
;; flag_unsafe_math_optimizations is set, and neon_vsub<mode>_unspec
;; otherwise.  The body always ends with DONE.
2818(define_expand "neon_vsub<mode>"
2819  [(match_operand:VCVTF 0 "s_register_operand" "=w")
2820   (match_operand:VCVTF 1 "s_register_operand" "w")
2821   (match_operand:VCVTF 2 "s_register_operand" "w")]
2822  "TARGET_NEON"
2823{
2824  if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2825    emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2826  else
2827    emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2828					   operands[2]));
2829  DONE;
2830})
2831
2832; Used for intrinsics when flag_unsafe_math_optimizations is false.
2833
;; vsub expressed as an opaque UNSPEC_VSUB over VCVTF modes.  The scheduling
;; type depends on <Is_float_mode>: neon_fp_addsub_s<q> for float modes,
;; neon_sub<q> otherwise.
2834(define_insn "neon_vsub<mode>_unspec"
2835  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2836        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2837		      (match_operand:VCVTF 2 "s_register_operand" "w")]
2838                     UNSPEC_VSUB))]
2839  "TARGET_NEON"
2840  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2841  [(set (attr "type")
2842      (if_then_else (match_test "<Is_float_mode>")
2843                    (const_string "neon_fp_addsub_s<q>")
2844                    (const_string "neon_sub<q>")))]
2845)
2846
;; vsubl: subtract of two VDI-mode registers giving a <V_widen> result.  The
;; VSUBL iterator together with <sup> selects the signedness letter.
2847(define_insn "neon_vsubl<sup><mode>"
2848  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2849        (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2850		           (match_operand:VDI 2 "s_register_operand" "w")]
2851                          VSUBL))]
2852  "TARGET_NEON"
2853  "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2854  [(set_attr "type" "neon_sub_long")]
2855)
2856
;; vsubw: subtract a VDI-mode register (operand 2) from a <V_widen>
;; register (operand 1), giving a <V_widen> result.  The VSUBW iterator
;; together with <sup> selects the signedness letter.
2857(define_insn "neon_vsubw<sup><mode>"
2858  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2859        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2860		           (match_operand:VDI 2 "s_register_operand" "w")]
2861			  VSUBW))]
2862  "TARGET_NEON"
2863  "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2864  [(set_attr "type" "neon_sub_widen")]
2865)
2866
;; vqsub on VDQIX modes; the VQSUB iterator together with <sup> selects the
;; signedness letter printed in the mnemonic.
2867(define_insn "neon_vqsub<sup><mode>"
2868  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2869        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2870		       (match_operand:VDQIX 2 "s_register_operand" "w")]
2871		      VQSUB))]
2872  "TARGET_NEON"
2873  "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2874  [(set_attr "type" "neon_qsub<q>")]
2875)
2876
;; vhsub on VDQIW modes; the VHSUB iterator together with <sup> selects the
;; signedness letter printed in the mnemonic.
2877(define_insn "neon_vhsub<sup><mode>"
2878  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2879        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2880		       (match_operand:VDQIW 2 "s_register_operand" "w")]
2881		      VHSUB))]
2882  "TARGET_NEON"
2883  "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2884  [(set_attr "type" "neon_sub_halve<q>")]
2885)
2886
;; vsubhn / vrsubhn: two VN-mode inputs produce a <V_narrow> result.  The
;; VSUBHN iterator supplies the unspec and <r> the optional "r" in the
;; mnemonic.
2887(define_insn "neon_v<r>subhn<mode>"
2888  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2889        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2890		            (match_operand:VN 2 "s_register_operand" "w")]
2891                           VSUBHN))]
2892  "TARGET_NEON"
2893  "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2894  [(set_attr "type" "neon_sub_halve_narrow_q")]
2895)
2896
2897;; These may expand to an UNSPEC pattern when a floating point mode is used
2898;; without unsafe math optimizations.
;; Expander for the vector comparisons in COMPARISONS over VDQW modes.
;; Operand 2 may be a register or zero (reg_or_zero_operand).  For vector
;; float modes without flag_unsafe_math_optimizations it dispatches by mode
;; to the V2SF/V4SF *_insn_unspec generators; in every other case it emits
;; neon_vc<cmp_op><mode>_insn.  The body always ends with DONE.
2899(define_expand "neon_vc<cmp_op><mode>"
2900  [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2901     (neg:<V_cmp_result>
2902       (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2903                         (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2904  "TARGET_NEON"
2905  {
2906    /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2907       is enabled.  */
2908    if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2909        && !flag_unsafe_math_optimizations)
2910      {
2911        /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2912           we define gen_neon_vceq<mode>_insn_unspec only for float modes
2913           whereas this expander iterates over the integer modes as well,
2914           but we will never expand to UNSPECs for the integer comparisons.  */
2915        switch (<MODE>mode)
2916          {
2917            case E_V2SFmode:
2918              emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2919                                                              operands[1],
2920                                                              operands[2]));
2921              break;
2922            case E_V4SFmode:
2923              emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2924                                                              operands[1],
2925                                                              operands[2]));
2926              break;
2927            default:
2928              gcc_unreachable ();
2929          }
2930      }
2931    else
2932      emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
2933                                                 operands[1],
2934                                                 operands[2]));
2935    DONE;
2936  }
2937)
2938
;; RTL-visible vector comparison for VDQW modes: the result is the negation
;; of the COMPARISONS code applied to operands 1 and 2.  Two alternatives:
;; register second operand ("w") or zero ("Dz").  The condition rejects
;; vector float modes unless flag_unsafe_math_optimizations is set.  The
;; type attribute is neon_compare_zero<q> when operand 2 matches
;; zero_operand and neon_compare<q> otherwise.
2939(define_insn "neon_vc<cmp_op><mode>_insn"
2940  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2941        (neg:<V_cmp_result>
2942          (COMPARISONS:<V_cmp_result>
2943            (match_operand:VDQW 1 "s_register_operand" "w,w")
2944            (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2945  "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2946                    && !flag_unsafe_math_optimizations)"
2947  {
    /* Build the template at output time: the element-type letter is "f" for
       vector float modes and <cmp_type> otherwise; the last operand is the
       register for alternative 0 and the literal #0 for the zero
       alternative.  The insn is printed here, so an empty template is
       returned.  */
2948    char pattern[100];
2949    sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2950                      " %%<V_reg>1, %s",
2951                       GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2952                         ? "f" : "<cmp_type>",
2953                       which_alternative == 0
2954                         ? "%<V_reg>2" : "#0");
2955    output_asm_insn (pattern, operands);
2956    return "";
2957  }
2958  [(set (attr "type")
2959        (if_then_else (match_operand 2 "zero_operand")
2960                      (const_string "neon_compare_zero<q>")
2961                      (const_string "neon_compare<q>")))]
2962)
2963
;; Floating-point vector comparison kept as an opaque unspec (NEON_VCMP
;; iterator) over VCVTF modes.  Two alternatives: register second operand
;; ("w") or zero ("Dz").
2964(define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2965  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2966        (unspec:<V_cmp_result>
2967	  [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2968	   (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2969          NEON_VCMP))]
2970  "TARGET_NEON"
2971  {
    /* The last operand is the register for alternative 0 and the literal #0
       for the zero alternative.  The insn is printed here, so an empty
       template is returned.  */
2972    char pattern[100];
2973    sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2974                       " %%<V_reg>1, %s",
2975                       which_alternative == 0
2976                         ? "%<V_reg>2" : "#0");
2977    output_asm_insn (pattern, operands);
2978    return "";
2979}
2980  [(set_attr "type" "neon_fp_compare_s<q>")]
2981)
2982
;; Expander for the COMPARISONS codes over the VH (half-precision vector)
;; modes, available under TARGET_NEON_FP16INST.  It emits the
;; *_fp16insn_unspec pattern for vector float modes without
;; flag_unsafe_math_optimizations and the *_fp16insn pattern otherwise.
;; The body always ends with DONE.
2983(define_expand "neon_vc<cmp_op><mode>"
2984 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2985  (neg:<V_cmp_result>
2986   (COMPARISONS:VH
2987    (match_operand:VH 1 "s_register_operand")
2988    (match_operand:VH 2 "reg_or_zero_operand")))]
2989 "TARGET_NEON_FP16INST"
2990{
2991  /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2992     is enabled.  */
2993  if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2994      && !flag_unsafe_math_optimizations)
2995    emit_insn
2996      (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
2997       (operands[0], operands[1], operands[2]));
2998  else
2999    emit_insn
3000      (gen_neon_vc<cmp_op><mode>_fp16insn
3001       (operands[0], operands[1], operands[2]));
3002  DONE;
3003})
3004
;; RTL-visible vector comparison for the VH modes: the result is the
;; negation of the COMPARISONS code applied to operands 1 and 2.  Two
;; alternatives: register second operand ("w") or zero ("Dz").  The
;; condition requires TARGET_NEON_FP16INST and rejects vector float modes
;; unless flag_unsafe_math_optimizations is set.
3005(define_insn "neon_vc<cmp_op><mode>_fp16insn"
3006 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3007   (neg:<V_cmp_result>
3008    (COMPARISONS:<V_cmp_result>
3009     (match_operand:VH 1 "s_register_operand" "w,w")
3010     (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
3011 "TARGET_NEON_FP16INST
3012  && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3013  && !flag_unsafe_math_optimizations)"
3014{
  /* Build the template at output time: the element-type letter is "f" for
     vector float modes and <cmp_type> otherwise; the last operand is the
     register for alternative 0 and the literal #0 for the zero alternative.
     The insn is printed here, so an empty template is returned.  */
3015  char pattern[100];
3016  sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
3017	   " %%<V_reg>1, %s",
3018	   GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3019	   ? "f" : "<cmp_type>",
3020	   which_alternative == 0
3021	   ? "%<V_reg>2" : "#0");
3022  output_asm_insn (pattern, operands);
3023  return "";
3024}
3025 [(set (attr "type")
3026   (if_then_else (match_operand 2 "zero_operand")
3027    (const_string "neon_compare_zero<q>")
3028    (const_string "neon_compare<q>")))])
3029
;; Half-precision vector comparison kept as an opaque unspec (NEON_VCMP
;; iterator) over VH modes, under TARGET_NEON_FP16INST.  Two alternatives:
;; register second operand ("w") or zero ("Dz").
3030(define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
3031 [(set
3032   (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3033   (unspec:<V_cmp_result>
3034    [(match_operand:VH 1 "s_register_operand" "w,w")
3035     (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
3036    NEON_VCMP))]
3037 "TARGET_NEON_FP16INST"
3038{
  /* The last operand is the register for alternative 0 and the literal #0
     for the zero alternative.  The insn is printed here, so an empty
     template is returned.  */
3039  char pattern[100];
3040  sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
3041	   " %%<V_reg>1, %s",
3042	   which_alternative == 0
3043	   ? "%<V_reg>2" : "#0");
3044  output_asm_insn (pattern, operands);
3045  return "";
3046}
3047 [(set_attr "type" "neon_fp_compare_s<q>")])
3048
;; Comparisons from the GTUGEU iterator on VDQIW modes, printed with the
;; ".u" element-type prefix.  Both inputs must be registers; there is no
;; compare-against-zero alternative here.
3049(define_insn "neon_vc<cmp_op>u<mode>"
3050  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3051        (neg:<V_cmp_result>
3052          (GTUGEU:<V_cmp_result>
3053	    (match_operand:VDQIW 1 "s_register_operand" "w")
3054	    (match_operand:VDQIW 2 "s_register_operand" "w"))))]
3055  "TARGET_NEON"
3056  "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3057  [(set_attr "type" "neon_compare<q>")]
3058)
3059
;; Expander for comparisons (GTGE iterator) of the absolute values of two
;; VCVTF-mode registers.  With flag_unsafe_math_optimizations it emits the
;; RTL-visible *_insn pattern; otherwise it emits the *_insn_unspec pattern.
;; The body always ends with DONE.
3060(define_expand "neon_vca<cmp_op><mode>"
3061  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
3062        (neg:<V_cmp_result>
3063          (GTGE:<V_cmp_result>
3064            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
3065            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
3066  "TARGET_NEON"
3067  {
3068    if (flag_unsafe_math_optimizations)
3069      emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
3070                                                  operands[2]));
3071    else
3072      emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
3073                                                         operands[1],
3074                                                         operands[2]));
3075    DONE;
3076  }
3077)
3078
;; RTL-visible absolute compare (vac<cmp_op>) on VCVTF modes: negated GTGE
;; comparison of abs (operand 1) with abs (operand 2).  Only enabled when
;; flag_unsafe_math_optimizations is set.
3079(define_insn "neon_vca<cmp_op><mode>_insn"
3080  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3081        (neg:<V_cmp_result>
3082          (GTGE:<V_cmp_result>
3083            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
3084            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
3085  "TARGET_NEON && flag_unsafe_math_optimizations"
3086  "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3087  [(set_attr "type" "neon_fp_compare_s<q>")]
3088)
3089
;; Absolute compare (vac<cmp_op_unsp>) on VCVTF modes kept as an opaque
;; unspec from the NEON_VACMP iterator.
3090(define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
3091  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3092        (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
3093		                (match_operand:VCVTF 2 "s_register_operand" "w")]
3094                               NEON_VACMP))]
3095  "TARGET_NEON"
3096  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3097  [(set_attr "type" "neon_fp_compare_s<q>")]
3098)
3099
;; Expander for comparisons (GLTE iterator) of the absolute values of two
;; VH-mode registers, under TARGET_NEON_FP16INST.  With
;; flag_unsafe_math_optimizations it emits the *_fp16insn pattern; otherwise
;; it emits the *_fp16insn_unspec pattern.  The body always ends with DONE.
3100(define_expand "neon_vca<cmp_op><mode>"
3101  [(set
3102    (match_operand:<V_cmp_result> 0 "s_register_operand")
3103    (neg:<V_cmp_result>
3104     (GLTE:<V_cmp_result>
3105      (abs:VH (match_operand:VH 1 "s_register_operand"))
3106      (abs:VH (match_operand:VH 2 "s_register_operand")))))]
3107 "TARGET_NEON_FP16INST"
3108{
3109  if (flag_unsafe_math_optimizations)
3110    emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
3111	       (operands[0], operands[1], operands[2]));
3112  else
3113    emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
3114	       (operands[0], operands[1], operands[2]));
3115  DONE;
3116})
3117
;; RTL-visible absolute compare (vac<cmp_op>) on VH modes: negated GLTE
;; comparison of abs (operand 1) with abs (operand 2).  Requires
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
3118(define_insn "neon_vca<cmp_op><mode>_fp16insn"
3119  [(set
3120    (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3121    (neg:<V_cmp_result>
3122     (GLTE:<V_cmp_result>
3123      (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
3124      (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
3125 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
3126 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3127 [(set_attr "type" "neon_fp_compare_s<q>")]
3128)
3129
;; Absolute compare (vac<cmp_op_unsp>) on VH modes kept as an opaque unspec
;; from the NEON_VAGLTE iterator.  The insn condition is
;; TARGET_NEON_FP16INST, the same gate used by the VH expander that
;; generates this pattern and by the RTL-visible *_fp16insn sibling, because
;; the instruction operates on half-precision vectors.
3130(define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
3131 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3132   (unspec:<V_cmp_result>
3133    [(match_operand:VH 1 "s_register_operand" "w")
3134     (match_operand:VH 2 "s_register_operand" "w")]
3135    NEON_VAGLTE))]
3136 "TARGET_NEON_FP16INST"
3137 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3138 [(set_attr "type" "neon_fp_compare_s<q>")]
3139)
3140
;; Compare-against-zero expander for the VH modes.  It forwards to
;; gen_neon_vc<cmp_op><mode> with CONST0_RTX (<MODE>mode) as the second
;; comparison operand and then ends with DONE.
3141(define_expand "neon_vc<cmp_op>z<mode>"
3142 [(set
3143   (match_operand:<V_cmp_result> 0 "s_register_operand")
3144   (COMPARISONS:<V_cmp_result>
3145    (match_operand:VH 1 "s_register_operand")
3146    (const_int 0)))]
3147 "TARGET_NEON_FP16INST"
3148 {
3149  emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
3150					CONST0_RTX (<MODE>mode)));
3151  DONE;
3152})
3153
;; vtst on VDQIW modes, modelled as UNSPEC_VTST of two register operands.
3154(define_insn "neon_vtst<mode>"
3155  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3156        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3157		       (match_operand:VDQIW 2 "s_register_operand" "w")]
3158		      UNSPEC_VTST))]
3159  "TARGET_NEON"
3160  "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3161  [(set_attr "type" "neon_tst<q>")]
3162)
3163
;; vabd on VDQIW modes; the VABD iterator together with <sup> selects the
;; signedness letter printed in the mnemonic.
3164(define_insn "neon_vabd<sup><mode>"
3165  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3166        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3167		      (match_operand:VDQIW 2 "s_register_operand" "w")]
3168		     VABD))]
3169  "TARGET_NEON"
3170  "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3171  [(set_attr "type" "neon_abd<q>")]
3172)
3173
;; vabd on the VH (half-precision vector) modes, modelled as UNSPEC_VABD_F
;; and available under TARGET_NEON_FP16INST.  It carries the floating-point
;; scheduling type neon_fp_abd_s<q>, the same type used by the VCVTF pattern
;; built on the same unspec, rather than the integer neon_abd<q> class.
3174(define_insn "neon_vabd<mode>"
3175  [(set (match_operand:VH 0 "s_register_operand" "=w")
3176    (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3177		(match_operand:VH 2 "s_register_operand" "w")]
3178     UNSPEC_VABD_F))]
3179 "TARGET_NEON_FP16INST"
3180 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3181  [(set_attr "type" "neon_fp_abd_s<q>")]
3182)
3183
;; vabd on VCVTF modes, modelled as UNSPEC_VABD_F of two register operands.
3184(define_insn "neon_vabdf<mode>"
3185  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3186        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3187		      (match_operand:VCVTF 2 "s_register_operand" "w")]
3188		     UNSPEC_VABD_F))]
3189  "TARGET_NEON"
3190  "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3191  [(set_attr "type" "neon_fp_abd_s<q>")]
3192)
3193
;; vabdl: two VW-mode inputs produce a <V_widen> result.  The VABDL iterator
;; together with <sup> selects the signedness letter.
3194(define_insn "neon_vabdl<sup><mode>"
3195  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3196        (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3197		           (match_operand:VW 2 "s_register_operand" "w")]
3198                          VABDL))]
3199  "TARGET_NEON"
3200  "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
3201  [(set_attr "type" "neon_abd_long")]
3202)
3203
;; vaba: the VABD unspec of operands 2 and 3 is added (plus) to the
;; accumulator, operand 1, which is tied to the destination.
3204(define_insn "neon_vaba<sup><mode>"
3205  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3206        (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
3207		                   (match_operand:VDQIW 3 "s_register_operand" "w")]
3208		                  VABD)
3209		    (match_operand:VDQIW 1 "s_register_operand" "0")))]
3210  "TARGET_NEON"
3211  "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3212  [(set_attr "type" "neon_arith_acc<q>")]
3213)
3214
;; vabal: the VABDL unspec of VW-mode operands 2 and 3 (a <V_widen> value)
;; is added (plus) to the <V_widen> accumulator, operand 1, which is tied to
;; the destination.
3215(define_insn "neon_vabal<sup><mode>"
3216  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3217        (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
3218                                           (match_operand:VW 3 "s_register_operand" "w")]
3219					   VABDL)
3220			 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
3221  "TARGET_NEON"
3222  "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
3223  [(set_attr "type" "neon_arith_acc<q>")]
3224)
3225
;; vmax / vmin on VDQIW modes.  The VMAXMIN iterator supplies the unspec;
;; <maxmin> and <sup> fill in the mnemonic and its signedness letter.
3226(define_insn "neon_v<maxmin><sup><mode>"
3227  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3228        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3229		      (match_operand:VDQIW 2 "s_register_operand" "w")]
3230                     VMAXMIN))]
3231  "TARGET_NEON"
3232  "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3233  [(set_attr "type" "neon_minmax<q>")]
3234)
3235
;; vmax / vmin on VCVTF modes, using the VMAXMINF unspec iterator.
3236(define_insn "neon_v<maxmin>f<mode>"
3237  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3238        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3239		      (match_operand:VCVTF 2 "s_register_operand" "w")]
3240                     VMAXMINF))]
3241  "TARGET_NEON"
3242  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3243  [(set_attr "type" "neon_fp_minmax_s<q>")]
3244)
3245
;; vmax / vmin on the VH (half-precision vector) modes, using the VMAXMINF
;; unspec iterator; available under TARGET_NEON_FP16INST.
3246(define_insn "neon_v<maxmin>f<mode>"
3247 [(set (match_operand:VH 0 "s_register_operand" "=w")
3248   (unspec:VH
3249    [(match_operand:VH 1 "s_register_operand" "w")
3250     (match_operand:VH 2 "s_register_operand" "w")]
3251    VMAXMINF))]
3252 "TARGET_NEON_FP16INST"
3253 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3254 [(set_attr "type" "neon_fp_minmax_s<q>")]
3255)
3256
;; vpmax / vpmin for V4HF only, using the VPMAXMINF unspec iterator and
;; available under TARGET_NEON_FP16INST.  It carries the floating-point
;; scheduling type neon_fp_reduc_minmax_s, matching the VCVTF pattern built
;; on the same iterator, rather than the integer neon_reduc_minmax class.
;; The pattern has no mode iterator, so no <q> suffix is used.
3257(define_insn "neon_vp<maxmin>fv4hf"
3258 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3259   (unspec:V4HF
3260    [(match_operand:V4HF 1 "s_register_operand" "w")
3261     (match_operand:V4HF 2 "s_register_operand" "w")]
3262    VPMAXMINF))]
3263 "TARGET_NEON_FP16INST"
3264 "vp<maxmin>.f16\t%P0, %P1, %P2"
3265  [(set_attr "type" "neon_fp_reduc_minmax_s")]
3266)
3267
;; <fmaxmin_op> on the VH (half-precision vector) modes, using the
;; VMAXMINFNM unspec iterator; available under TARGET_NEON_FP16INST.
3268(define_insn "neon_<fmaxmin_op><mode>"
3269 [(set
3270   (match_operand:VH 0 "s_register_operand" "=w")
3271   (unspec:VH
3272    [(match_operand:VH 1 "s_register_operand" "w")
3273     (match_operand:VH 2 "s_register_operand" "w")]
3274    VMAXMINFNM))]
3275 "TARGET_NEON_FP16INST"
3276 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3277 [(set_attr "type" "neon_fp_minmax_s<q>")]
3278)
3279
3280;; v<maxmin>nm intrinsics.
;; <fmaxmin_op> on VCVTF modes, using the VMAXMINFNM unspec iterator.
;; Requires both TARGET_NEON and TARGET_VFP5.
3281(define_insn "neon_<fmaxmin_op><mode>"
3282  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3283	(unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3284		       (match_operand:VCVTF 2 "s_register_operand" "w")]
3285		       VMAXMINFNM))]
3286  "TARGET_NEON && TARGET_VFP5"
3287  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3288  [(set_attr "type" "neon_fp_minmax_s<q>")]
3289)
3290
3291;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Same RTL and output template as the neon_<fmaxmin_op><mode> VCVTF
;; pattern, exposed under the <fmaxmin><mode>3 name.  Requires both
;; TARGET_NEON and TARGET_VFP5.
3292(define_insn "<fmaxmin><mode>3"
3293  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3294	(unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3295		       (match_operand:VCVTF 2 "s_register_operand" "w")]
3296		       VMAXMINFNM))]
3297  "TARGET_NEON && TARGET_VFP5"
3298  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3299  [(set_attr "type" "neon_fp_minmax_s<q>")]
3300)
3301
;; Expander for vpadd on VD modes.  It simply forwards its three operands to
;; gen_neon_vpadd_internal<mode> and ends with DONE.
3302(define_expand "neon_vpadd<mode>"
3303  [(match_operand:VD 0 "s_register_operand" "=w")
3304   (match_operand:VD 1 "s_register_operand" "w")
3305   (match_operand:VD 2 "s_register_operand" "w")]
3306  "TARGET_NEON"
3307{
3308  emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
3309					    operands[2]));
3310  DONE;
3311})
3312
;; vpaddl: a single VDQIW-mode input produces a <V_double_width> result.
;; The VPADDL iterator together with <sup> selects the signedness letter.
3313(define_insn "neon_vpaddl<sup><mode>"
3314  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3315        (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
3316                                 VPADDL))]
3317  "TARGET_NEON"
3318  "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3319  [(set_attr "type" "neon_reduc_add_long")]
3320)
3321
;; vpadal: the VDQIW-mode input (operand 2) is combined into the
;; <V_double_width> accumulator (operand 1), which is tied to the
;; destination.  The VPADAL iterator together with <sup> selects the
;; signedness letter.
3322(define_insn "neon_vpadal<sup><mode>"
3323  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3324        (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
3325                                  (match_operand:VDQIW 2 "s_register_operand" "w")]
3326                                 VPADAL))]
3327  "TARGET_NEON"
3328  "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
3329  [(set_attr "type" "neon_reduc_add_acc")]
3330)
3331
;; "neon_vp<maxmin><sup><mode>": two-input unspec from the VPMAXMIN iterator
;; over the modes of the VDI iterator.  Emits "vp<maxmin>" with a type suffix
;; built from <sup> and the element size; all three operands are NEON
;; registers.
3332(define_insn "neon_vp<maxmin><sup><mode>"
3333  [(set (match_operand:VDI 0 "s_register_operand" "=w")
3334        (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
3335		    (match_operand:VDI 2 "s_register_operand" "w")]
3336                   VPMAXMIN))]
3337  "TARGET_NEON"
3338  "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3339  [(set_attr "type" "neon_reduc_minmax<q>")]
3340)
3341
;; "neon_vp<maxmin>f<mode>": counterpart of the pattern above for the modes of
;; the VCVTF iterator, using the VPMAXMINF unspec iterator.  The type suffix
;; comes from <V_s_elem> instead of <sup> plus the element size.
3342(define_insn "neon_vp<maxmin>f<mode>"
3343  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3344        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3345		    (match_operand:VCVTF 2 "s_register_operand" "w")]
3346                   VPMAXMINF))]
3347  "TARGET_NEON"
3348  "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3349  [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
3350)
3351
;; "neon_vrecps<mode>" for the modes of the VCVTF iterator: UNSPEC_VRECPS on
;; two register inputs, emitted as a three-operand vrecps instruction.
;; Enabled by TARGET_NEON alone.
3352(define_insn "neon_vrecps<mode>"
3353  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3354        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3355		       (match_operand:VCVTF 2 "s_register_operand" "w")]
3356                      UNSPEC_VRECPS))]
3357  "TARGET_NEON"
3358  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3359  [(set_attr "type" "neon_fp_recps_s<q>")]
3360)
3361
;; "neon_vrecps<mode>" for the modes of the VH iterator.  Same RTL shape and
;; output template as the VCVTF variant, but guarded by TARGET_NEON_FP16INST
;; instead of TARGET_NEON.
3362(define_insn "neon_vrecps<mode>"
3363  [(set
3364    (match_operand:VH 0 "s_register_operand" "=w")
3365    (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3366		(match_operand:VH 2 "s_register_operand" "w")]
3367     UNSPEC_VRECPS))]
3368  "TARGET_NEON_FP16INST"
3369  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3370  [(set_attr "type" "neon_fp_recps_s<q>")]
3371)
3372
;; "neon_vrsqrts<mode>" for the modes of the VCVTF iterator: UNSPEC_VRSQRTS on
;; two register inputs, emitted as a three-operand vrsqrts instruction.
;; Enabled by TARGET_NEON alone.
3373(define_insn "neon_vrsqrts<mode>"
3374  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3375        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3376		       (match_operand:VCVTF 2 "s_register_operand" "w")]
3377                      UNSPEC_VRSQRTS))]
3378  "TARGET_NEON"
3379  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3380  [(set_attr "type" "neon_fp_rsqrts_s<q>")]
3381)
3382
;; "neon_vrsqrts<mode>" for the modes of the VH iterator.  Same RTL shape and
;; output template as the VCVTF variant, but guarded by TARGET_NEON_FP16INST
;; instead of TARGET_NEON.
3383(define_insn "neon_vrsqrts<mode>"
3384  [(set
3385    (match_operand:VH 0 "s_register_operand" "=w")
3386    (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3387		 (match_operand:VH 2 "s_register_operand" "w")]
3388     UNSPEC_VRSQRTS))]
3389 "TARGET_NEON_FP16INST"
3390 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3391 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
3392)
3393
;; Expander "neon_vabs<mode>" for the modes of the VDQW iterator.  It simply
;; forwards both operands to the standard-named abs<mode>2 pattern.
3394(define_expand "neon_vabs<mode>"
3395  [(match_operand:VDQW 0 "s_register_operand" "")
3396   (match_operand:VDQW 1 "s_register_operand" "")]
3397  "TARGET_NEON"
3398{
3399  emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
3400  DONE;
3401})
3402
;; "neon_vqabs<mode>" for the modes of the VDQIW iterator: one-input
;; UNSPEC_VQABS, emitted as a two-operand vqabs instruction.
3403(define_insn "neon_vqabs<mode>"
3404  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3405	(unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3406		      UNSPEC_VQABS))]
3407  "TARGET_NEON"
3408  "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3409  [(set_attr "type" "neon_qabs<q>")]
3410)
3411
;; "neon_bswap<mode>" for the modes of the VDQHSD iterator.  Unlike most
;; patterns in this section it uses the generic bswap RTL code, not an unspec.
;; It is emitted as "vrev<V_sz_elem>.8", i.e. the vrev variant named after the
;; element size, applied with a ".8" type suffix.
3412(define_insn "neon_bswap<mode>"
3413  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
3414        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
3415  "TARGET_NEON"
3416  "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
3417  [(set_attr "type" "neon_rev<q>")]
3418)
3419
;; Expander "neon_vneg<mode>" for the modes of the VDQW iterator.  It simply
;; forwards both operands to the standard-named neg<mode>2 pattern.
3420(define_expand "neon_vneg<mode>"
3421  [(match_operand:VDQW 0 "s_register_operand" "")
3422   (match_operand:VDQW 1 "s_register_operand" "")]
3423  "TARGET_NEON"
3424{
3425  emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
3426  DONE;
3427})
3428
3429;; These instructions map to the __builtins for the Dot Product operations.
;; The RTL is (plus accumulator (unspec DOTPROD [op2 op3])).  The accumulator
;; (operand 1) is tied to the destination by the "0" constraint, so the
;; assembly template only names operands 0, 2 and 3.  The multiplicands use
;; the <VSI2QI> mode derived from the VCVTI result mode.  Requires
;; TARGET_DOTPROD.
3430(define_insn "neon_<sup>dot<vsi2qi>"
3431  [(set (match_operand:VCVTI 0 "register_operand" "=w")
3432	(plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3433		    (unspec:VCVTI [(match_operand:<VSI2QI> 2
3434							"register_operand" "w")
3435				   (match_operand:<VSI2QI> 3
3436							"register_operand" "w")]
3437		DOTPROD)))]
3438  "TARGET_DOTPROD"
3439  "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3440  [(set_attr "type" "neon_dot")]
3441)
3442
3443;; These instructions map to the __builtins for the Dot Product
3444;; indexed operations.
;; Same accumulate shape as neon_<sup>dot<vsi2qi>, but the second multiplicand
;; (operand 3) is always V8QI with constraint "t", and operand 4 is an
;; immediate lane index.  Before printing, the output code rewrites operand 4
;; through NEON_ENDIAN_LANE_N for V8QImode, and operand 3 is printed as a
;; D register with the lane subscript "%P3[%c4]".
3445(define_insn "neon_<sup>dot_lane<vsi2qi>"
3446  [(set (match_operand:VCVTI 0 "register_operand" "=w")
3447	(plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3448		    (unspec:VCVTI [(match_operand:<VSI2QI> 2
3449							"register_operand" "w")
3450				   (match_operand:V8QI 3 "register_operand" "t")
3451				   (match_operand:SI 4 "immediate_operand" "i")]
3452		DOTPROD)))]
3453  "TARGET_DOTPROD"
3454  {
3455    operands[4]
3456      = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
3457    return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3458  }
3459  [(set_attr "type" "neon_dot")]
3460)
3461
3462;; These expands map to the Dot Product optab the vectorizer checks for.
3463;; The auto-vectorizer expects a dot product builtin that also does an
3464;; accumulation into the provided register.
3465;; Given the following pattern
3466;;
3467;; for (i=0; i<len; i++) {
3468;;     c = a[i] * b[i];
3469;;     r += c;
3470;; }
3471;; return r;
3472;;
3473;; This can be auto-vectorized to
3474;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
3475;;
3476;; given enough iterations.  However the vectorizer can keep unrolling the loop
3477;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
3478;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
3479;; ...
3480;;
3481;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Expander "<sup>dot_prod<vsi2qi>".  Operands 1 and 2 are the <VSI2QI>
;; multiplicands, operand 3 is the VCVTI accumulator input and operand 0 the
;; result.  The body emits neon_<sup>dot<vsi2qi> with operand 3 as both
;; destination and accumulator (so operand 3 itself is updated), then copies
;; operand 3 into operand 0.
3482(define_expand "<sup>dot_prod<vsi2qi>"
3483  [(set (match_operand:VCVTI 0 "register_operand")
3484	(plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
3485							"register_operand")
3486				   (match_operand:<VSI2QI> 2
3487							"register_operand")]
3488		     DOTPROD)
3489		    (match_operand:VCVTI 3 "register_operand")))]
3490  "TARGET_DOTPROD"
3491{
3492  emit_insn (
3493    gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1],
3494				 operands[2]));
3495  emit_insn (gen_rtx_SET (operands[0], operands[3]));
3496  DONE;
3497})
3498
;; Expander "neon_copysignf<mode>" for the modes of the VCVTF iterator.
;; Steps performed by the body, in order:
;;   1. Allocate a fresh register in the <V_cmp_result> mode and load it with
;;      a vector whose every element is the constant 0x80000000.
;;   2. Copy operand 2 into operand 0.
;;   3. View the mask register in <MODE>mode via a subreg at byte offset 0.
;;   4. Emit neon_vbsl<mode> with destination operand 0 and inputs
;;      (mask, operand 0, operand 1).
;; NOTE(review): 0x80000000 is presumably the sign bit of a 32-bit float
;; element -- confirm that every VCVTF mode has 32-bit elements.
3499(define_expand "neon_copysignf<mode>"
3500  [(match_operand:VCVTF 0 "register_operand")
3501   (match_operand:VCVTF 1 "register_operand")
3502   (match_operand:VCVTF 2 "register_operand")]
3503  "TARGET_NEON"
3504  "{
3505     rtx v_bitmask_cast;
3506     rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3507     rtx c = GEN_INT (0x80000000);
3508
3509     emit_move_insn (v_bitmask,
3510		     gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
3511     emit_move_insn (operands[0], operands[2]);
3512     v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3513					   <VCVTF:V_cmp_result>mode, 0);
3514     emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
3515				     operands[1]));
3516
3517     DONE;
3518  }"
3519)
3520
;; "neon_vqneg<mode>" for the modes of the VDQIW iterator: one-input
;; UNSPEC_VQNEG, emitted as a two-operand vqneg instruction.
3521(define_insn "neon_vqneg<mode>"
3522  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3523	(unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3524		      UNSPEC_VQNEG))]
3525  "TARGET_NEON"
3526  "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3527  [(set_attr "type" "neon_qneg<q>")]
3528)
3529
;; "neon_vcls<mode>" for the modes of the VDQIW iterator: one-input
;; UNSPEC_VCLS, emitted as a two-operand vcls instruction.
3530(define_insn "neon_vcls<mode>"
3531  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3532	(unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3533		      UNSPEC_VCLS))]
3534  "TARGET_NEON"
3535  "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3536  [(set_attr "type" "neon_cls<q>")]
3537)
3538
;; Standard-named "clz<mode>2" pattern for the modes of the VDQIW iterator.
;; It uses the generic clz RTL code and is emitted as a vclz instruction.
;; neon_vclz<mode> expands to this pattern.
3539(define_insn "clz<mode>2"
3540  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3541        (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3542  "TARGET_NEON"
3543  "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3544  [(set_attr "type" "neon_cnt<q>")]
3545)
3546
;; Expander "neon_vclz<mode>" for the modes of the VDQIW iterator.  It simply
;; forwards both operands to the clz<mode>2 pattern.
3547(define_expand "neon_vclz<mode>"
3548  [(match_operand:VDQIW 0 "s_register_operand" "")
3549   (match_operand:VDQIW 1 "s_register_operand" "")]
3550  "TARGET_NEON"
3551{
3552  emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
3553  DONE;
3554})
3555
;; Standard-named "popcount<mode>2" pattern for the modes of the VE iterator.
;; It uses the generic popcount RTL code and is emitted as a vcnt instruction.
;; neon_vcnt<mode> expands to this pattern.
3556(define_insn "popcount<mode>2"
3557  [(set (match_operand:VE 0 "s_register_operand" "=w")
3558        (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3559  "TARGET_NEON"
3560  "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3561  [(set_attr "type" "neon_cnt<q>")]
3562)
3563
;; Expander "neon_vcnt<mode>" for the modes of the VE iterator.  It simply
;; forwards both operands to the popcount<mode>2 pattern.
3564(define_expand "neon_vcnt<mode>"
3565  [(match_operand:VE 0 "s_register_operand" "=w")
3566   (match_operand:VE 1 "s_register_operand" "w")]
3567  "TARGET_NEON"
3568{
3569  emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
3570  DONE;
3571})
3572
;; "neon_vrecpe<mode>" for the modes of the VH iterator: one-input
;; UNSPEC_VRECPE, emitted with a fixed ".f16" type suffix.  Guarded by
;; TARGET_NEON_FP16INST.
3573(define_insn "neon_vrecpe<mode>"
3574  [(set (match_operand:VH 0 "s_register_operand" "=w")
3575	(unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3576		   UNSPEC_VRECPE))]
3577  "TARGET_NEON_FP16INST"
3578  "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3579  [(set_attr "type" "neon_fp_recpe_s<q>")]
3580)
3581
;; "neon_vrecpe<mode>" for the modes of the V32 iterator: one-input
;; UNSPEC_VRECPE, with the type suffix taken from <V_u_elem>.  Guarded by
;; TARGET_NEON.
3582(define_insn "neon_vrecpe<mode>"
3583  [(set (match_operand:V32 0 "s_register_operand" "=w")
3584	(unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3585                    UNSPEC_VRECPE))]
3586  "TARGET_NEON"
3587  "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3588  [(set_attr "type" "neon_fp_recpe_s<q>")]
3589)
3590
;; "neon_vrsqrte<mode>" for the modes of the V32 iterator: one-input
;; UNSPEC_VRSQRTE, with the type suffix taken from <V_u_elem>.  Guarded by
;; TARGET_NEON.
3591(define_insn "neon_vrsqrte<mode>"
3592  [(set (match_operand:V32 0 "s_register_operand" "=w")
3593	(unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3594                    UNSPEC_VRSQRTE))]
3595  "TARGET_NEON"
3596  "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3597  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
3598)
3599
;; Expander "neon_vmvn<mode>" for the modes of the VDQIW iterator.  It simply
;; forwards both operands to the standard-named one_cmpl<mode>2 pattern.
3600(define_expand "neon_vmvn<mode>"
3601  [(match_operand:VDQIW 0 "s_register_operand" "")
3602   (match_operand:VDQIW 1 "s_register_operand" "")]
3603  "TARGET_NEON"
3604{
3605  emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
3606  DONE;
3607})
3608
;; Sign-extending lane extract into a core register, for the modes of the VD
;; iterator.  The RTL is (sign_extend:SI (vec_select vector [lane])).
;; On big-endian targets the output code mirrors the lane index
;; (nunits - 1 - lane) before printing the "vmov.s<size>" instruction; on
;; little-endian the index is printed unchanged.
3609(define_insn "neon_vget_lane<mode>_sext_internal"
3610  [(set (match_operand:SI 0 "s_register_operand" "=r")
3611	(sign_extend:SI
3612	  (vec_select:<V_elem>
3613	    (match_operand:VD 1 "s_register_operand" "w")
3614	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3615  "TARGET_NEON"
3616{
3617  if (BYTES_BIG_ENDIAN)
3618    {
3619      int elt = INTVAL (operands[2]);
3620      elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3621      operands[2] = GEN_INT (elt);
3622    }
3623  return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3624}
3625  [(set_attr "type" "neon_to_gp")]
3626)
3627
;; Zero-extending lane extract into a core register, for the modes of the VD
;; iterator.  Identical to the _sext_internal pattern above except that the
;; RTL uses zero_extend and the instruction is printed as "vmov.u<size>".
;; On big-endian targets the lane index is mirrored (nunits - 1 - lane).
3628(define_insn "neon_vget_lane<mode>_zext_internal"
3629  [(set (match_operand:SI 0 "s_register_operand" "=r")
3630	(zero_extend:SI
3631	  (vec_select:<V_elem>
3632	    (match_operand:VD 1 "s_register_operand" "w")
3633	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3634  "TARGET_NEON"
3635{
3636  if (BYTES_BIG_ENDIAN)
3637    {
3638      int elt = INTVAL (operands[2]);
3639      elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3640      operands[2] = GEN_INT (elt);
3641    }
3642  return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3643}
3644  [(set_attr "type" "neon_to_gp")]
3645)
3646
;; Sign-extending lane extract into a core register, for the modes of the VQ2
;; iterator.  The output code splits the lane index into a half selector
;; (elt / halfelts) and a lane within that half (elt % halfelts).  It builds a
;; <V_HALF>mode register at hard register number regno + 2 * half, mirrors
;; the in-half lane on big-endian targets, and prints the instruction itself
;; through output_asm_insn.  The returned template is therefore empty.
3647(define_insn "neon_vget_lane<mode>_sext_internal"
3648  [(set (match_operand:SI 0 "s_register_operand" "=r")
3649	(sign_extend:SI
3650	  (vec_select:<V_elem>
3651	    (match_operand:VQ2 1 "s_register_operand" "w")
3652	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3653  "TARGET_NEON"
3654{
3655  rtx ops[3];
3656  int regno = REGNO (operands[1]);
3657  unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3658  unsigned int elt = INTVAL (operands[2]);
3659  unsigned int elt_adj = elt % halfelts;
3660
3661  if (BYTES_BIG_ENDIAN)
3662    elt_adj = halfelts - 1 - elt_adj;
3663
3664  ops[0] = operands[0];
3665  ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3666  ops[2] = GEN_INT (elt_adj);
3667  output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3668
3669  return "";
3670}
3671  [(set_attr "type" "neon_to_gp_q")]
3672)
3673
;; Zero-extending lane extract into a core register, for the modes of the VQ2
;; iterator.  Same output code as the VQ2 _sext_internal pattern: select the
;; <V_HALF>mode register at regno + 2 * (elt / halfelts), use lane
;; elt % halfelts (mirrored on big-endian), print via output_asm_insn and
;; return an empty template.  It differs only in using zero_extend and
;; "vmov.u<size>".
3674(define_insn "neon_vget_lane<mode>_zext_internal"
3675  [(set (match_operand:SI 0 "s_register_operand" "=r")
3676	(zero_extend:SI
3677	  (vec_select:<V_elem>
3678	    (match_operand:VQ2 1 "s_register_operand" "w")
3679	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3680  "TARGET_NEON"
3681{
3682  rtx ops[3];
3683  int regno = REGNO (operands[1]);
3684  unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3685  unsigned int elt = INTVAL (operands[2]);
3686  unsigned int elt_adj = elt % halfelts;
3687
3688  if (BYTES_BIG_ENDIAN)
3689    elt_adj = halfelts - 1 - elt_adj;
3690
3691  ops[0] = operands[0];
3692  ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3693  ops[2] = GEN_INT (elt_adj);
3694  output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3695
3696  return "";
3697}
3698  [(set_attr "type" "neon_to_gp_q")]
3699)
3700
;; Expander "neon_vget_lane<mode>" for the modes of the VDQW iterator.
;; On big-endian targets the lane number is first XORed with
;; (number of elements in 64 bits) - 1.  Modes with 32-bit elements then use
;; vec_extract<mode><V_elem_l>.  All other element sizes use the
;; sign-extending neon_vget_lane<mode>_sext_internal pattern.
3701(define_expand "neon_vget_lane<mode>"
3702  [(match_operand:<V_ext> 0 "s_register_operand" "")
3703   (match_operand:VDQW 1 "s_register_operand" "")
3704   (match_operand:SI 2 "immediate_operand" "")]
3705  "TARGET_NEON"
3706{
3707  if (BYTES_BIG_ENDIAN)
3708    {
3709      /* The intrinsics are defined in terms of a model where the
3710	 element ordering in memory is vldm order, whereas the generic
3711	 RTL is defined in terms of a model where the element ordering
3712	 in memory is array order.  Convert the lane number to conform
3713	 to this model.  */
3714      unsigned int elt = INTVAL (operands[2]);
3715      unsigned int reg_nelts
3716	= 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3717      elt ^= reg_nelts - 1;
3718      operands[2] = GEN_INT (elt);
3719    }
3720
3721  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3722    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3723						operands[2]));
3724  else
3725    emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
3726						       operands[1],
3727						       operands[2]));
3728  DONE;
3729})
3730
;; Expander "neon_vget_laneu<mode>" for the modes of the VDQIW iterator.
;; Same structure as neon_vget_lane<mode>: big-endian lane remapping by XOR
;; with (elements per 64 bits) - 1, then vec_extract<mode><V_elem_l> for
;; 32-bit elements.  For other element sizes it uses the zero-extending
;; neon_vget_lane<mode>_zext_internal pattern instead of the sign-extending
;; one.
3731(define_expand "neon_vget_laneu<mode>"
3732  [(match_operand:<V_ext> 0 "s_register_operand" "")
3733   (match_operand:VDQIW 1 "s_register_operand" "")
3734   (match_operand:SI 2 "immediate_operand" "")]
3735  "TARGET_NEON"
3736{
3737  if (BYTES_BIG_ENDIAN)
3738    {
3739      /* The intrinsics are defined in terms of a model where the
3740	 element ordering in memory is vldm order, whereas the generic
3741	 RTL is defined in terms of a model where the element ordering
3742	 in memory is array order.  Convert the lane number to conform
3743	 to this model.  */
3744      unsigned int elt = INTVAL (operands[2]);
3745      unsigned int reg_nelts
3746	= 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3747      elt ^= reg_nelts - 1;
3748      operands[2] = GEN_INT (elt);
3749    }
3750
3751  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3752    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3753						operands[2]));
3754  else
3755    emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
3756						       operands[1],
3757						       operands[2]));
3758  DONE;
3759})
3760
;; Expander "neon_vget_lanedi": both the source and the result are DImode, so
;; the body is a plain move from operand 1 to operand 0.  The lane operand
;; (operand 2) is accepted but never read.
3761(define_expand "neon_vget_lanedi"
3762  [(match_operand:DI 0 "s_register_operand" "=r")
3763   (match_operand:DI 1 "s_register_operand" "w")
3764   (match_operand:SI 2 "immediate_operand" "")]
3765  "TARGET_NEON"
3766{
3767  emit_move_insn (operands[0], operands[1]);
3768  DONE;
3769})
3770
;; Expander "neon_vget_lanev2di": extracts one DImode element from a V2DI
;; register.  On big-endian targets the lane number is first XORed with 1.
;; The (possibly remapped) lane must then be 0 or 1, which is enforced by
;; gcc_assert.  Lane 0 moves gen_lowpart of the vector and lane 1 moves
;; gen_highpart.
3771(define_expand "neon_vget_lanev2di"
3772  [(match_operand:DI 0 "s_register_operand" "")
3773   (match_operand:V2DI 1 "s_register_operand" "")
3774   (match_operand:SI 2 "immediate_operand" "")]
3775  "TARGET_NEON"
3776{
3777  int lane;
3778
3779if (BYTES_BIG_ENDIAN)
3780    {
3781      /* The intrinsics are defined in terms of a model where the
3782	 element ordering in memory is vldm order, whereas the generic
3783	 RTL is defined in terms of a model where the element ordering
3784	 in memory is array order.  Convert the lane number to conform
3785	 to this model.  */
3786      unsigned int elt = INTVAL (operands[2]);
3787      unsigned int reg_nelts = 2;
3788      elt ^= reg_nelts - 1;
3789      operands[2] = GEN_INT (elt);
3790    }
3791
3792  lane = INTVAL (operands[2]);
3793  gcc_assert ((lane ==0) || (lane == 1));
3794  emit_move_insn (operands[0], lane == 0
3795				? gen_lowpart (DImode, operands[1])
3796				: gen_highpart (DImode, operands[1]));
3797  DONE;
3798})
3799
;; Expander "neon_vset_lane<mode>" for the modes of the VDQ iterator.
;; Operand 1 is the new element, operand 2 the input vector and operand 3 the
;; lane number.  On big-endian targets the lane is XORed with
;; (elements per 64 bits) - 1.  The lane is passed to vec_set<mode>_internal
;; as the one-hot mask (1 << lane), not as an index.
3800(define_expand "neon_vset_lane<mode>"
3801  [(match_operand:VDQ 0 "s_register_operand" "=w")
3802   (match_operand:<V_elem> 1 "s_register_operand" "r")
3803   (match_operand:VDQ 2 "s_register_operand" "0")
3804   (match_operand:SI 3 "immediate_operand" "i")]
3805  "TARGET_NEON"
3806{
3807  unsigned int elt = INTVAL (operands[3]);
3808
3809  if (BYTES_BIG_ENDIAN)
3810    {
3811      unsigned int reg_nelts
3812	= 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3813      elt ^= reg_nelts - 1;
3814    }
3815
3816  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3817                                         GEN_INT (1 << elt), operands[2]));
3818  DONE;
3819})
3820
3821; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
3822
;; Expander "neon_vset_lanedi": the body only moves operand 1 (the new DImode
;; value) into operand 0.  Operands 2 and 3 are matched but never referenced
;; by the body.
3823(define_expand "neon_vset_lanedi"
3824  [(match_operand:DI 0 "s_register_operand" "=w")
3825   (match_operand:DI 1 "s_register_operand" "r")
3826   (match_operand:DI 2 "s_register_operand" "0")
3827   (match_operand:SI 3 "immediate_operand" "i")]
3828  "TARGET_NEON"
3829{
3830  emit_move_insn (operands[0], operands[1]);
3831  DONE;
3832})
3833
;; Expander "neon_vcreate<mode>" for the modes of the VD_RE iterator.  The
;; DImode operand 1 is re-viewed in <MODE>mode with gen_lowpart and the
;; result is moved into operand 0.  No conversion instruction is emitted by
;; this body.
3834(define_expand "neon_vcreate<mode>"
3835  [(match_operand:VD_RE 0 "s_register_operand" "")
3836   (match_operand:DI 1 "general_operand" "")]
3837  "TARGET_NEON"
3838{
3839  rtx src = gen_lowpart (<MODE>mode, operands[1]);
3840  emit_move_insn (operands[0], src);
3841  DONE;
3842})
3843
;; "neon_vdup_n<mode>" for the modes of the VX iterator: vec_duplicate of a
;; scalar held in a core register ("r"), emitted as a vdup instruction.
3844(define_insn "neon_vdup_n<mode>"
3845  [(set (match_operand:VX 0 "s_register_operand" "=w")
3846        (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3847  "TARGET_NEON"
3848  "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3849  [(set_attr "type" "neon_from_gp<q>")]
3850)
3851
;; "neon_vdup_nv4hf": vec_duplicate of an HFmode scalar from a core register
;; into a V4HF vector.  Emitted as "vdup.16" with the destination printed as
;; a D register (%P0).
3852(define_insn "neon_vdup_nv4hf"
3853  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3854        (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3855  "TARGET_NEON"
3856  "vdup.16\t%P0, %1"
3857  [(set_attr "type" "neon_from_gp")]
3858)
3859
;; "neon_vdup_nv8hf": vec_duplicate of an HFmode scalar from a core register
;; into a V8HF vector.  Emitted as "vdup.16" with the destination printed as
;; a Q register (%q0).
3860(define_insn "neon_vdup_nv8hf"
3861  [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3862        (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3863  "TARGET_NEON"
3864  "vdup.16\t%q0, %1"
3865  [(set_attr "type" "neon_from_gp_q")]
3866)
3867
;; "neon_vdup_n<mode>" for the modes of the V32 iterator, with two
;; alternatives for the scalar source:
;;   0. a core register ("r"), printed as %1, type neon_from_gp<q>;
;;   1. a register matching constraint "t", printed as %y1, type neon_dup<q>.
3868(define_insn "neon_vdup_n<mode>"
3869  [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3870        (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3871  "TARGET_NEON"
3872  "@
3873  vdup.<V_sz_elem>\t%<V_reg>0, %1
3874  vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3875  [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
3876)
3877
;; Expander "neon_vdup_ndi": source and destination are both DImode, so the
;; body is a plain move from operand 1 to operand 0.
3878(define_expand "neon_vdup_ndi"
3879  [(match_operand:DI 0 "s_register_operand" "=w")
3880   (match_operand:DI 1 "s_register_operand" "r")]
3881  "TARGET_NEON"
3882{
3883  emit_move_insn (operands[0], operands[1]);
3884  DONE;
3885}
3886)
3887
;; "neon_vdup_nv2di": vec_duplicate of a DImode value into V2DI.  Each
;; alternative emits two vmov instructions (hence length 8), writing the %e0
;; and %f0 parts of the destination in turn:
;;   0. source in core registers ("r"), printed as the pair %Q1, %R1;
;;   1. source in a NEON register ("w"), printed as %P1.
3888(define_insn "neon_vdup_nv2di"
3889  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3890        (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3891  "TARGET_NEON"
3892  "@
3893  vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3894  vmov\t%e0, %P1\;vmov\t%f0, %P1"
3895  [(set_attr "length" "8")
3896   (set_attr "type" "multiple")]
3897)
3898
;; "neon_vdup_lane<mode>_internal" for the modes of the VDQW iterator:
;; vec_duplicate of one element selected from a <V_double_vector_mode>
;; register.  On big-endian targets the output code mirrors the lane index
;; within the source mode (nunits - 1 - lane).  The destination is printed as
;; a D register when <Is_d_reg> is true and as a Q register otherwise.  The
;; source is always printed as a D register with a lane subscript.
3899(define_insn "neon_vdup_lane<mode>_internal"
3900  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3901  	(vec_duplicate:VDQW
3902          (vec_select:<V_elem>
3903            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3904            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3905  "TARGET_NEON"
3906{
3907  if (BYTES_BIG_ENDIAN)
3908    {
3909      int elt = INTVAL (operands[2]);
3910      elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3911      operands[2] = GEN_INT (elt);
3912    }
3913  if (<Is_d_reg>)
3914    return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3915  else
3916    return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3917}
3918  [(set_attr "type" "neon_dup<q>")]
3919)
3920
;; "neon_vdup_lane<mode>_internal" for the modes of the VH iterator.  The RTL
;; shape and output code are the same as in the VDQW variant (big-endian lane
;; mirroring, D- or Q-register destination chosen by <Is_d_reg>).  This
;; variant additionally requires TARGET_FP16.
3921(define_insn "neon_vdup_lane<mode>_internal"
3922 [(set (match_operand:VH 0 "s_register_operand" "=w")
3923   (vec_duplicate:VH
3924    (vec_select:<V_elem>
3925     (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3926     (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3927 "TARGET_NEON && TARGET_FP16"
3928{
3929  if (BYTES_BIG_ENDIAN)
3930    {
3931      int elt = INTVAL (operands[2]);
3932      elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3933      operands[2] = GEN_INT (elt);
3934    }
3935  if (<Is_d_reg>)
3936    return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3937  else
3938    return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3939}
3940  [(set_attr "type" "neon_dup<q>")]
3941)
3942
;; Expander "neon_vdup_lane<mode>" for the modes of the VDQW iterator.  On
;; big-endian targets the lane number is XORed with (elements per 64 bits) - 1,
;; computed from the <V_double_vector_mode> source mode.  The operands are
;; then handed to neon_vdup_lane<mode>_internal.
3943(define_expand "neon_vdup_lane<mode>"
3944  [(match_operand:VDQW 0 "s_register_operand" "=w")
3945   (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3946   (match_operand:SI 2 "immediate_operand" "i")]
3947  "TARGET_NEON"
3948{
3949  if (BYTES_BIG_ENDIAN)
3950    {
3951      unsigned int elt = INTVAL (operands[2]);
3952      unsigned int reg_nelts
3953	= 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3954      elt ^= reg_nelts - 1;
3955      operands[2] = GEN_INT (elt);
3956    }
3957    emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3958                                                  operands[2]));
3959    DONE;
3960})
3961
;; Expander "neon_vdup_lane<mode>" for the modes of the VH iterator.  Same
;; body as the VDQW variant (big-endian lane XOR, then
;; neon_vdup_lane<mode>_internal), but it additionally requires TARGET_FP16.
3962(define_expand "neon_vdup_lane<mode>"
3963  [(match_operand:VH 0 "s_register_operand")
3964   (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3965   (match_operand:SI 2 "immediate_operand")]
3966  "TARGET_NEON && TARGET_FP16"
3967{
3968  if (BYTES_BIG_ENDIAN)
3969    {
3970      unsigned int elt = INTVAL (operands[2]);
3971      unsigned int reg_nelts
3972	= 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3973      elt ^= reg_nelts - 1;
3974      operands[2] = GEN_INT (elt);
3975    }
3976  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3977						operands[2]));
3978  DONE;
3979})
3980
3981; Scalar index is ignored, since only zero is valid here.
;; The body is a plain DImode move from operand 1 to operand 0.  Operand 2
;; (the lane) is matched but never read.
3982(define_expand "neon_vdup_lanedi"
3983  [(match_operand:DI 0 "s_register_operand" "=w")
3984   (match_operand:DI 1 "s_register_operand" "w")
3985   (match_operand:SI 2 "immediate_operand" "i")]
3986  "TARGET_NEON"
3987{
3988  emit_move_insn (operands[0], operands[1]);
3989  DONE;
3990})
3991
3992; Likewise for v2di, as the DImode second operand has only a single element.
;; The body forwards operands 0 and 1 to neon_vdup_nv2di.  Operand 2 (the
;; lane) is matched but never read.
3993(define_expand "neon_vdup_lanev2di"
3994  [(match_operand:V2DI 0 "s_register_operand" "=w")
3995   (match_operand:DI 1 "s_register_operand" "w")
3996   (match_operand:SI 2 "immediate_operand" "i")]
3997  "TARGET_NEON"
3998{
3999  emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
4000  DONE;
4001})
4002
4003; Disabled before reload because we don't want combine doing something silly,
4004; but used by the post-reload expansion of neon_vcombine.
;; The pattern is two parallel sets that exchange operands 0 and 1.  Both
;; operands are read-write ("+w"), and the second set refers to them through
;; match_dup.  The "reload_completed" term in the condition is what keeps the
;; pattern from matching before reload.
4005(define_insn "*neon_vswp<mode>"
4006  [(set (match_operand:VDQX 0 "s_register_operand" "+w")
4007	(match_operand:VDQX 1 "s_register_operand" "+w"))
4008   (set (match_dup 1) (match_dup 0))]
4009  "TARGET_NEON && reload_completed"
4010  "vswp\t%<V_reg>0, %<V_reg>1"
4011  [(set_attr "type" "neon_permute<q>")]
4012)
4013
4014;; In this insn, operand 1 should be low, and operand 2 the high part of the
4015;; dest vector.
4016;; FIXME: A different implementation of this builtin could make it much
4017;; more likely that we wouldn't actually need to output anything (we could make
4018;; it so that the reg allocator puts things in the right places magically
4019;; instead). Lack of subregs for vectors makes that tricky though, I think.
4020
;; define_insn_and_split: the insn itself has the "#" template, so it is never
;; printed directly.  Once reload has completed, the split replaces it with
;; whatever neon_split_vcombine emits for the operands.  The RTL is a
;; vec_concat of two VDX-mode registers into the <V_DOUBLE> mode.
4021(define_insn_and_split "neon_vcombine<mode>"
4022  [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
4023        (vec_concat:<V_DOUBLE>
4024	  (match_operand:VDX 1 "s_register_operand" "w")
4025	  (match_operand:VDX 2 "s_register_operand" "w")))]
4026  "TARGET_NEON"
4027  "#"
4028  "&& reload_completed"
4029  [(const_int 0)]
4030{
4031  neon_split_vcombine (operands);
4032  DONE;
4033}
4034[(set_attr "type" "multiple")]
4035)
4036
;; Expander "neon_vget_high<mode>" for the modes of the VQX iterator.  It
;; moves into operand 0 the <V_HALF>mode subreg of operand 1 that starts at
;; byte offset GET_MODE_SIZE (<V_HALF>mode), i.e. one half-vector into the
;; register.
4037(define_expand "neon_vget_high<mode>"
4038  [(match_operand:<V_HALF> 0 "s_register_operand")
4039   (match_operand:VQX 1 "s_register_operand")]
4040  "TARGET_NEON"
4041{
4042  emit_move_insn (operands[0],
4043		  simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
4044				       GET_MODE_SIZE (<V_HALF>mode)));
4045  DONE;
4046})
4047
;; Expander "neon_vget_low<mode>" for the modes of the VQX iterator.  It moves
;; into operand 0 the <V_HALF>mode subreg of operand 1 at byte offset 0.
4048(define_expand "neon_vget_low<mode>"
4049  [(match_operand:<V_HALF> 0 "s_register_operand")
4050   (match_operand:VQX 1 "s_register_operand")]
4051  "TARGET_NEON"
4052{
4053  emit_move_insn (operands[0],
4054		  simplify_gen_subreg (<V_HALF>mode, operands[1],
4055				       <MODE>mode, 0));
4056  DONE;
4057})
4058
;; Standard-named signed integer-to-float conversion for the modes of the
;; VCVTI iterator, emitted as "vcvt.f32.s32".  The condition disables the
;; pattern when flag_rounding_math is set.
4059(define_insn "float<mode><V_cvtto>2"
4060  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4061        (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4062  "TARGET_NEON && !flag_rounding_math"
4063  "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
4064  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4065)
4066
;; Standard-named unsigned integer-to-float conversion for the modes of the
;; VCVTI iterator, emitted as "vcvt.f32.u32".  The condition disables the
;; pattern when flag_rounding_math is set.
4067(define_insn "floatuns<mode><V_cvtto>2"
4068  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4069        (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4070  "TARGET_NEON && !flag_rounding_math"
4071  "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
4072  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4073)
4074
;; Standard-named float-to-signed-integer conversion (RTL code "fix") for the
;; modes of the VCVTF iterator, emitted as "vcvt.s32.f32".  Unlike the
;; int-to-float patterns, the condition does not test flag_rounding_math.
4075(define_insn "fix_trunc<mode><V_cvtto>2"
4076  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4077        (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4078  "TARGET_NEON"
4079  "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
4080  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4081)
4082
;; Standard-named float-to-unsigned-integer conversion (RTL code
;; "unsigned_fix") for the modes of the VCVTF iterator, emitted as
;; "vcvt.u32.f32".
4083(define_insn "fixuns_trunc<mode><V_cvtto>2"
4084  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4085        (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4086  "TARGET_NEON"
4087  "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
4088  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4089)
4090
;; "neon_vcvt<sup><mode>" with a source in the VCVTF iterator: unspec from the
;; VCVT_US iterator producing the <V_CVTTO> mode.  The destination type suffix
;; is built from <sup> followed by 32 and the source type is ".f32".
4091(define_insn "neon_vcvt<sup><mode>"
4092  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4093	(unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
4094			  VCVT_US))]
4095  "TARGET_NEON"
4096  "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
4097  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4098)
4099
;; "neon_vcvt<sup><mode>" with a source in the VCVTI iterator: the opposite
;; direction of the VCVTF variant.  The destination type is ".f32" and the
;; source type suffix is built from <sup> followed by 32.
4100(define_insn "neon_vcvt<sup><mode>"
4101  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4102	(unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
4103			  VCVT_US))]
4104  "TARGET_NEON"
4105  "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
4106  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4107)
4108
;; "neon_vcvtv4sfv4hf": UNSPEC_VCVT from a V4HF source to a V4SF result,
;; emitted as "vcvt.f32.f16" with a Q-register destination (%q0) and a
;; D-register source (%P1).  Requires TARGET_NEON and TARGET_FP16.
4109(define_insn "neon_vcvtv4sfv4hf"
4110  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
4111	(unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
4112			  UNSPEC_VCVT))]
4113  "TARGET_NEON && TARGET_FP16"
4114  "vcvt.f32.f16\t%q0, %P1"
4115  [(set_attr "type" "neon_fp_cvt_widen_h")]
4116)
4117
;; "neon_vcvtv4hfv4sf": UNSPEC_VCVT from a V4SF source to a V4HF result,
;; emitted as "vcvt.f16.f32" with a D-register destination (%P0) and a
;; Q-register source (%q1).  Requires TARGET_NEON and TARGET_FP16.
4118(define_insn "neon_vcvtv4hfv4sf"
4119  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
4120	(unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
4121			  UNSPEC_VCVT))]
4122  "TARGET_NEON && TARGET_FP16"
4123  "vcvt.f16.f32\t%P0, %q1"
4124  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
4125)
4126
;; "neon_vcvt<sup><mode>" with a source in the VCVTHI iterator: VCVT_US unspec
;; producing the <VH_CVTTO> mode.  The destination type is ".f16" and the
;; source type suffix is built from <sup> followed by 16.  Guarded by
;; TARGET_NEON_FP16INST.
4127(define_insn "neon_vcvt<sup><mode>"
4128 [(set
4129   (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4130   (unspec:<VH_CVTTO>
4131    [(match_operand:VCVTHI 1 "s_register_operand" "w")]
4132    VCVT_US))]
4133 "TARGET_NEON_FP16INST"
4134 "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
4135  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
4136)
4137
;; "neon_vcvt<sup><mode>" with a source in the VH iterator: the opposite
;; direction of the VCVTHI variant.  The destination type suffix is built from
;; <sup> followed by 16 and the source type is ".f16".  Guarded by
;; TARGET_NEON_FP16INST.
4138(define_insn "neon_vcvt<sup><mode>"
4139 [(set
4140   (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4141   (unspec:<VH_CVTTO>
4142    [(match_operand:VH 1 "s_register_operand" "w")]
4143    VCVT_US))]
4144 "TARGET_NEON_FP16INST"
4145 "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4146  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4147)
4148
;; "neon_vcvt<sup>_n<mode>" with a source in the VCVTF iterator: VCVT_US_N
;; unspec with an extra immediate (operand 2), which is printed as the third
;; assembly operand.  The output code first checks the immediate with
;; arm_const_bounds (operands[2], 1, 33).
;; NOTE(review): the bounds presumably mean 1 <= n < 33 -- confirm against
;; arm_const_bounds in arm.c.
4149(define_insn "neon_vcvt<sup>_n<mode>"
4150  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4151	(unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
4152			   (match_operand:SI 2 "immediate_operand" "i")]
4153			  VCVT_US_N))]
4154  "TARGET_NEON"
4155{
4156  arm_const_bounds (operands[2], 1, 33);
4157  return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
4158}
4159  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4160)
4161
;; "neon_vcvt<sup>_n<mode>" with a source in the VH iterator: VCVT_US_N
;; unspec with an immediate operand 2, guarded by TARGET_NEON_FP16INST.  The
;; output code checks the immediate with arm_const_bounds (operands[2], 0, 17).
;; NOTE(review): the lower bound here is 0, whereas the 32-bit variants use
;; 1 -- confirm that an immediate of 0 is intended to be accepted.
4162(define_insn "neon_vcvt<sup>_n<mode>"
4163 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4164   (unspec:<VH_CVTTO>
4165    [(match_operand:VH 1 "s_register_operand" "w")
4166     (match_operand:SI 2 "immediate_operand" "i")]
4167    VCVT_US_N))]
4168  "TARGET_NEON_FP16INST"
4169{
4170  arm_const_bounds (operands[2], 0, 17);
4171  return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
4172}
4173 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4174)
4175
;; "neon_vcvt<sup>_n<mode>" with a source in the VCVTI iterator: VCVT_US_N
;; unspec in the integer-to-float direction (destination type ".f32").  The
;; immediate operand 2 is checked with arm_const_bounds (operands[2], 1, 33)
;; and printed as the third assembly operand.
4176(define_insn "neon_vcvt<sup>_n<mode>"
4177  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4178	(unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
4179			   (match_operand:SI 2 "immediate_operand" "i")]
4180			  VCVT_US_N))]
4181  "TARGET_NEON"
4182{
4183  arm_const_bounds (operands[2], 1, 33);
4184  return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
4185}
4186  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4187)
4188
;; "neon_vcvt<sup>_n<mode>" with a source in the VCVTHI iterator: VCVT_US_N
;; unspec in the integer-to-float direction (destination type ".f16"),
;; guarded by TARGET_NEON_FP16INST.  The immediate operand 2 is checked with
;; arm_const_bounds (operands[2], 0, 17).
;; NOTE(review): the lower bound here is 0, whereas the 32-bit variants use
;; 1 -- confirm that an immediate of 0 is intended to be accepted.
4189(define_insn "neon_vcvt<sup>_n<mode>"
4190 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4191   (unspec:<VH_CVTTO>
4192    [(match_operand:VCVTHI 1 "s_register_operand" "w")
4193     (match_operand:SI 2 "immediate_operand" "i")]
4194    VCVT_US_N))]
4195 "TARGET_NEON_FP16INST"
4196{
4197  arm_const_bounds (operands[2], 0, 17);
4198  return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
4199}
4200 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
4201)
4202
;; "neon_vcvt<vcvth_op><sup><mode>" for the modes of the VH iterator: unspec
;; from the VCVT_HF_US iterator producing the <VH_CVTTO> mode.  The mnemonic
;; is "vcvt" with the <vcvth_op> suffix appended.  The destination type suffix
;; is built from <sup> followed by 16 and the source type is ".f16".  Guarded
;; by TARGET_NEON_FP16INST.
4203(define_insn "neon_vcvt<vcvth_op><sup><mode>"
4204 [(set
4205   (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4206   (unspec:<VH_CVTTO>
4207    [(match_operand:VH 1 "s_register_operand" "w")]
4208    VCVT_HF_US))]
4209 "TARGET_NEON_FP16INST"
4210 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4211  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4212)
4213
;; "neon_vmovn<mode>" for the modes of the VN iterator: UNSPEC_VMOVN producing
;; the <V_narrow> mode.  Emitted as vmovn with a D-register destination (%P0)
;; and a Q-register source (%q1).
4214(define_insn "neon_vmovn<mode>"
4215  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4216	(unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4217                           UNSPEC_VMOVN))]
4218  "TARGET_NEON"
4219  "vmovn.<V_if_elem>\t%P0, %q1"
4220  [(set_attr "type" "neon_shift_imm_narrow_q")]
4221)
4222
;; "neon_vqmovn<sup><mode>" for the modes of the VN iterator: unspec from the
;; VQMOVN iterator producing the <V_narrow> mode.  Emitted as vqmovn with a
;; type suffix built from <sup> and the source element size, with a D-register
;; destination and a Q-register source.
4223(define_insn "neon_vqmovn<sup><mode>"
4224  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4225	(unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4226                           VQMOVN))]
4227  "TARGET_NEON"
4228  "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
4229  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4230)
4231
;; "neon_vqmovun<mode>" for the modes of the VN iterator: UNSPEC_VQMOVUN
;; producing the <V_narrow> mode.  Emitted as vqmovun with the <V_s_elem> type
;; suffix, a D-register destination and a Q-register source.
4232(define_insn "neon_vqmovun<mode>"
4233  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4234	(unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4235                           UNSPEC_VQMOVUN))]
4236  "TARGET_NEON"
4237  "vqmovun.<V_s_elem>\t%P0, %q1"
4238  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4239)
4240
;; vmovl: widening move.  Operand 1 is a VW-mode vector printed with %P; the
;; result has the <V_widen> mode and is printed with %q.  The VMOVL iterator
;; selects the signedness <sup>.
(define_insn "neon_vmovl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
                          VMOVL))]
  "TARGET_NEON"
  "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
  [(set_attr "type" "neon_shift_imm_long")]
)
4249
;; vmul by lane, VMD modes.  Operand 1 is multiplied by the lane of operand 2
;; selected by the immediate operand 3 (printed as %P2[%c3]).  Operand 2 uses
;; the mode-dependent <scalar_mul_constraint> register constraint.  The "type"
;; attribute is chosen per mode: the floating-point scalar-multiply type when
;; <Is_float_mode> is true, the integer one otherwise.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
	(unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
		     (match_operand:VMD 2 "s_register_operand"
                                        "<scalar_mul_constraint>")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
{
  return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mul_s_scalar<q>")
                   (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)
4266
;; vmul by lane, VMQ modes.  Same shape as the VMD pattern, except that the
;; destination and operand 1 are printed with %q and the scalar source
;; (operand 2) has the <V_HALF> mode and is printed with %P.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
	(unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
		     (match_operand:<V_HALF> 2 "s_register_operand"
                                             "<scalar_mul_constraint>")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
{
  return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mul_s_scalar<q>")
                   (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)
4283
;; vmul.f16 by lane, VH modes.  The scalar source (operand 2) is always a
;; V4HF register regardless of the vector mode; operand 3 selects the lane.
;; Only enabled for TARGET_NEON_FP16INST.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
	(unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
		    (match_operand:V4HF 2 "s_register_operand"
		     "<scalar_mul_constraint>")
		     (match_operand:SI 3 "immediate_operand" "i")]
		     UNSPEC_VMUL_LANE))]
  "TARGET_NEON_FP16INST"
  "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
  [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
)
4295
;; vmull by lane: widening multiply of a VMDI vector (operand 1) by the lane
;; of operand 2 selected by operand 3.  The result has the <V_widen> mode and
;; is printed with %q.  The VMULL_LANE iterator selects the signedness <sup>.
(define_insn "neon_vmull<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
		           (match_operand:VMDI 2 "s_register_operand"
					       "<scalar_mul_constraint>")
                           (match_operand:SI 3 "immediate_operand" "i")]
                          VMULL_LANE))]
  "TARGET_NEON"
{
  return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
}
  [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
)
4309
;; vqdmull by lane: emits "vqdmull" on a VMDI vector (operand 1) and the lane
;; of operand 2 selected by operand 3, producing a <V_widen> result.
(define_insn "neon_vqdmull_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
		           (match_operand:VMDI 2 "s_register_operand"
					       "<scalar_mul_constraint>")
                           (match_operand:SI 3 "immediate_operand" "i")]
                          UNSPEC_VQDMULL_LANE))]
  "TARGET_NEON"
{
  return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
}
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
)
4323
;; vqdmulh / vqrdmulh by lane, VMQI modes.  The VQDMULH_LANE iterator supplies
;; the optional "r" in the mnemonic.  The vector operands are printed with %q;
;; the scalar source (operand 2) has the <V_HALF> mode and is printed with %P,
;; with operand 3 selecting the lane.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
	(unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
		      (match_operand:<V_HALF> 2 "s_register_operand"
					      "<scalar_mul_constraint>")
                      (match_operand:SI 3 "immediate_operand" "i")]
                      VQDMULH_LANE))]
  "TARGET_NEON"
{
  return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
}
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
)
4337
;; vqdmulh / vqrdmulh by lane, VMDI modes.  The VQDMULH_LANE iterator supplies
;; the optional "r" in the mnemonic.  All three register operands are printed
;; with %P; operand 3 selects the lane of operand 2.
;; The "type" attribute uses the non-"_q" saturating scalar-multiply class,
;; since this pattern only covers the VMDI (double-word) modes; the quad-word
;; class belongs to the VMQI pattern of the same name.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
	(unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
		      (match_operand:VMDI 2 "s_register_operand"
					  "<scalar_mul_constraint>")
                      (match_operand:SI 3 "immediate_operand" "i")]
                      VQDMULH_LANE))]
  "TARGET_NEON"
{
  return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
}
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar")]
)
4351
;; vqrdmlah_lane, vqrdmlsh_lane (VMQI modes).
;; The VQRDMLH_AS iterator's neon_rdma_as attribute completes the mnemonic.
;; Operand 1 is tied to the destination (constraint "0") and so does not
;; appear in the asm template; operand 2 is the vector source and operand 3
;; (<V_HALF> mode) provides the lane selected by operand 4.
;; Only enabled for TARGET_NEON_RDMA.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
	(unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
		      (match_operand:VMQI 2 "s_register_operand" "w")
		      (match_operand:<V_HALF> 3 "s_register_operand"
					  "<scalar_mul_constraint>")
		      (match_operand:SI 4 "immediate_operand" "i")]
		     VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
{
  return
   "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
}
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
)
4368
;; vqrdmlah_lane / vqrdmlsh_lane, VMDI modes.  Same operand layout as the
;; VMQI pattern (operand 1 tied to the destination, operand 4 selecting the
;; lane of operand 3), but every register operand is printed with %P.
;; Only enabled for TARGET_NEON_RDMA.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
	(unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
		      (match_operand:VMDI 2 "s_register_operand" "w")
		      (match_operand:VMDI 3 "s_register_operand"
					  "<scalar_mul_constraint>")
		      (match_operand:SI 4 "immediate_operand" "i")]
		     VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
{
  return
   "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
)
4384
;; vmla by lane, VMD modes.  Operand 1 is the accumulator and is tied to the
;; destination (constraint "0"), so only operands 0, 2 and 3 are printed.
;; Operand 4 selects the lane of operand 3.  The "type" attribute depends on
;; <Is_float_mode>.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
	(unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
		     (match_operand:VMD 2 "s_register_operand" "w")
                     (match_operand:VMD 3 "s_register_operand"
					"<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                     UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
{
  return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
4402
;; vmla by lane, VMQ modes.  As the VMD pattern, but the vector operands are
;; printed with %q and the scalar source (operand 3) has the <V_HALF> mode.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
	(unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
		     (match_operand:VMQ 2 "s_register_operand" "w")
                     (match_operand:<V_HALF> 3 "s_register_operand"
					     "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                     UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
{
  return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
4420
;; vmlal by lane: widening multiply-accumulate.  The accumulator (operand 1,
;; <V_widen> mode) is tied to the destination; operand 2 is a VMDI vector and
;; operand 4 selects the lane of operand 3.  VMLAL_LANE selects <sup>.
(define_insn "neon_vmlal<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
			   (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
					       "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          VMLAL_LANE))]
  "TARGET_NEON"
{
  return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)
4435
;; vqdmlal by lane.  The <V_widen> accumulator (operand 1) is tied to the
;; destination; operand 2 is a VMDI vector and operand 4 selects the lane of
;; operand 3.
(define_insn "neon_vqdmlal_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
			   (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
					       "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          UNSPEC_VQDMLAL_LANE))]
  "TARGET_NEON"
{
  return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)
4450
;; vmls by lane, VMD modes.  Operand 1 is tied to the destination (constraint
;; "0"); operand 4 selects the lane of operand 3.  The "type" attribute
;; depends on <Is_float_mode> and reuses the multiply-accumulate classes.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
	(unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
		     (match_operand:VMD 2 "s_register_operand" "w")
                     (match_operand:VMD 3 "s_register_operand"
					"<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                    UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
{
  return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
4468
;; vmls by lane, VMQ modes.  As the VMD pattern, but the vector operands are
;; printed with %q and the scalar source (operand 3) has the <V_HALF> mode.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
	(unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
		     (match_operand:VMQ 2 "s_register_operand" "w")
                     (match_operand:<V_HALF> 3 "s_register_operand"
					     "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                    UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
{
  return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
4486
;; vmlsl by lane: widening multiply-subtract.  The <V_widen> accumulator
;; (operand 1) is tied to the destination; operand 2 is a VMDI vector and
;; operand 4 selects the lane of operand 3.  VMLSL_LANE selects <sup>.
(define_insn "neon_vmlsl<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
			   (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
					       "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          VMLSL_LANE))]
  "TARGET_NEON"
{
  return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)
4501
;; vqdmlsl by lane.  The <V_widen> accumulator (operand 1) is tied to the
;; destination; operand 2 is a VMDI vector and operand 4 selects the lane of
;; operand 3.
(define_insn "neon_vqdmlsl_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
			   (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
					       "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          UNSPEC_VQDMLSL_LANE))]
  "TARGET_NEON"
{
  return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)
4516
4517; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4518; core register into a temp register, then use a scalar taken from that. This
4519; isn't an optimal solution if e.g. the scalar has just been read from memory
; or extracted from another vector. In the latter case it's currently better to
4521; use the "_lane" variant, and the former case can probably be implemented
4522; using vld1_lane, but that hasn't been done yet.
4523
;; vmul_n for the VMD modes: multiply vector operand 1 by scalar operand 2.
;; Implemented by inserting the scalar into lane 0 of a fresh vector register
;; and then emitting the by-lane multiply with lane index 0.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Vector register that carries the scalar in lane 0.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
				       lane));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
				       lane));
  DONE;
})
4536
;; vmul_n for the VMQ modes: multiply vector operand 1 by scalar operand 2.
;; The scalar is inserted into lane 0 of a fresh half-width vector register,
;; which the by-lane multiply then reads with lane index 0.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Half-width vector register that carries the scalar in lane 0.  */
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
					 lane));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
				       lane));
  DONE;
})
4549
;; vmul_n for the VH modes (TARGET_NEON_FP16INST only).  The scalar operand 2
;; is inserted into lane 0 of a fresh V4HF register, whatever the vector mode,
;; and the by-lane multiply is emitted with lane index 0.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  /* V4HF register that carries the scalar in lane 0.  */
  rtx scalar_vec = gen_reg_rtx (V4HFmode);
  rtx lane = const0_rtx;

  emit_insn (gen_neon_vset_lanev4hf (scalar_vec, operands[2], scalar_vec,
				     lane));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
				       lane));
  DONE;
})
4562
;; vmulls_n: widening multiply of VMDI vector operand 1 by scalar operand 2,
;; expanded through neon_vmulls_lane with the scalar placed in lane 0 of a
;; fresh vector register.
(define_expand "neon_vmulls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Vector register that carries the scalar in lane 0.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
				       lane));
  emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1],
					 scalar_vec, lane));
  DONE;
})
4575
;; vmullu_n: widening multiply of VMDI vector operand 1 by scalar operand 2,
;; expanded through neon_vmullu_lane with the scalar placed in lane 0 of a
;; fresh vector register.
(define_expand "neon_vmullu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Vector register that carries the scalar in lane 0.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
				       lane));
  emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1],
					 scalar_vec, lane));
  DONE;
})
4588
;; vqdmull_n: expanded through neon_vqdmull_lane, with scalar operand 2 placed
;; in lane 0 of a fresh vector register and lane index 0.
(define_expand "neon_vqdmull_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Vector register that carries the scalar in lane 0.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
				       lane));
  emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1],
					  scalar_vec, lane));
  DONE;
})
4601
;; vqdmulh_n for the VMDI modes: expanded through neon_vqdmulh_lane, with
;; scalar operand 2 placed in lane 0 of a fresh vector register.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Vector register that carries the scalar in lane 0.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
				       lane));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
					  scalar_vec, lane));
  DONE;
})
4614
;; vqrdmulh_n for the VMDI modes: expanded through neon_vqrdmulh_lane, with
;; scalar operand 2 placed in lane 0 of a fresh vector register.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Vector register that carries the scalar in lane 0.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
				       lane));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
					   scalar_vec, lane));
  DONE;
})
4627
;; vqdmulh_n for the VMQI modes: expanded through neon_vqdmulh_lane, with
;; scalar operand 2 placed in lane 0 of a fresh half-width vector register.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand" "")
   (match_operand:VMQI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Half-width vector register that carries the scalar in lane 0.  */
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
					 lane));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
					  scalar_vec, lane));
  DONE;
})
4640
;; vqrdmulh_n for the VMQI modes: expanded through neon_vqrdmulh_lane, with
;; scalar operand 2 placed in lane 0 of a fresh half-width vector register.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand" "")
   (match_operand:VMQI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Half-width vector register that carries the scalar in lane 0.  */
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
					 lane));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
					   scalar_vec, lane));
  DONE;
})
4653
;; vmla_n for the VMD modes.  Operand 1 is the accumulator, operand 2 the
;; vector source and operand 3 the scalar.  The scalar is placed in lane 0 of
;; a fresh vector register and neon_vmla_lane is emitted with lane index 0.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:VMD 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Vector register that carries the scalar in lane 0.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
				       lane));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
				       scalar_vec, lane));
  DONE;
})
4667
;; vmla_n for the VMQ modes.  Operand 1 is the accumulator, operand 2 the
;; vector source and operand 3 the scalar.  The scalar is placed in lane 0 of
;; a fresh half-width vector register and neon_vmla_lane is emitted with lane
;; index 0.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:VMQ 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Half-width vector register that carries the scalar in lane 0.  */
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
					 lane));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
				       scalar_vec, lane));
  DONE;
})
4681
;; vmlals_n: operand 1 is the <V_widen> accumulator, operand 2 the VMDI vector
;; source and operand 3 the scalar.  Expanded through neon_vmlals_lane with
;; the scalar placed in lane 0 of a fresh vector register.
(define_expand "neon_vmlals_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Vector register that carries the scalar in lane 0.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
				       lane));
  emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1],
					 operands[2], scalar_vec, lane));
  DONE;
})
4695
;; vmlalu_n: operand 1 is the <V_widen> accumulator, operand 2 the VMDI vector
;; source and operand 3 the scalar.  Expanded through neon_vmlalu_lane with
;; the scalar placed in lane 0 of a fresh vector register.
(define_expand "neon_vmlalu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Vector register that carries the scalar in lane 0.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
				       lane));
  emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1],
					 operands[2], scalar_vec, lane));
  DONE;
})
4709
;; vqdmlal_n: operand 1 is the <V_widen> accumulator, operand 2 the VMDI
;; vector source and operand 3 the scalar.  Expanded through
;; neon_vqdmlal_lane with the scalar placed in lane 0 of a fresh vector
;; register.
(define_expand "neon_vqdmlal_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Vector register that carries the scalar in lane 0.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
				       lane));
  emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1],
					  operands[2], scalar_vec, lane));
  DONE;
})
4723
;; vmls_n for the VMD modes.  Operand 1 is the accumulator, operand 2 the
;; vector source and operand 3 the scalar.  The scalar is placed in lane 0 of
;; a fresh vector register and neon_vmls_lane is emitted with lane index 0.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:VMD 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Vector register that carries the scalar in lane 0.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
				       lane));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
				       scalar_vec, lane));
  DONE;
})
4737
;; vmls_n for the VMQ modes.  Operand 1 is the accumulator, operand 2 the
;; vector source and operand 3 the scalar.  The scalar is placed in lane 0 of
;; a fresh half-width vector register and neon_vmls_lane is emitted with lane
;; index 0.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:VMQ 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Half-width vector register that carries the scalar in lane 0.  */
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
					 lane));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
				       scalar_vec, lane));
  DONE;
})
4751
;; vmlsls_n: operand 1 is the <V_widen> accumulator, operand 2 the VMDI vector
;; source and operand 3 the scalar.  Expanded through neon_vmlsls_lane with
;; the scalar placed in lane 0 of a fresh vector register.
(define_expand "neon_vmlsls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Vector register that carries the scalar in lane 0.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
				       lane));
  emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1],
					 operands[2], scalar_vec, lane));
  DONE;
})
4765
;; vmlslu_n: operand 1 is the <V_widen> accumulator, operand 2 the VMDI vector
;; source and operand 3 the scalar.  Expanded through neon_vmlslu_lane with
;; the scalar placed in lane 0 of a fresh vector register.
(define_expand "neon_vmlslu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Vector register that carries the scalar in lane 0.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
				       lane));
  emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1],
					 operands[2], scalar_vec, lane));
  DONE;
})
4779
;; vqdmlsl_n: operand 1 is the <V_widen> accumulator, operand 2 the VMDI
;; vector source and operand 3 the scalar.  Expanded through
;; neon_vqdmlsl_lane with the scalar placed in lane 0 of a fresh vector
;; register.
(define_expand "neon_vqdmlsl_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Vector register that carries the scalar in lane 0.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
				       lane));
  emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1],
					  operands[2], scalar_vec, lane));
  DONE;
})
4793
;; vext: emits "vext" on two VDQX vectors (operands 1 and 2) with the
;; immediate operand 3 printed last.  At output time the immediate is checked
;; with arm_const_bounds against 0 and the number of units in the mode.
(define_insn "neon_vext<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
	(unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
		      (match_operand:VDQX 2 "s_register_operand" "w")
                      (match_operand:SI 3 "immediate_operand" "i")]
                     UNSPEC_VEXT))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
  return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_ext<q>")]
)
4807
;; vrev64 for the VDQ modes; the element size suffix comes from <V_sz_elem>.
(define_insn "neon_vrev64<mode>"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
	(unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
                    UNSPEC_VREV64))]
  "TARGET_NEON"
  "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
4816
;; vrev32 for the VX modes; the element size suffix comes from <V_sz_elem>.
(define_insn "neon_vrev32<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
	(unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
                   UNSPEC_VREV32))]
  "TARGET_NEON"
  "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
4825
;; vrev16 for the VE modes; the element size suffix comes from <V_sz_elem>.
(define_insn "neon_vrev16<mode>"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
	(unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
                   UNSPEC_VREV16))]
  "TARGET_NEON"
  "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
4834
4835; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4836; allocation. For an intrinsic of form:
4837;   rD = vbsl_* (rS, rN, rM)
4838; We can use any of:
4839;   vbsl rS, rN, rM  (if D = S)
4840;   vbit rD, rN, rS  (if D = M, so 1-bits in rS choose bits from rN, else rM)
4841;   vbif rD, rM, rS  (if D = N, so 0-bits in rS choose bits from rM, else rN)
4842
;; Bitwise select with three alternatives, one per input operand that may be
;; tied to the destination (constraint "0"):
;;   alt 0: operand 1 tied  -> vbsl %0, %2, %3
;;   alt 1: operand 3 tied  -> vbit %0, %2, %1
;;   alt 2: operand 2 tied  -> vbif %0, %3, %1
;; All four operands share the same VDQX mode.
(define_insn "neon_vbsl<mode>_internal"
  [(set (match_operand:VDQX 0 "s_register_operand"		 "=w,w,w")
	(unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
		      (match_operand:VDQX 2 "s_register_operand" " w,w,0")
                      (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
                     UNSPEC_VBSL))]
  "TARGET_NEON"
  "@
  vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
  vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
  vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
  [(set_attr "type" "neon_bsl<q>")]
)
4856
;; Expander for the vbsl builtins.  The selector (operand 1) arrives in the
;; <V_cmp_result> mode; it is re-expressed in the data mode before the
;; UNSPEC_VBSL pattern above is generated.
(define_expand "neon_vbsl<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "")
        (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
                      (match_operand:VDQX 2 "s_register_operand" "")
                      (match_operand:VDQX 3 "s_register_operand" "")]
                     UNSPEC_VBSL))]
  "TARGET_NEON"
{
  /* Operands of different modes cannot be tied to one another, so view the
     selector in the same mode as the data operands.  */
  rtx selector = gen_lowpart (<MODE>mode, operands[1]);
  operands[1] = selector;
})
4868
;; vshl, vrshl
;; Shift of a VDQIX vector (operand 1) by a per-element amount held in a
;; second VDQIX register (operand 2).  The VSHL iterator supplies the
;; mnemonic <shift_op> and the signedness <sup>.
;; NOTE(review): the "type" is an immediate-shift class although the shift
;; amount is a register operand; confirm this is the intended scheduling class.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
		       (match_operand:VDQIX 2 "s_register_operand" "w")]
                      VSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
4879
;; vqshl, vqrshl
;; Saturating shift of a VDQIX vector (operand 1) by a per-element amount
;; held in a second VDQIX register (operand 2).  The VQSHL iterator supplies
;; the mnemonic <shift_op> and the signedness <sup>.
;; NOTE(review): the "type" is an immediate-shift class although the shift
;; amount is a register operand; confirm this is the intended scheduling class.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
		       (match_operand:VDQIX 2 "s_register_operand" "w")]
                      VQSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4890
;; vshr_n, vrshr_n
;; Right shift of a VDQIX vector by the immediate operand 2.  At output time
;; the immediate is checked with arm_const_bounds against 1 and the element
;; width in bits plus one.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
		       (match_operand:SI 2 "immediate_operand" "i")]
                      VSHR_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
4904
;; vshrn_n, vrshrn_n
;; Narrowing right shift: operand 1 is a VN vector (printed with %q), the
;; result has the <V_narrow> mode (printed with %P).  The immediate operand 2
;; is checked with arm_const_bounds against 1 and half the source element
;; width plus one.
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
	(unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
			    (match_operand:SI 2 "immediate_operand" "i")]
                           VSHRN_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
  return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
4918
;; vqshrn_n, vqrshrn_n
;; Saturating narrowing right shift: operand 1 is a VN vector, the result has
;; the <V_narrow> mode.  The VQSHRN_N iterator supplies <shift_op> and <sup>.
;; The immediate operand 2 is checked with arm_const_bounds against 1 and
;; half the source element width plus one.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
	(unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
			    (match_operand:SI 2 "immediate_operand" "i")]
                           VQSHRN_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
4932
;; vqshrun_n, vqrshrun_n
;; Saturating narrowing right shift emitted with the <V_s_elem> element
;; suffix: operand 1 is a VN vector, the result has the <V_narrow> mode.
;; The immediate operand 2 is checked with arm_const_bounds against 1 and
;; half the source element width plus one.
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
	(unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
			    (match_operand:SI 2 "immediate_operand" "i")]
                           VQSHRUN_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
  return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
4946
;; vshl_n: left shift of a VDQIX vector by the immediate operand 2, which is
;; checked with arm_const_bounds against 0 and the element width in bits.
(define_insn "neon_vshl_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
		       (match_operand:SI 2 "immediate_operand" "i")]
                      UNSPEC_VSHL_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
  return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
4959
;; vqshl_n: saturating left shift of a VDQIX vector by the immediate operand
;; 2, which is checked with arm_const_bounds against 0 and the element width
;; in bits.  The VQSHL_N iterator selects the signedness <sup>.
(define_insn "neon_vqshl_<sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
		       (match_operand:SI 2 "immediate_operand" "i")]
                      VQSHL_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
  return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4972
;; vqshlu_n: emits "vqshlu" with the <V_s_elem> element suffix on a VDQIX
;; vector and the immediate operand 2, which is checked with
;; arm_const_bounds against 0 and the element width in bits.
(define_insn "neon_vqshlu_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
		       (match_operand:SI 2 "immediate_operand" "i")]
                      UNSPEC_VQSHLU_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
  return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4985
;; vshll_n: widening left shift.  Operand 1 is a VW vector (printed with %P),
;; the result has the <V_widen> mode (printed with %q).  The VSHLL_N iterator
;; selects the signedness <sup>.
;; NOTE(review): the in-body comment states "0 < imm", but the lower bound
;; passed to arm_const_bounds is 0; confirm whether an immediate of 0 is meant
;; to be accepted.
(define_insn "neon_vshll<sup>_n<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
			   (match_operand:SI 2 "immediate_operand" "i")]
			  VSHLL_N))]
  "TARGET_NEON"
{
  /* The boundaries are: 0 < imm <= size.  */
  arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
  return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
}
  [(set_attr "type" "neon_shift_imm_long")]
)
4999
;; vsra_n, vrsra_n
;; Shift right and accumulate.  Operand 1 is the accumulator and is tied to
;; the destination (constraint "0"); operand 2 is the vector that is shifted
;; by the immediate operand 3.  The immediate is checked with
;; arm_const_bounds against 1 and the element width in bits plus one.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
		       (match_operand:VDQIX 2 "s_register_operand" "w")
                       (match_operand:SI 3 "immediate_operand" "i")]
                      VSRA_N))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_acc<q>")]
)
5014
;; vsri_n: shift right and insert.  Operand 1 is tied to the destination
;; (constraint "0"); operand 2 is shifted by the immediate operand 3, which is
;; checked with arm_const_bounds against 1 and the element width plus one.
;; NOTE(review): the "type" is a register-shift class although the shift
;; amount is an immediate; confirm this is the intended scheduling class.
(define_insn "neon_vsri_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
        	       (match_operand:VDQIX 2 "s_register_operand" "w")
                       (match_operand:SI 3 "immediate_operand" "i")]
                      UNSPEC_VSRI))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
  return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
5028
;; vsli_n: shift left and insert.  Operand 1 is tied to the destination
;; (constraint "0"); operand 2 is shifted by the immediate operand 3, which is
;; checked with arm_const_bounds against 0 and the element width in bits.
;; NOTE(review): the "type" is a register-shift class although the shift
;; amount is an immediate; confirm this is the intended scheduling class.
(define_insn "neon_vsli_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
        	       (match_operand:VDQIX 2 "s_register_operand" "w")
                       (match_operand:SI 3 "immediate_operand" "i")]
                      UNSPEC_VSLI))]
  "TARGET_NEON"
{
  arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
  return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
5042
;; vtbl with a one-register table: operand 1 is printed inside the register
;; list and operand 2 follows it as the last assembly operand.
(define_insn "neon_vtbl1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
  "vtbl.8\t%P0, {%P1}, %P2"
  [(set_attr "type" "neon_tbl1")]
)
5052
;; vtbl with a two-register table held in a TImode operand.  The register
;; list is printed from V8QImode registers built from the table operand's
;; hard register number.
(define_insn "neon_vtbl2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  /* Operand vector for the template: destination, the table registers
     (hard register numbers two apart), then operand 2.  */
  rtx xops[4];
  const int first = REGNO (operands[1]);

  xops[0] = operands[0];
  for (int i = 0; i < 2; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first + 2 * i);
  xops[3] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", xops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
5073
;; vtbl with a three-register table held in an EImode operand.  The register
;; list is printed from V8QImode registers built from the table operand's
;; hard register number.
(define_insn "neon_vtbl3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:EI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  /* Operand vector for the template: destination, the table registers
     (hard register numbers two apart), then operand 2.  */
  rtx xops[5];
  const int first = REGNO (operands[1]);

  xops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first + 2 * i);
  xops[4] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", xops);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
5095
;; vtbl with a four-register table held in an OImode operand.  The register
;; list is printed from V8QImode registers built from the table operand's
;; hard register number.
(define_insn "neon_vtbl4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  /* Operand vector for the template: destination, the table registers
     (hard register numbers two apart), then operand 2.  */
  rtx xops[6];
  const int first = REGNO (operands[1]);

  xops[0] = operands[0];
  for (int i = 0; i < 4; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first + 2 * i);
  xops[5] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", xops);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
5118
;; These three are used by the vec_perm infrastructure for V16QImode.
;; This one takes a V16QI table and is split after reload into two
;; neon_vtbl2v8qi instructions, one per V8QI half of the destination and of
;; operand 2.
(define_insn_and_split "neon_vtbl1v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI
          [(match_operand:V16QI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  /* The whole table is handed to both halves, viewed as TImode.  */
  rtx table = gen_lowpart (TImode, operands[1]);
  rtx sel = operands[2];
  rtx dest_half, sel_half;
  unsigned byte_ofs;

  /* First instruction: low V8QI parts of destination and operand 2.  */
  byte_ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  dest_half = simplify_subreg (V8QImode, dest, V16QImode, byte_ofs);
  sel_half = simplify_subreg (V8QImode, sel, V16QImode, byte_ofs);
  emit_insn (gen_neon_vtbl2v8qi (dest_half, table, sel_half));

  /* Second instruction: high V8QI parts.  */
  byte_ofs = subreg_highpart_offset (V8QImode, V16QImode);
  dest_half = simplify_subreg (V8QImode, dest, V16QImode, byte_ofs);
  sel_half = simplify_subreg (V8QImode, sel, V16QImode, byte_ofs);
  emit_insn (gen_neon_vtbl2v8qi (dest_half, table, sel_half));
  DONE;
}
  [(set_attr "type" "multiple")]
)
5150
;; V16QImode lookup with an OImode table, used by the vec_perm
;; infrastructure.  Split after reload into two V8QI lookups, one per half of
;; the destination and of operand 2, each reading the whole table.
(define_insn_and_split "neon_vtbl2v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx table = operands[1];
  rtx sel = operands[2];
  rtx dest_half, sel_half;
  unsigned byte_ofs;

  /* The table operand is OImode, so the V8QI lookup that accepts it is
     neon_vtbl4v8qi (OImode table); neon_vtbl2v8qi declares a TImode table
     and does not match this operand's mode.  */

  /* First instruction: low V8QI parts of destination and operand 2.  */
  byte_ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  dest_half = simplify_subreg (V8QImode, dest, V16QImode, byte_ofs);
  sel_half = simplify_subreg (V8QImode, sel, V16QImode, byte_ofs);
  emit_insn (gen_neon_vtbl4v8qi (dest_half, table, sel_half));

  /* Second instruction: high V8QI parts.  */
  byte_ofs = subreg_highpart_offset (V8QImode, V16QImode);
  dest_half = simplify_subreg (V8QImode, dest, V16QImode, byte_ofs);
  sel_half = simplify_subreg (V8QImode, sel, V16QImode, byte_ofs);
  emit_insn (gen_neon_vtbl4v8qi (dest_half, table, sel_half));
  DONE;
}
  [(set_attr "type" "multiple")]
)
5181
;; ??? Logically we should extend the regular neon_vcombine pattern to
;; handle quad-word input modes, producing octa-word output modes.  But
;; that would require adding support for octa-word vector modes in moves,
;; which seems overkill for this one use in vec_perm.
;; The pattern is split after reload by neon_split_vcombine.
(define_insn_and_split "neon_vcombinev16qi"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:V16QI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VCONCAT))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)
5201
;; vtbx with a one-register table.  Operand 1 is tied to the destination
;; ("0"); operand 2 is the table and operand 3 the last assembly operand.
(define_insn "neon_vtbx1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:V8QI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
  "vtbx.8\t%P0, {%P2}, %P3"
  [(set_attr "type" "neon_tbl1")]
)
5212
;; vtbx with a two-register table held in a TImode operand.  Operand 1 is
;; tied to the destination ("0"); the register list is printed from V8QImode
;; registers built from the hard register number of operand 2.
(define_insn "neon_vtbx2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:TI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* Operand vector for the template: destination, the table registers
     (hard register numbers two apart), then operand 3.  */
  rtx xops[4];
  const int first = REGNO (operands[2]);

  xops[0] = operands[0];
  for (int i = 0; i < 2; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first + 2 * i);
  xops[3] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", xops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
5234
;; vtbx with a three-register table held in an EImode operand.  Operand 1 is
;; tied to the destination ("0"); the register list is printed from V8QImode
;; registers built from the hard register number of operand 2.
(define_insn "neon_vtbx3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:EI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* Operand vector for the template: destination, the table registers
     (hard register numbers two apart), then operand 3.  */
  rtx xops[5];
  const int first = REGNO (operands[2]);

  xops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first + 2 * i);
  xops[4] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", xops);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
5257
;; vtbx with a four-register table held in an OImode operand.  Operand 1 is
;; tied to the destination ("0"); the register list is printed from V8QImode
;; registers built from the hard register number of operand 2.
(define_insn "neon_vtbx4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:OI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* Operand vector for the template: destination, the table registers
     (hard register numbers two apart), then operand 3.  */
  rtx xops[6];
  const int first = REGNO (operands[2]);

  xops[0] = operands[0];
  for (int i = 0; i < 4; i++)
    xops[1 + i] = gen_rtx_REG (V8QImode, first + 2 * i);
  xops[5] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", xops);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
5281
;; Expander for vtrn: a single parallel with two sets.  UNSPEC_VTRN1 of
;; operands 1 and 2 goes to operand 0, UNSPEC_VTRN2 of the same inputs goes
;; to operand 3.
(define_expand "neon_vtrn<mode>_internal"
  [(parallel
     [(set (match_operand:VDQWH 0 "s_register_operand")
           (unspec:VDQWH
             [(match_operand:VDQWH 1 "s_register_operand")
              (match_operand:VDQWH 2 "s_register_operand")]
             UNSPEC_VTRN1))
      (set (match_operand:VDQWH 3 "s_register_operand")
           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
  "TARGET_NEON"
  ""
)
5293
;; Note: the operand numbering differs from the expander so that tied
;; registers are handled correctly.  The inputs are operands 1 and 3, tied
;; to outputs 0 and 2 through the matching constraints "0" and "2"; both
;; outputs are marked earlyclobber ("=&w").
(define_insn "*neon_vtrn<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_operand:VDQWH 1 "s_register_operand" "0")
           (match_operand:VDQWH 3 "s_register_operand" "2")]
          UNSPEC_VTRN1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)] UNSPEC_VTRN2))]
  "TARGET_NEON"
  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_permute<q>")]
)
5307
;; Expander for vzip: a single parallel with two sets.  UNSPEC_VZIP1 of
;; operands 1 and 2 goes to operand 0, UNSPEC_VZIP2 of the same inputs goes
;; to operand 3.
(define_expand "neon_vzip<mode>_internal"
  [(parallel
     [(set (match_operand:VDQWH 0 "s_register_operand")
           (unspec:VDQWH
             [(match_operand:VDQWH 1 "s_register_operand")
              (match_operand:VDQWH 2 "s_register_operand")]
             UNSPEC_VZIP1))
      (set (match_operand:VDQWH 3 "s_register_operand")
           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
  "TARGET_NEON"
  ""
)
5319
;; Note: the operand numbering differs from the expander so that tied
;; registers are handled correctly.  The inputs are operands 1 and 3, tied
;; to outputs 0 and 2 through the matching constraints "0" and "2"; both
;; outputs are marked earlyclobber ("=&w").
(define_insn "*neon_vzip<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_operand:VDQWH 1 "s_register_operand" "0")
           (match_operand:VDQWH 3 "s_register_operand" "2")]
          UNSPEC_VZIP1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)] UNSPEC_VZIP2))]
  "TARGET_NEON"
  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
5333
;; Expander for vuzp: a single parallel with two sets.  UNSPEC_VUZP1 of
;; operands 1 and 2 goes to operand 0, UNSPEC_VUZP2 of the same inputs goes
;; to operand 3.
(define_expand "neon_vuzp<mode>_internal"
  [(parallel
     [(set (match_operand:VDQWH 0 "s_register_operand")
           (unspec:VDQWH
             [(match_operand:VDQWH 1 "s_register_operand")
              (match_operand:VDQWH 2 "s_register_operand")]
             UNSPEC_VUZP1))
      (set (match_operand:VDQWH 3 "s_register_operand" "")
           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
  "TARGET_NEON"
  ""
)
5345
;; Note: the operand numbering differs from the expander so that tied
;; registers are handled correctly.  The inputs are operands 1 and 3, tied
;; to outputs 0 and 2 through the matching constraints "0" and "2"; both
;; outputs are marked earlyclobber ("=&w").
(define_insn "*neon_vuzp<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_operand:VDQWH 1 "s_register_operand" "0")
           (match_operand:VDQWH 3 "s_register_operand" "2")]
          UNSPEC_VUZP1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)] UNSPEC_VUZP2))]
  "TARGET_NEON"
  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
5359
;; Named expander whose RTL has the same shape as neon_vld1<mode>: an
;; UNSPEC_VLD1 of a structure memory operand set into a register.
(define_expand "vec_load_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX
          [(match_operand:VDQX 1 "neon_struct_operand")]
          UNSPEC_VLD1))]
  "TARGET_NEON")
5365
;; vld1 of a whole D or Q register mode from a structure memory operand.
(define_insn "neon_vld1<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
          UNSPEC_VLD1))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)
5374
;; The lane numbers in the RTL are in GCC lane order, having been flipped
;; in arm_expand_neon_args.  They are converted back to architectural lane
;; order here, via NEON_ENDIAN_LANE_N, before being printed.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX
          [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
           (match_operand:VDX 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);

  /* The template prints the lane through operand 3.  */
  operands[3] = GEN_INT (arch_lane);

  /* A mode with a single element is loaded without lane syntax.  */
  return (nunits == 1
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
5396
;; Quad-register variant of neon_vld1_lane.  The RTL lane number is in GCC
;; lane order and is converted to architectural lane order with
;; NEON_ENDIAN_LANE_N (see the comment on the VDX neon_vld1_lane pattern).
;; The lane is then expressed relative to one <V_HALF> register.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX
          [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
           (match_operand:VQX 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  int half_regno = REGNO (operands[0]);

  /* Lanes in the upper half are addressed through the register whose hard
     register number is two higher, with the lane index rebased.  */
  if (arch_lane >= half)
    {
      half_regno += 2;
      arch_lane -= half;
    }

  operands[3] = GEN_INT (arch_lane);
  operands[0] = gen_rtx_REG (<V_HALF>mode, half_regno);

  /* With two elements in total the half register holds one element, so no
     lane syntax is used.  */
  return (nunits == 2
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
5425
;; vld1 with the all-lanes register syntax: a vec_duplicate of one element
;; loaded from memory into a D register mode.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
        (vec_duplicate:VD_LANE
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%P0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
5433
;; Special case for DImode: treated exactly like a simple load, so the
;; expander just produces an UNSPEC_VLD1 set.
(define_expand "neon_vld1_dupdi"
  [(set (match_operand:DI 0 "s_register_operand" "")
        (unspec:DI
          [(match_operand:DI 1 "neon_struct_operand" "")]
          UNSPEC_VLD1))]
  "TARGET_NEON"
  ""
)
5442
;; Quad-register vld1 with the all-lanes syntax: both registers printed by
;; %e0 and %f0 appear in the register list.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w")
        (vec_duplicate:VQ2
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
{
  return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
}
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
5452
;; V2DI duplicate load.  Split after reload into a DImode load of the low
;; half (via neon_vld1_dupdi) followed by a move of that half into the high
;; half.
(define_insn_and_split "neon_vld1_dupv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx low_di = gen_lowpart (DImode, operands[0]);

  /* Load the low half, then copy it to the high half.  */
  emit_insn (gen_neon_vld1_dupdi (low_di, operands[1]));
  rtx high_di = gen_highpart (DImode, operands[0]);
  emit_move_insn (high_di, low_di);
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "neon_load1_all_lanes_q")]
)
5469
;; Named expander whose RTL has the same shape as neon_vst1<mode>: an
;; UNSPEC_VST1 of a register set into a structure memory operand.
(define_expand "vec_store_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand")]
          UNSPEC_VST1))]
  "TARGET_NEON")
5475
;; vst1 of a whole D or Q register mode to a structure memory operand.
(define_insn "neon_vst1<mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" "w")]
          UNSPEC_VST1))]
  "TARGET_NEON"
  "vst1.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])
5483
;; Single-lane store from a D register mode.  The RTL lane number is in GCC
;; lane order and is converted to architectural lane order with
;; NEON_ENDIAN_LANE_N (see the comment on neon_vld1_lane), which reverses it
;; on big-endian targets.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VDX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);

  /* The template prints the lane through operand 2.  */
  operands[2] = GEN_INT (arch_lane);

  /* A mode with a single element is stored without lane syntax.  */
  return (nunits == 1
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
5504
;; Single-lane store from a Q register mode.  The RTL lane number is in GCC
;; lane order and is converted to architectural lane order with
;; NEON_ENDIAN_LANE_N (see the comment on neon_vld1_lane).  The lane is then
;; expressed relative to one <V_HALF> register.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VQX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  int half_regno = REGNO (operands[1]);

  /* Lanes in the upper half are addressed through the register whose hard
     register number is two higher, with the lane index rebased.  */
  if (arch_lane >= half)
    {
      half_regno += 2;
      arch_lane -= half;
    }

  operands[2] = GEN_INT (arch_lane);
  operands[1] = gen_rtx_REG (<V_HALF>mode, half_regno);

  /* With two elements in total the half register holds one element, so no
     lane syntax is used.  */
  return (nunits == 2
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
5532
;; Named expander producing the same UNSPEC_VLD2 RTL as the TImode
;; neon_vld2<mode> pattern.  The UNSPEC_VSTRUCTDUMMY element only carries
;; the vector mode.
(define_expand "vec_load_lanesti<mode>"
  [(set (match_operand:TI 0 "s_register_operand")
        (unspec:TI
          [(match_operand:TI 1 "neon_struct_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON")
5539
;; Two-register structure load into a TImode register.  For 64-bit elements
;; the instruction emitted is vld1.64 rather than vld2, and the "type"
;; attribute follows the same distinction.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:TI 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld2.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else
          (eq (const_string "<V_sz_elem>") (const_string "64"))
          (const_string "neon_load1_2reg<q>")
          (const_string "neon_load2_2reg<q>")))]
)
5557
;; Named expander producing the same UNSPEC_VLD2 RTL as the OImode
;; neon_vld2<mode> pattern.  The UNSPEC_VSTRUCTDUMMY element only carries
;; the vector mode.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON")
5564
;; Structure load (vld2) into an OImode register for the VQ2 modes.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
  "vld2.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load2_2reg_q")])
5573
;; Single-lane vld2 into a TImode register.  The RTL lane number is in GCC
;; lane order and is converted to architectural lane order with
;; NEON_ENDIAN_LANE_N (see the comment on neon_vld1_lane), which reverses it
;; on big-endian targets.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (match_operand:TI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  const int base = REGNO (operands[0]);
  rtx xops[4];

  /* Two DImode registers, hard register numbers two apart, then the memory
     operand and the lane number.  */
  for (int i = 0; i < 2; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[2] = operands[1];
  xops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", xops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
5597
;; Single-lane vld2 into an OImode register.  The RTL lane number is in GCC
;; lane order and is converted to architectural lane order with
;; NEON_ENDIAN_LANE_N (see the comment on neon_vld1_lane), which reverses it
;; on big-endian targets.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (match_operand:OI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  int base = REGNO (operands[0]);
  rtx xops[4];

  /* A lane in the upper half selects the registers whose hard register
     numbers are two higher, with the lane index rebased.  */
  if (arch_lane >= half)
    {
      base += 2;
      arch_lane -= half;
    }

  /* Two DImode registers, hard register numbers four apart, then the
     memory operand and the lane number.  */
  for (int i = 0; i < 2; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[2] = operands[1];
  xops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", xops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
5627
;; All-lanes vld2 into a TImode register.  Modes with more than one element
;; use the vld2 all-lanes syntax; otherwise a plain vld1 is emitted.  The
;; "type" attribute makes the same distinction via <V_mode_nunits>.
(define_insn "neon_vld2_dup<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_DUP))]
  "TARGET_NEON"
{
  return (GET_MODE_NUNITS (<MODE>mode) > 1
          ? "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1"
          : "vld1.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else
          (gt (const_string "<V_mode_nunits>") (const_string "1"))
          (const_string "neon_load2_all_lanes<q>")
          (const_string "neon_load1_1reg<q>")))]
)
5645
;; Named expander producing the same UNSPEC_VST2 RTL as the TImode
;; neon_vst2<mode> pattern.  The UNSPEC_VSTRUCTDUMMY element only carries
;; the vector mode.
(define_expand "vec_store_lanesti<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand")
        (unspec:TI
          [(match_operand:TI 1 "s_register_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON")
5652
;; Two-register structure store from a TImode register.  For 64-bit elements
;; the instruction emitted is vst1.64 rather than vst2, and the "type"
;; attribute follows the same distinction.
;; The non-64-bit case stores two whole registers, so it is typed
;; neon_store2_2reg<q>, mirroring neon_load2_2reg<q> on the matching load;
;; the one_lane type is what the single-lane neon_vst2_lane patterns use.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
        (unspec:TI
          [(match_operand:TI 1 "s_register_operand" "w")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst2.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
        (if_then_else
          (eq (const_string "<V_sz_elem>") (const_string "64"))
          (const_string "neon_store1_2reg<q>")
          (const_string "neon_store2_2reg<q>")))]
)
5670
;; Named expander producing the same UNSPEC_VST2 RTL as the OImode
;; neon_vst2<mode> pattern.  The UNSPEC_VSTRUCTDUMMY element only carries
;; the vector mode.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON")
5677
;; Structure store (vst2) from an OImode register for the VQ2 modes.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand" "w")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON"
  "vst2.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store2_4reg<q>")]
)
5687
;; Single-lane vst2 from a TImode register.  The RTL lane number is in GCC
;; lane order and is converted to architectural lane order with
;; NEON_ENDIAN_LANE_N (see the comment on neon_vld1_lane), which reverses it
;; on big-endian targets.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  const int base = REGNO (operands[1]);
  rtx xops[4];

  /* Memory operand first, then two DImode registers with hard register
     numbers two apart, then the lane number.  */
  xops[0] = operands[0];
  for (int i = 0; i < 2; i++)
    xops[1 + i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
5711
;; Single-lane vst2 from an OImode register.  The RTL lane number is in GCC
;; lane order and is converted to architectural lane order with
;; NEON_ENDIAN_LANE_N (see the comment on neon_vld1_lane), which reverses it
;; on big-endian targets.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  int base = REGNO (operands[1]);
  rtx xops[4];

  /* A lane in the upper half selects the registers whose hard register
     numbers are two higher, with the lane index rebased.  */
  if (arch_lane >= half)
    {
      base += 2;
      arch_lane -= half;
    }

  /* Memory operand first, then two DImode registers with hard register
     numbers four apart, then the lane number.  */
  xops[0] = operands[0];
  for (int i = 0; i < 2; i++)
    xops[1 + i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
5741
;; Named expander producing the same UNSPEC_VLD3 RTL as the EImode
;; neon_vld3<mode> pattern.  The UNSPEC_VSTRUCTDUMMY element only carries
;; the vector mode.
(define_expand "vec_load_lanesei<mode>"
  [(set (match_operand:EI 0 "s_register_operand")
        (unspec:EI
          [(match_operand:EI 1 "neon_struct_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3))]
  "TARGET_NEON")
5748
;; Three-register structure load into an EImode register.  For 64-bit
;; elements the instruction emitted is vld1.64 rather than vld3, and the
;; "type" attribute follows the same distinction.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld3.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else
          (eq (const_string "<V_sz_elem>") (const_string "64"))
          (const_string "neon_load1_3reg<q>")
          (const_string "neon_load3_3reg<q>")))]
)
5766
;; Named expander for CImode structure loads; it simply forwards both
;; operands to the neon_vld3<mode> expander.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];
  emit_insn (gen_neon_vld3<mode> (dest, src));
  DONE;
})
5776
;; CImode vld3 expander.  The load is emitted as two instructions,
;; neon_vld3qa and neon_vld3qb, each reading one EImode-sized piece of the
;; memory operand; the second also takes the destination as an input.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First piece: the memory operand re-addressed as EImode at offset 0.  */
  rtx first_mem = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (operands[0], first_mem));

  /* Second piece: one EImode size further on.  */
  rtx second_mem
    = adjust_address (first_mem, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vld3qb<mode> (operands[0], second_mem, operands[0]));
  DONE;
})
5791
;; First of the two instructions used for a CImode vld3.  The register list
;; names the DImode registers at hard register offsets 0, 4 and 8 from the
;; destination.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[4];

  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
5810
;; Second of the two instructions used for a CImode vld3.  Operand 2 is the
;; destination's previous value (constraint "0").  The register list names
;; the DImode registers at hard register offsets 2, 6 and 10 from the
;; destination.
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (match_operand:CI 2 "s_register_operand" "0")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[4];

  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 + 4 * i);
  xops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
5830
;; Single-lane vld3 into an EImode register.  The RTL lane number is in GCC
;; lane order and is converted to architectural lane order with
;; NEON_ENDIAN_LANE_N (see the comment on neon_vld1_lane), which reverses it
;; on big-endian targets.  The memory operand is printed with plain %3
;; rather than the %A modifier used by the vld1/vld2 patterns.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
           (match_operand:EI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  const int base = REGNO (operands[0]);
  rtx xops[5];

  /* Three DImode registers, hard register numbers two apart, then the
     memory operand and the lane number.  */
  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[3] = operands[1];
  xops[4] = GEN_INT (arch_lane);

  output_asm_insn
    ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
5856
;; Single-lane vld3 into a CImode register.  The RTL lane number is in GCC
;; lane order and is converted to architectural lane order with
;; NEON_ENDIAN_LANE_N (see the comment on neon_vld1_lane), which reverses it
;; on big-endian targets.  The memory operand is printed with plain %3
;; rather than the %A modifier used by the vld1/vld2 patterns.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
           (match_operand:CI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  int base = REGNO (operands[0]);
  rtx xops[5];

  /* A lane in the upper half selects the registers whose hard register
     numbers are two higher, with the lane index rebased.  */
  if (arch_lane >= half)
    {
      base += 2;
      arch_lane -= half;
    }

  /* Three DImode registers, hard register numbers four apart, then the
     memory operand and the lane number.  */
  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = operands[1];
  xops[4] = GEN_INT (arch_lane);

  output_asm_insn
    ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
5888
;; All-lanes vld3 into an EImode register.  Modes with more than one element
;; use the vld3 all-lanes syntax on three DImode registers; otherwise a
;; plain vld1 is emitted.  The "type" attribute makes the same distinction
;; via <V_mode_nunits>.
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  const bool multi_elem = GET_MODE_NUNITS (<MODE>mode) > 1;

  /* Single-element modes: nothing to duplicate, use vld1.  */
  if (!multi_elem)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  /* Three DImode registers, hard register numbers two apart, then the
     memory operand.  */
  const int base = REGNO (operands[0]);
  rtx xops[4];
  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", xops);
  return "";
}
  [(set (attr "type")
        (if_then_else
          (gt (const_string "<V_mode_nunits>") (const_string "1"))
          (const_string "neon_load3_all_lanes<q>")
          (const_string "neon_load1_1reg<q>")))])
5914
;; Store-lanes expander for EImode register groups (VDX element modes).
;; There is no preparation code: the RTL template below is what gets
;; emitted, and it has the same shape (UNSPEC_VST3 over an EImode register
;; operand plus the VDX dummy unspec) as the neon_vst3<mode> insn pattern.
(define_expand "vec_store_lanesei<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand")
	(unspec:EI [(match_operand:EI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON")
5921
;; Store an EImode register group to memory (VDX element modes).
;; 64-bit elements are emitted as VST1.64 on the register list; every other
;; element size is emitted as VST3 on the register list.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst3.<V_sz_elem>\t%h1, %A0";
}
  ;; The VST3 form stores whole registers, not a single lane, so it is
  ;; classified as neon_store3_3reg (the type neon_vst3qa/neon_vst3qb use)
  ;; rather than neon_store3_one_lane.
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_3reg<q>")
                    (const_string "neon_store3_3reg<q>")))])
5938
;; Store-lanes expander for CImode register groups (VQ2 element modes).
;; It simply forwards both operands to the neon_vst3<mode> expander.
(define_expand "vec_store_lanesci<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest_mem = operands[0];
  rtx src_regs = operands[1];

  emit_insn (gen_neon_vst3<mode> (dest_mem, src_regs));
  DONE;
})
5948
;; CImode VST3 (VQ2 element modes), expanded as two EImode stores:
;; neon_vst3qa to the first EImode-sized part of the destination and
;; neon_vst3qb to the part that follows it.  Both take the full CImode
;; source register group.
(define_expand "neon_vst3<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First EImode-sized part, at offset 0 of the destination.  */
  rtx first_mem = adjust_address (operands[0], EImode, 0);
  emit_insn (gen_neon_vst3qa<mode> (first_mem, operands[1]));

  /* Second part, one EImode size further on.  */
  rtx second_mem
    = adjust_address (first_mem, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vst3qb<mode> (second_mem, operands[1]));
  DONE;
})
5963
;; First half of a CImode VST3: stores the DImode registers at REGNO + 0,
;; + 4 and + 8 of the source group to an EImode memory operand.
(define_insn "neon_vst3qa<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx asm_ops[4];

  asm_ops[0] = operands[0];
  for (int k = 0; k < 3; k++)
    asm_ops[k + 1] = gen_rtx_REG (DImode, base + 4 * k);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", asm_ops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
5982
;; Second half of a CImode VST3: stores the DImode registers at REGNO + 2,
;; + 6 and + 10 of the source group to an EImode memory operand.
(define_insn "neon_vst3qb<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3B))]
  "TARGET_NEON"
{
  /* Same stride as neon_vst3qa, but starting two register numbers later.  */
  const int base = REGNO (operands[1]) + 2;
  rtx asm_ops[4];

  asm_ops[0] = operands[0];
  for (int k = 0; k < 3; k++)
    asm_ops[k + 1] = gen_rtx_REG (DImode, base + 4 * k);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", asm_ops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
6001
;; Store one lane from each of the three registers of an EImode group
;; (VD_LANE element modes).  See the comment on neon_vld1_lane for the
;; reason why the lane numbers are reversed here on big-endian targets.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
           [(match_operand:EI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT lane_idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  const int base = REGNO (operands[1]);
  rtx asm_ops[5];

  asm_ops[0] = operands[0];
  /* Three DImode registers, two register numbers apart.  */
  for (int k = 0; k < 3; k++)
    asm_ops[k + 1] = gen_rtx_REG (DImode, base + 2 * k);
  asm_ops[4] = GEN_INT (lane_idx);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   asm_ops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
6027
;; Store one lane from each of three registers of a CImode group (VQ_HS
;; element modes).  See the comment on neon_vld1_lane for the reason why
;; the lane numbers are reversed here on big-endian targets.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
           [(match_operand:CI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane_idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half_lanes = nunits / 2;
  int first_reg = REGNO (operands[1]);

  /* A lane in the upper half is rebased to the lower half and the base
     register number is advanced by two.  */
  if (lane_idx >= half_lanes)
    {
      first_reg += 2;
      lane_idx -= half_lanes;
    }

  rtx asm_ops[5];
  asm_ops[0] = operands[0];
  /* Three DImode registers, four register numbers apart.  */
  for (int k = 0; k < 3; k++)
    asm_ops[k + 1] = gen_rtx_REG (DImode, first_reg + 4 * k);
  asm_ops[4] = GEN_INT (lane_idx);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   asm_ops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
6059
;; Load-lanes expander for OImode register groups (VDX element modes).
;; There is no preparation code: the RTL template below is what gets
;; emitted, and it has the same shape (UNSPEC_VLD4 over an OImode memory
;; operand plus the VDX dummy unspec) as the neon_vld4<mode> insn pattern.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_VLD4))]
  "TARGET_NEON")
6066
;; Load an OImode register group from memory (VDX element modes).
;; 64-bit elements are emitted as VLD1.64 on the register list; every other
;; element size is emitted as VLD4 on the register list.
(define_insn "neon_vld4<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4))]
  "TARGET_NEON"
{
  if (<V_sz_elem> != 64)
    return "vld4.<V_sz_elem>\t%h0, %A1";

  return "vld1.64\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_4reg<q>")
                    (const_string "neon_load4_4reg<q>")))]
)
6084
;; Load-lanes expander for XImode register groups (VQ2 element modes).
;; It simply forwards both operands to the neon_vld4<mode> expander.
(define_expand "vec_load_lanesxi<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest_regs = operands[0];
  rtx src_mem = operands[1];

  emit_insn (gen_neon_vld4<mode> (dest_regs, src_mem));
  DONE;
})
6094
;; XImode VLD4 (VQ2 element modes), expanded as two OImode loads:
;; neon_vld4qa from the first OImode-sized part of the source and
;; neon_vld4qb from the part that follows it.  The second insn also takes
;; the destination as an input operand.
(define_expand "neon_vld4<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First OImode-sized part, at offset 0 of the source.  */
  rtx first_mem = adjust_address (operands[1], OImode, 0);
  emit_insn (gen_neon_vld4qa<mode> (operands[0], first_mem));

  /* Second part, one OImode size further on.  */
  rtx second_mem
    = adjust_address (first_mem, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vld4qb<mode> (operands[0], second_mem, operands[0]));
  DONE;
})
6109
;; First half of an XImode VLD4: loads the DImode registers at REGNO + 0,
;; + 4, + 8 and + 12 of the destination group from an OImode memory operand.
(define_insn "neon_vld4qa<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx asm_ops[5];

  for (int k = 0; k < 4; k++)
    asm_ops[k] = gen_rtx_REG (DImode, base + 4 * k);
  asm_ops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", asm_ops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
6129
;; Second half of an XImode VLD4: loads the DImode registers at REGNO + 2,
;; + 6, + 10 and + 14 of the destination group.  Operand 2 is tied to the
;; destination ("0" constraint).
(define_insn "neon_vld4qb<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4B))]
  "TARGET_NEON"
{
  /* Same stride as neon_vld4qa, but starting two register numbers later.  */
  const int base = REGNO (operands[0]) + 2;
  rtx asm_ops[5];

  for (int k = 0; k < 4; k++)
    asm_ops[k] = gen_rtx_REG (DImode, base + 4 * k);
  asm_ops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", asm_ops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
6150
;; Load one lane of a four-element structure into an OImode register group
;; (VD_LANE element modes).  See the comment on neon_vld1_lane for the
;; reason why the lane numbers are reversed here on big-endian targets.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT lane_idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  const int base = REGNO (operands[0]);
  rtx asm_ops[6];

  /* Four DImode registers, two register numbers apart.  */
  for (int k = 0; k < 4; k++)
    asm_ops[k] = gen_rtx_REG (DImode, base + 2 * k);
  asm_ops[4] = operands[1];
  asm_ops[5] = GEN_INT (lane_idx);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   asm_ops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
6177
;; Load one lane of a four-element structure into an XImode register group
;; (VQ_HS element modes).  See the comment on neon_vld1_lane for the reason
;; why the lane numbers are reversed here on big-endian targets.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane_idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half_lanes = nunits / 2;
  int first_reg = REGNO (operands[0]);

  /* A lane in the upper half is rebased to the lower half and the base
     register number is advanced by two.  */
  if (lane_idx >= half_lanes)
    {
      first_reg += 2;
      lane_idx -= half_lanes;
    }

  /* Four DImode registers, four register numbers apart.  */
  rtx asm_ops[6];
  for (int k = 0; k < 4; k++)
    asm_ops[k] = gen_rtx_REG (DImode, first_reg + 4 * k);
  asm_ops[4] = operands[1];
  asm_ops[5] = GEN_INT (lane_idx);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   asm_ops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
6210
;; VLD4 with the "[]" (all lanes) register syntax into an OImode register
;; group.  Modes with a single element are emitted as a VLD1 of the register
;; list instead.
(define_insn "neon_vld4_dup<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_DUP))]
  "TARGET_NEON"
{
  /* Single-element case first: nothing to build, just use the list form.  */
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  /* Four DImode registers, two register numbers apart.  */
  const int base = REGNO (operands[0]);
  rtx asm_ops[5];
  for (int k = 0; k < 4; k++)
    asm_ops[k] = gen_rtx_REG (DImode, base + 2 * k);
  asm_ops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
                   asm_ops);
  return "";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load4_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))]
)
6239
;; Store-lanes expander for OImode register groups (VDX element modes).
;; There is no preparation code: the RTL template below is what gets
;; emitted, and it has the same shape (UNSPEC_VST4 over an OImode register
;; operand plus the VDX dummy unspec) as the neon_vst4<mode> insn pattern.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
	(unspec:OI [(match_operand:OI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON")
6246
;; Store an OImode register group to memory (VDX element modes).
;; 64-bit elements are emitted as VST1.64 on the register list; every other
;; element size is emitted as VST4 on the register list.
(define_insn "neon_vst4<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON"
{
  if (<V_sz_elem> != 64)
    return "vst4.<V_sz_elem>\t%h1, %A0";

  return "vst1.64\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_4reg<q>")
                    (const_string "neon_store4_4reg<q>")))]
)
6264
;; Store-lanes expander for XImode register groups (VQ2 element modes).
;; It simply forwards both operands to the neon_vst4<mode> expander.
(define_expand "vec_store_lanesxi<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest_mem = operands[0];
  rtx src_regs = operands[1];

  emit_insn (gen_neon_vst4<mode> (dest_mem, src_regs));
  DONE;
})
6274
;; XImode VST4 (VQ2 element modes), expanded as two OImode stores:
;; neon_vst4qa to the first OImode-sized part of the destination and
;; neon_vst4qb to the part that follows it.  Both take the full XImode
;; source register group.
(define_expand "neon_vst4<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First OImode-sized part, at offset 0 of the destination.  */
  rtx first_mem = adjust_address (operands[0], OImode, 0);
  emit_insn (gen_neon_vst4qa<mode> (first_mem, operands[1]));

  /* Second part, one OImode size further on.  */
  rtx second_mem
    = adjust_address (first_mem, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vst4qb<mode> (second_mem, operands[1]));
  DONE;
})
6289
;; First half of an XImode VST4: stores the DImode registers at REGNO + 0,
;; + 4, + 8 and + 12 of the source group to an OImode memory operand.
(define_insn "neon_vst4qa<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx asm_ops[5];

  asm_ops[0] = operands[0];
  for (int k = 0; k < 4; k++)
    asm_ops[k + 1] = gen_rtx_REG (DImode, base + 4 * k);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", asm_ops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
6309
;; Second half of an XImode VST4: stores the DImode registers at REGNO + 2,
;; + 6, + 10 and + 14 of the source group to an OImode memory operand.
(define_insn "neon_vst4qb<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4B))]
  "TARGET_NEON"
{
  /* Same stride as neon_vst4qa, but starting two register numbers later.  */
  const int base = REGNO (operands[1]) + 2;
  rtx asm_ops[5];

  asm_ops[0] = operands[0];
  for (int k = 0; k < 4; k++)
    asm_ops[k + 1] = gen_rtx_REG (DImode, base + 4 * k);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", asm_ops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
6329
;; Store one lane from each of the four registers of an OImode group
;; (VD_LANE element modes).  See the comment on neon_vld1_lane for the
;; reason why the lane numbers are reversed here on big-endian targets.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
           [(match_operand:OI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT lane_idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  const int base = REGNO (operands[1]);
  rtx asm_ops[6];

  asm_ops[0] = operands[0];
  /* Four DImode registers, two register numbers apart.  */
  for (int k = 0; k < 4; k++)
    asm_ops[k + 1] = gen_rtx_REG (DImode, base + 2 * k);
  asm_ops[5] = GEN_INT (lane_idx);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   asm_ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
6356
;; Store one lane from each of four registers of an XImode group (VQ_HS
;; element modes).  See the comment on neon_vld1_lane for the reason why
;; the lane numbers are reversed here on big-endian targets.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
           [(match_operand:XI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[6];
  /* A lane in the upper half is rebased to the lower half and the base
     register number is advanced by two.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = operands[0];
  /* Four DImode registers, four register numbers apart.  */
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = gen_rtx_REG (DImode, regno + 12);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  ;; This is a single-lane store, so it uses the one-lane store type, in
  ;; line with the OImode neon_vst4_lane pattern and the vld4_lane patterns
  ;; (it was previously classified as neon_store4_4reg).
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
6389
;; Widen one half of a VU-mode vector with VMOVL.  Operand 2 is the
;; selection parallel and must satisfy vect_par_constant_low; the selected
;; <V_HALF> value is wrapped in the SE code (presumably sign/zero extension
;; matching <US> -- confirm against the iterator definitions) to give
;; <V_unpack>.  Only available when !BYTES_BIG_ENDIAN.
(define_insn "neon_vec_unpack<US>_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack> (vec_select:<V_HALF>
			  (match_operand:VU 1 "register_operand" "w")
			  (match_operand:VU 2 "vect_par_constant_low" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %e1"
  [(set_attr "type" "neon_shift_imm_long")]
)
6399
;; Counterpart of the _lo_ pattern: operand 2 must satisfy
;; vect_par_constant_high and the source is printed with the %f modifier
;; instead of %e.  Only available when !BYTES_BIG_ENDIAN.
(define_insn "neon_vec_unpack<US>_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack> (vec_select:<V_HALF>
			  (match_operand:VU 1 "register_operand" "w")
			  (match_operand:VU 2 "vect_par_constant_high" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %f1"
  [(set_attr "type" "neon_shift_imm_long")]
)
6409
;; Expander for unpacking the high half of a VU-mode vector.  Builds the
;; PARALLEL of element indices nunits/2 .. nunits-1 and hands it to
;; neon_vec_unpack<US>_hi_<mode> as the selection operand.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);

  for (int k = 0; k < half; k++)
    RTVEC_ELT (sel, k) = GEN_INT (half + k);

  rtx sel_par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0], operands[1],
                                                 sel_par));
  DONE;
})
6428
;; Expander for unpacking the low half of a VU-mode vector.  Builds the
;; PARALLEL of element indices 0 .. nunits/2-1 and hands it to
;; neon_vec_unpack<US>_lo_<mode> as the selection operand.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);

  for (int k = 0; k < half; k++)
    RTVEC_ELT (sel, k) = GEN_INT (k);

  rtx sel_par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0], operands[1],
                                                 sel_par));
  DONE;
})
6446
;; Widening multiply (VMULL) of the same selected half of two VU-mode
;; vectors.  Operand 2 is the selection parallel (vect_par_constant_low) and
;; is reused for the second input via match_dup; both selected halves are
;; wrapped in SE before the mult.  Only available when !BYTES_BIG_ENDIAN.
(define_insn "neon_vec_<US>mult_lo_<mode>"
 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
       (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
			   (match_operand:VU 1 "register_operand" "w")
                           (match_operand:VU 2 "vect_par_constant_low" "")))
 		        (SE:<V_unpack> (vec_select:<V_HALF>
                           (match_operand:VU 3 "register_operand" "w")
                           (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %e1, %e3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
6459
;; Expander for a widening multiply of the low halves of two VU-mode
;; vectors.  Builds the PARALLEL of element indices 0 .. nunits/2-1 and
;; passes it as the selection operand of neon_vec_<US>mult_lo_<mode>.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);

  for (int k = 0; k < half; k++)
    RTVEC_ELT (sel, k) = GEN_INT (k);

  rtx sel_par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0], operands[1],
                                               sel_par, operands[2]));
  DONE;
})
6480
;; Counterpart of neon_vec_<US>mult_lo_<mode>: operand 2 must satisfy
;; vect_par_constant_high and the inputs are printed with the %f modifier
;; instead of %e.  Only available when !BYTES_BIG_ENDIAN.
(define_insn "neon_vec_<US>mult_hi_<mode>"
 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
      (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
			    (match_operand:VU 1 "register_operand" "w")
			    (match_operand:VU 2 "vect_par_constant_high" "")))
		       (SE:<V_unpack> (vec_select:<V_HALF>
			    (match_operand:VU 3 "register_operand" "w")
			    (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %f1, %f3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
6493
;; Expander for a widening multiply of the high halves of two VU-mode
;; vectors.  Builds the PARALLEL of element indices nunits/2 .. nunits-1 and
;; passes it as the selection operand of neon_vec_<US>mult_hi_<mode>.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);

  for (int k = 0; k < half; k++)
    RTVEC_ELT (sel, k) = GEN_INT (half + k);

  rtx sel_par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0], operands[1],
                                               sel_par, operands[2]));
  DONE;
})
6515
;; Widening shift left (VSHLL) of a VW-mode vector by the constant in
;; operand 2, producing a <V_widen> result.
(define_insn "neon_vec_<US>shiftl_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (SE:<V_widen>
          (ashift:VW
            (match_operand:VW 1 "register_operand" "w")
            (match_operand:<V_innermode> 2
              "const_neon_scalar_shift_amount_operand" ""))))]
  "TARGET_NEON"
  "vshll.<US><V_sz_elem> %q0, %P1, %2"
  [(set_attr "type" "neon_shift_imm_long")]
)
6526
;; Widening shift left of the low half of a VU-mode vector: takes the
;; <V_HALF> subreg at byte offset 0 of operand 1 and feeds it to
;; neon_vec_<US>shiftl_<V_half>.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx lo_half
    = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0);

  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], lo_half,
                                                operands[2]));
  DONE;
})
6539
;; Widening shift left of the high half of a VU-mode vector: takes the
;; <V_HALF> subreg of operand 1 at a byte offset of one <V_HALF> size and
;; feeds it to neon_vec_<US>shiftl_<V_half>.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx hi_half
    = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
                           GET_MODE_SIZE (<V_HALF>mode));

  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], hi_half,
                                                operands[2]));
  DONE;
})
6553
;; Vectorize for non-neon-quad case
;; Widens a whole VDI-mode vector to <V_widen> with VMOVL; unlike the
;; neon_vec_unpack<US>_lo/hi patterns there is no vec_select and no
;; !BYTES_BIG_ENDIAN restriction in the condition.
(define_insn "neon_unpack<US>_<mode>"
 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
       (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
 "TARGET_NEON"
 "vmovl.<US><V_sz_elem> %q0, %P1"
  [(set_attr "type" "neon_move")]
)
6562
;; Unpack-low for VDI modes: widen the whole input into a fresh <V_widen>
;; pseudo with neon_unpack<US>_<mode>, then extract the low part of that
;; result with neon_vget_low<V_widen_l>.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx widened = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], widened));
  DONE;
})
6575
;; Unpack-high for VDI modes: widen the whole input into a fresh <V_widen>
;; pseudo with neon_unpack<US>_<mode>, then extract the high part of that
;; result with neon_vget_high<V_widen_l>.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx widened = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], widened));
  DONE;
})
6588
;; Widening multiply (VMULL) of two whole VDI-mode vectors: each input is
;; wrapped in SE to <V_widen> and the two are multiplied in <V_widen>.
(define_insn "neon_vec_<US>mult_<mode>"
 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
       (mult:<V_widen> (SE:<V_widen>
		 	   (match_operand:VDI 1 "register_operand" "w"))
 		       (SE:<V_widen>
			   (match_operand:VDI 2 "register_operand" "w"))))]
  "TARGET_NEON"
  "vmull.<US><V_sz_elem> %q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
6599
;; Widening multiply, high part, for VDI modes: perform the full widening
;; multiply into a fresh <V_widen> pseudo, then extract the high part with
;; neon_vget_high<V_widen_l>.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
  "TARGET_NEON"
{
  rtx product = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
                                            operands[2]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], product));
  DONE;
})
6614
;; Widening multiply, low part, for VDI modes: perform the full widening
;; multiply into a fresh <V_widen> pseudo, then extract the low part with
;; neon_vget_low<V_widen_l>.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
  "TARGET_NEON"
{
  rtx product = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
                                            operands[2]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], product));
  DONE;
})
6629
;; Widening shift left, high part, for VDI modes: perform the full widening
;; shift into a fresh <V_widen> pseudo, then extract the high part with
;; neon_vget_high<V_widen_l>.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx shifted = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
                                              operands[2]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], shifted));
  DONE;
})
6643
;; Widening shift left, low part, for VDI modes: perform the full widening
;; shift into a fresh <V_widen> pseudo, then extract the low part with
;; neon_vget_low<V_widen_l>.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx shifted = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
                                              operands[2]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], shifted));
  DONE;
})
6657
6658; FIXME: These instruction patterns can't be used safely in big-endian mode
6659; because the ordering of vector elements in Q registers is different from what
6660; the semantics of the instructions require.
6661
;; Truncate two VN-mode vectors and concatenate the narrowed results into
;; one <V_narrow_pack> register.  The output is two VMOVN instructions (one
;; writing %e0, one writing %f0), hence the "multiple" type and length 8.
;; The destination is earlyclobber ("=&w"); presumably this keeps it from
;; overlapping operand 2, which is read after the first VMOVN has written
;; part of the destination.
(define_insn "vec_pack_trunc_<mode>"
 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
       (vec_concat:<V_narrow_pack>
		(truncate:<V_narrow>
			(match_operand:VN 1 "register_operand" "w"))
		(truncate:<V_narrow>
			(match_operand:VN 2 "register_operand" "w"))))]
 "TARGET_NEON && !BYTES_BIG_ENDIAN"
 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
 [(set_attr "type" "multiple")
  (set_attr "length" "8")]
)
6674
;; For the non-quad case.
;; Truncates a single VN-mode vector to <V_narrow> with one VMOVN.  Used by
;; the VSHFT vec_pack_trunc_<mode> expander after it has combined its two
;; inputs into one double-width register.
(define_insn "neon_vec_pack_trunc_<mode>"
 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
       (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
 "TARGET_NEON && !BYTES_BIG_ENDIAN"
 "vmovn.i<V_sz_elem>\t%P0, %q1"
 [(set_attr "type" "neon_move_narrow_q")]
)
6683
;; Pack-truncate for VSHFT modes: place operand 1 in the low half and
;; operand 2 in the high half of a fresh <V_DOUBLE> pseudo, then narrow that
;; register into operand 0 with neon_vec_pack_trunc_<V_double>.
(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<V_narrow_pack> 0 "register_operand")
   (match_operand:VSHFT 1 "register_operand")
   (match_operand:VSHFT 2 "register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx joined = gen_reg_rtx (<V_DOUBLE>mode);

  /* Low half first, then high half, then narrow the combined register.  */
  emit_insn (gen_move_lo_quad_<V_double> (joined, operands[1]));
  emit_insn (gen_move_hi_quad_<V_double> (joined, operands[2]));
  emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], joined));
  DONE;
})
6697
;; Matches abs (a - b) on VF-mode vectors, written with a plain minus, and
;; emits a single VABD.  Only enabled under flag_unsafe_math_optimizations.
(define_insn "neon_vabd<mode>_2"
 [(set (match_operand:VF 0 "s_register_operand" "=w")
       (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
			 (match_operand:VF 2 "s_register_operand" "w"))))]
 "TARGET_NEON && flag_unsafe_math_optimizations"
 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_abd_s<q>")]
)
6706
;; Same as neon_vabd<mode>_2, but the subtraction is expressed as an
;; UNSPEC_VSUB unspec rather than a minus, and the element suffix comes from
;; <V_if_elem> instead of <V_s_elem>.  Only enabled under
;; flag_unsafe_math_optimizations.
(define_insn "neon_vabd<mode>_3"
 [(set (match_operand:VF 0 "s_register_operand" "=w")
       (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
			    (match_operand:VF 2 "s_register_operand" "w")]
		UNSPEC_VSUB)))]
 "TARGET_NEON && flag_unsafe_math_optimizations"
 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
 [(set_attr "type" "neon_fp_abd_s<q>")]
)
6716
6717;; Copy from core-to-neon regs, then extend, not vice-versa
6718
;; After reload, when the DImode destination is a VFP register: sign-extend
;; SImode to DImode by duplicating the source across a V2SI view of the
;; destination register and then arithmetic-shifting the DImode register
;; right by 32.
(define_split
  [(set (match_operand:DI 0 "s_register_operand")
        (sign_extend:DI (match_operand:SI 1 "s_register_operand")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
{
  /* Operand 2 is the destination hard register viewed as V2SI.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V2SImode, dest_regno);
})
6728
;; After reload, when the DImode destination is a VFP register: sign-extend
;; HImode to DImode by duplicating the source across a V4HI view of the
;; destination register and then arithmetic-shifting the DImode register
;; right by 48.
(define_split
  [(set (match_operand:DI 0 "s_register_operand")
        (sign_extend:DI (match_operand:HI 1 "s_register_operand")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
{
  /* Operand 2 is the destination hard register viewed as V4HI.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V4HImode, dest_regno);
})
6738
;; After reload, when the DImode destination is a VFP register: sign-extend
;; QImode to DImode by duplicating the source across a V8QI view of the
;; destination register and then arithmetic-shifting the DImode register
;; right by 56.
(define_split
  [(set (match_operand:DI 0 "s_register_operand")
        (sign_extend:DI (match_operand:QI 1 "s_register_operand")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
   (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
{
  /* Operand 2 is the destination hard register viewed as V8QI.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V8QImode, dest_regno);
})
6748
;; After reload, when the DImode destination is a VFP register: zero-extend
;; SImode to DImode by duplicating the source across a V2SI view of the
;; destination register and then logical-shifting the DImode register right
;; by 32.
(define_split
  [(set (match_operand:DI 0 "s_register_operand")
        (zero_extend:DI (match_operand:SI 1 "s_register_operand")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
{
  /* Operand 2 is the destination hard register viewed as V2SI.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V2SImode, dest_regno);
})
6758
;; After reload, when the DImode destination is a VFP register: zero-extend
;; HImode to DImode by duplicating the source across a V4HI view of the
;; destination register and then logical-shifting the DImode register right
;; by 48.
(define_split
  [(set (match_operand:DI 0 "s_register_operand")
        (zero_extend:DI (match_operand:HI 1 "s_register_operand")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
{
  /* Operand 2 is the destination hard register viewed as V4HI.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V4HImode, dest_regno);
})
6768
;; After reload, when the DImode destination is a VFP register: zero-extend
;; QImode to DImode by duplicating the source across a V8QI view of the
;; destination register and then logical-shifting the DImode register right
;; by 56.
(define_split
  [(set (match_operand:DI 0 "s_register_operand")
        (zero_extend:DI (match_operand:QI 1 "s_register_operand")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
   (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
{
  /* Operand 2 is the destination hard register viewed as V8QI.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  operands[2] = gen_rtx_REG (V8QImode, dest_regno);
})
6778