1;; ARM NEON coprocessor Machine Description
2;; Copyright (C) 2006-2016 Free Software Foundation, Inc.
3;; Written by CodeSourcery.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify it
8;; under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 3, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful, but
13;; WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15;; General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3.  If not see
19;; <http://www.gnu.org/licenses/>.
20
21
22;; Attribute used to permit string comparisons against <VQH_mnem> in
23;; type attribute definitions.
;; The permitted values are vadd, vmin and vmax; the default is vadd.
24(define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
;; Move pattern for the modes of the VDX iterator.  The insn condition
;; requires TARGET_NEON and that at least one of the operands is a register.
;; Constraint alternatives, in order (destination <- source):
;;   0: w <- w    1: Un <- w   2: w <- Dn   3: w <- Uni   4: r <- w
;;   5: w <- r    6: r <- r    7: r <- Usi  8: Us <- r
;; The "type", "length" and pool-range attributes at the end list their
;; values in that same alternative order.
26(define_insn "*neon_mov<mode>"
27  [(set (match_operand:VDX 0 "nonimmediate_operand"
28	  "=w,Un,w, w,  ?r,?w,?r,?r, ?Us")
29	(match_operand:VDX 1 "general_operand"
30	  " w,w, Dn,Uni, w, r, r, Usi,r"))]
31  "TARGET_NEON
32   && (register_operand (operands[0], <MODE>mode)
33       || register_operand (operands[1], <MODE>mode))"
34{
  /* Alternative 2 is the immediate (Dn) source; its template depends on
     the element width reported by neon_immediate_valid_for_move.  */
35  if (which_alternative == 2)
36    {
37      int width, is_valid;
      /* Static so the buffer is still valid after it has been returned.  */
38      static char templ[40];
39
      /* The helper receives &operands[1] and &width as output arguments.  */
40      is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41        &operands[1], &width);
42
43      gcc_assert (is_valid != 0);
44
      /* A width of 0 selects the vmov.f32 form; otherwise the width is
	 substituted into a vmov.i<width> template.  */
45      if (width == 0)
46        return "vmov.f32\t%P0, %1  @ <mode>";
47      else
48        sprintf (templ, "vmov.i%d\t%%P0, %%x1  @ <mode>", width);
49
50      return templ;
51    }
52
  /* Alternatives 6, 7 and 8 (core-register forms) reach the default case
     and are emitted by output_move_double.  */
53  switch (which_alternative)
54    {
55    case 0: return "vmov\t%P0, %P1  @ <mode>";
56    case 1: case 3: return output_move_neon (operands);
57    case 2: gcc_unreachable ();
58    case 4: return "vmov\t%Q0, %R0, %P1  @ <mode>";
59    case 5: return "vmov\t%P0, %Q1, %R1  @ <mode>";
60    default: return output_move_double (operands, true, NULL);
61    }
62}
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64                    neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65                    neon_load1_2reg, neon_store1_2reg")
66  (set_attr "length" "4,4,4,4,4,4,8,8,8")
67  (set_attr "arm_pool_range"     "*,*,*,1020,*,*,*,1020,*")
68  (set_attr "thumb2_pool_range"     "*,*,*,1018,*,*,*,1018,*")
69  (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
70
;; Move pattern for the modes of the VQXMOV iterator.  It uses the same nine
;; constraint alternatives, in the same order, as the VDX move pattern:
;;   0: w <- w    1: Un <- w   2: w <- Dn   3: w <- Uni   4: r <- w
;;   5: w <- r    6: r <- r    7: r <- Usi  8: Us <- r
;; Alternatives 4 and 5 each emit two vmov instructions, which is reflected
;; in the "length" attribute.
71(define_insn "*neon_mov<mode>"
72  [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73  	  "=w,Un,w, w,  ?r,?w,?r,?r,  ?Us")
74	(match_operand:VQXMOV 1 "general_operand"
75	  " w,w, Dn,Uni, w, r, r, Usi, r"))]
76  "TARGET_NEON
77   && (register_operand (operands[0], <MODE>mode)
78       || register_operand (operands[1], <MODE>mode))"
79{
  /* Alternative 2 is the immediate (Dn) source; its template depends on
     the element width reported by neon_immediate_valid_for_move.  */
80  if (which_alternative == 2)
81    {
82      int width, is_valid;
      /* Static so the buffer is still valid after it has been returned.  */
83      static char templ[40];
84
      /* The helper receives &operands[1] and &width as output arguments.  */
85      is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86        &operands[1], &width);
87
88      gcc_assert (is_valid != 0);
89
      /* A width of 0 selects the vmov.f32 form; otherwise the width is
	 substituted into a vmov.i<width> template.  */
90      if (width == 0)
91        return "vmov.f32\t%q0, %1  @ <mode>";
92      else
93        sprintf (templ, "vmov.i%d\t%%q0, %%1  @ <mode>", width);
94
95      return templ;
96    }
97
  /* Alternatives 6, 7 and 8 (core-register forms) reach the default case
     and are emitted by output_move_quad.  */
98  switch (which_alternative)
99    {
100    case 0: return "vmov\t%q0, %q1  @ <mode>";
101    case 1: case 3: return output_move_neon (operands);
102    case 2: gcc_unreachable ();
103    case 4: return "vmov\t%Q0, %R0, %e1  @ <mode>\;vmov\t%J0, %K0, %f1";
104    case 5: return "vmov\t%e0, %Q1, %R1  @ <mode>\;vmov\t%f0, %J1, %K1";
105    default: return output_move_quad (operands);
106    }
107}
108  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109                     neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110                     mov_reg,neon_load1_4reg,neon_store1_4reg")
111   (set_attr "length" "4,8,4,8,8,8,16,8,16")
112   (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113   (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114   (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
115
;; Expander for TImode moves when TARGET_NEON is enabled.  While new pseudos
;; may still be created, a move whose destination is not a register has its
;; source forced into a register first.
116(define_expand "movti"
117  [(set (match_operand:TI 0 "nonimmediate_operand" "")
118	(match_operand:TI 1 "general_operand" ""))]
119  "TARGET_NEON"
120{
121  if (can_create_pseudo_p ())
122    {
123      if (!REG_P (operands[0]))
124	operands[1] = force_reg (TImode, operands[1]);
125    }
126})
127
;; Expander for moves in the modes of the VSTRUCT iterator.  While new
;; pseudos may still be created, a move whose destination is not a register
;; has its source forced into a register first.
128(define_expand "mov<mode>"
129  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130	(match_operand:VSTRUCT 1 "general_operand" ""))]
131  "TARGET_NEON"
132{
133  if (can_create_pseudo_p ())
134    {
135      if (!REG_P (operands[0]))
136	operands[1] = force_reg (<MODE>mode, operands[1]);
137    }
138})
139
;; Expander for V4HF moves; enabled only when both TARGET_NEON and
;; TARGET_FP16 hold.  Both operands use the s_register_operand predicate.
140(define_expand "movv4hf"
141  [(set (match_operand:V4HF 0 "s_register_operand")
142	(match_operand:V4HF 1 "s_register_operand"))]
143  "TARGET_NEON && TARGET_FP16"
144{
145  /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
146     causing an ICE on big-endian because it cannot extract subregs in
147     this case.  */
148  if (can_create_pseudo_p ())
149    {
150      if (!REG_P (operands[0]))
151	operands[1] = force_reg (V4HFmode, operands[1]);
152    }
153})
154
;; Expander for V8HF moves; enabled only when both TARGET_NEON and
;; TARGET_FP16 hold.  Unlike movv4hf, the operands have empty predicates.
155(define_expand "movv8hf"
156  [(set (match_operand:V8HF 0 "")
157	(match_operand:V8HF 1 ""))]
158  "TARGET_NEON && TARGET_FP16"
159{
160  /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
161     causing an ICE on big-endian because it cannot extract subregs in
162     this case.  */
163  if (can_create_pseudo_p ())
164    {
165      if (!REG_P (operands[0]))
166	operands[1] = force_reg (V8HFmode, operands[1]);
167    }
168})
169
;; Move pattern for the modes of the VSTRUCT iterator.  At least one operand
;; must be a register.  Alternatives: 0: w <- w, 1: Ut <- w, 2: w <- Ut.
;; The register-to-register alternative outputs "#"; the define_splits that
;; follow break EI, OI, CI and XI register copies into smaller moves after
;; reload.  The length is computed by arm_attr_length_move_neon.
170(define_insn "*neon_mov<mode>"
171  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand"	"=w,Ut,w")
172	(match_operand:VSTRUCT 1 "general_operand"	" w,w, Ut"))]
173  "TARGET_NEON
174   && (register_operand (operands[0], <MODE>mode)
175       || register_operand (operands[1], <MODE>mode))"
176{
177  switch (which_alternative)
178    {
179    case 0: return "#";
180    case 1: case 2: return output_move_neon (operands);
181    default: gcc_unreachable ();
182    }
183}
184  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
185   (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
186
;; After reload, split an EImode register-to-register copy into two moves:
;; a TImode move at the base register number and a DImode move at register
;; number + 4.  The dest/src pairs are handed to neon_disambiguate_copy
;; together with the operands array used by the (match_dup N) templates.
;; NOTE(review): neon_disambiguate_copy presumably orders the moves so that
;; overlapping source/destination ranges are copied safely -- confirm in arm.c.
187(define_split
188  [(set (match_operand:EI 0 "s_register_operand" "")
189	(match_operand:EI 1 "s_register_operand" ""))]
190  "TARGET_NEON && reload_completed"
191  [(set (match_dup 0) (match_dup 1))
192   (set (match_dup 2) (match_dup 3))]
193{
194  int rdest = REGNO (operands[0]);
195  int rsrc = REGNO (operands[1]);
196  rtx dest[2], src[2];
197
198  dest[0] = gen_rtx_REG (TImode, rdest);
199  src[0] = gen_rtx_REG (TImode, rsrc);
200  dest[1] = gen_rtx_REG (DImode, rdest + 4);
201  src[1] = gen_rtx_REG (DImode, rsrc + 4);
202
203  neon_disambiguate_copy (operands, dest, src, 2);
204})
205
;; After reload, split an OImode register-to-register copy into two TImode
;; moves, at register numbers +0 and +4.  The dest/src pairs are handed to
;; neon_disambiguate_copy together with the operands array used by the
;; (match_dup N) templates.
;; NOTE(review): neon_disambiguate_copy presumably orders the moves so that
;; overlapping source/destination ranges are copied safely -- confirm in arm.c.
206(define_split
207  [(set (match_operand:OI 0 "s_register_operand" "")
208	(match_operand:OI 1 "s_register_operand" ""))]
209  "TARGET_NEON && reload_completed"
210  [(set (match_dup 0) (match_dup 1))
211   (set (match_dup 2) (match_dup 3))]
212{
213  int rdest = REGNO (operands[0]);
214  int rsrc = REGNO (operands[1]);
215  rtx dest[2], src[2];
216
217  dest[0] = gen_rtx_REG (TImode, rdest);
218  src[0] = gen_rtx_REG (TImode, rsrc);
219  dest[1] = gen_rtx_REG (TImode, rdest + 4);
220  src[1] = gen_rtx_REG (TImode, rsrc + 4);
221
222  neon_disambiguate_copy (operands, dest, src, 2);
223})
224
;; After reload, split a CImode register-to-register copy into three TImode
;; moves, at register numbers +0, +4 and +8.  The dest/src pairs are handed
;; to neon_disambiguate_copy together with the operands array used by the
;; (match_dup N) templates.
;; NOTE(review): neon_disambiguate_copy presumably orders the moves so that
;; overlapping source/destination ranges are copied safely -- confirm in arm.c.
225(define_split
226  [(set (match_operand:CI 0 "s_register_operand" "")
227	(match_operand:CI 1 "s_register_operand" ""))]
228  "TARGET_NEON && reload_completed"
229  [(set (match_dup 0) (match_dup 1))
230   (set (match_dup 2) (match_dup 3))
231   (set (match_dup 4) (match_dup 5))]
232{
233  int rdest = REGNO (operands[0]);
234  int rsrc = REGNO (operands[1]);
235  rtx dest[3], src[3];
236
237  dest[0] = gen_rtx_REG (TImode, rdest);
238  src[0] = gen_rtx_REG (TImode, rsrc);
239  dest[1] = gen_rtx_REG (TImode, rdest + 4);
240  src[1] = gen_rtx_REG (TImode, rsrc + 4);
241  dest[2] = gen_rtx_REG (TImode, rdest + 8);
242  src[2] = gen_rtx_REG (TImode, rsrc + 8);
243
244  neon_disambiguate_copy (operands, dest, src, 3);
245})
246
;; After reload, split an XImode register-to-register copy into four TImode
;; moves, at register numbers +0, +4, +8 and +12.  The dest/src pairs are
;; handed to neon_disambiguate_copy together with the operands array used by
;; the (match_dup N) templates.
;; NOTE(review): neon_disambiguate_copy presumably orders the moves so that
;; overlapping source/destination ranges are copied safely -- confirm in arm.c.
247(define_split
248  [(set (match_operand:XI 0 "s_register_operand" "")
249	(match_operand:XI 1 "s_register_operand" ""))]
250  "TARGET_NEON && reload_completed"
251  [(set (match_dup 0) (match_dup 1))
252   (set (match_dup 2) (match_dup 3))
253   (set (match_dup 4) (match_dup 5))
254   (set (match_dup 6) (match_dup 7))]
255{
256  int rdest = REGNO (operands[0]);
257  int rsrc = REGNO (operands[1]);
258  rtx dest[4], src[4];
259
260  dest[0] = gen_rtx_REG (TImode, rdest);
261  src[0] = gen_rtx_REG (TImode, rsrc);
262  dest[1] = gen_rtx_REG (TImode, rdest + 4);
263  src[1] = gen_rtx_REG (TImode, rsrc + 4);
264  dest[2] = gen_rtx_REG (TImode, rdest + 8);
265  src[2] = gen_rtx_REG (TImode, rsrc + 8);
266  dest[3] = gen_rtx_REG (TImode, rdest + 12);
267  src[3] = gen_rtx_REG (TImode, rsrc + 12);
268
269  neon_disambiguate_copy (operands, dest, src, 4);
270})
271
;; Expander for misaligned vector moves in the modes of the VDQX iterator.
;; The source is wrapped in UNSPEC_MISALIGNED_ACCESS.  Enabled only for
;; TARGET_NEON on little-endian targets with unaligned_access set.
272(define_expand "movmisalign<mode>"
273  [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
274	(unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
275		     UNSPEC_MISALIGNED_ACCESS))]
276  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
277{
278  rtx adjust_mem;
279  /* This pattern is not permitted to fail during expansion: if both arguments
280     are non-registers (e.g. memory := constant, which can be created by the
281     auto-vectorizer), force operand 1 into a register.  */
282  if (!s_register_operand (operands[0], <MODE>mode)
283      && !s_register_operand (operands[1], <MODE>mode))
284    operands[1] = force_reg (<MODE>mode, operands[1]);
285
  /* Pick the operand whose address is checked below: operand 1 when
     operand 0 is a register, otherwise operand 0.  */
286  if (s_register_operand (operands[0], <MODE>mode))
287    adjust_mem = operands[1];
288  else
289    adjust_mem = operands[0];
290
291  /* Legitimize address.  */
292  if (!neon_vector_mem_operand (adjust_mem, 2, true))
293    XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
294
295})
296
;; Misaligned store of a register in a VDX mode to a "Um" memory operand,
;; emitted as a single vst1 with element size <V_sz_elem>.
297(define_insn "*movmisalign<mode>_neon_store"
298  [(set (match_operand:VDX 0 "neon_permissive_struct_operand"	"=Um")
299	(unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
300		    UNSPEC_MISALIGNED_ACCESS))]
301  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
302  "vst1.<V_sz_elem>\t{%P1}, %A0"
303  [(set_attr "type" "neon_store1_1reg<q>")])
304
;; Misaligned load of a VDX-mode value from a "Um" memory operand into a
;; register, emitted as a single vld1 with element size <V_sz_elem>.
305(define_insn "*movmisalign<mode>_neon_load"
306  [(set (match_operand:VDX 0 "s_register_operand"			"=w")
307	(unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
308									" Um")]
309		    UNSPEC_MISALIGNED_ACCESS))]
310  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
311  "vld1.<V_sz_elem>\t{%P0}, %A1"
312  [(set_attr "type" "neon_load1_1reg<q>")])
313
;; Misaligned store of a register in a VQX mode to a "Um" memory operand,
;; emitted as a single vst1 using the %q operand form.
314(define_insn "*movmisalign<mode>_neon_store"
315  [(set (match_operand:VQX 0 "neon_permissive_struct_operand"  "=Um")
316	(unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
317		    UNSPEC_MISALIGNED_ACCESS))]
318  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
319  "vst1.<V_sz_elem>\t{%q1}, %A0"
320  [(set_attr "type" "neon_store1_1reg<q>")])
321
;; Misaligned load of a VQX-mode value from a "Um" memory operand into a
;; register, emitted as a single vld1 using the %q operand form.
322(define_insn "*movmisalign<mode>_neon_load"
323  [(set (match_operand:VQX 0 "s_register_operand"			"=w")
324	(unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
325									" Um")]
326		    UNSPEC_MISALIGNED_ACCESS))]
327  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
328  "vld1.<V_sz_elem>\t{%q0}, %A1"
329  [(set_attr "type" "neon_load1_1reg<q>")])
330
;; Set one lane of a VD_LANE-mode register.  Operand 1 is the new element
;; (memory in alternative 0, core register in alternative 1), operand 3 is
;; the original vector (tied to operand 0) and operand 2 is the vec_merge
;; mask; the lane number is recovered from the mask with ffs.
331(define_insn "vec_set<mode>_internal"
332  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
333        (vec_merge:VD_LANE
334          (vec_duplicate:VD_LANE
335            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
336          (match_operand:VD_LANE 3 "s_register_operand" "0,0")
337          (match_operand:SI 2 "immediate_operand" "i,i")))]
338  "TARGET_NEON"
339{
  /* Convert the mask into a zero-based lane index (lowest set bit).  */
340  int elt = ffs ((int) INTVAL (operands[2])) - 1;
  /* On big-endian the lane index is mirrored within the vector.  */
341  if (BYTES_BIG_ENDIAN)
342    elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
343  operands[2] = GEN_INT (elt);
344
345  if (which_alternative == 0)
346    return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
347  else
348    return "vmov.<V_sz_elem>\t%P0[%c2], %1";
349}
350  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
351
;; Set one lane of a VQ2-mode register.  Operand 2 is the vec_merge mask;
;; the lane number recovered from it is split into a half selector and a
;; lane index within that half, and operand 0 is rewritten to the
;; <V_HALF>-mode register holding the selected half before the template is
;; returned.
352(define_insn "vec_set<mode>_internal"
353  [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
354        (vec_merge:VQ2
355          (vec_duplicate:VQ2
356            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
357          (match_operand:VQ2 3 "s_register_operand" "0,0")
358          (match_operand:SI 2 "immediate_operand" "i,i")))]
359  "TARGET_NEON"
360{
  /* Zero-based lane index within the full vector (lowest set mask bit).  */
361  HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
362  int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
  /* Lane index within the selected half.  */
363  int elt = elem % half_elts;
  /* Register-number offset of the selected half: 0 or 2.  */
364  int hi = (elem / half_elts) * 2;
365  int regno = REGNO (operands[0]);
366
  /* On big-endian the lane index is mirrored within the half.  */
367  if (BYTES_BIG_ENDIAN)
368    elt = half_elts - 1 - elt;
369
370  operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
371  operands[2] = GEN_INT (elt);
372
373  if (which_alternative == 0)
374    return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
375  else
376    return "vmov.<V_sz_elem>\t%P0[%c2], %1";
377}
378  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
379)
380
;; Set one DImode lane of a V2DI register.  Operand 1 is the new element
;; (memory in alternative 0, core register pair in alternative 1), operand 3
;; is the original vector (tied to operand 0) and operand 2 is the vec_merge
;; mask.  Operand 0 is rewritten to the DImode register that holds the
;; selected lane, so the whole 64-bit register is loaded or moved.
381(define_insn "vec_setv2di_internal"
382  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
383        (vec_merge:V2DI
384          (vec_duplicate:V2DI
385            (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
386          (match_operand:V2DI 3 "s_register_operand" "0,0")
387          (match_operand:SI 2 "immediate_operand" "i,i")))]
388  "TARGET_NEON"
389{
  /* Zero-based lane index (lowest set mask bit).  */
390  HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
  /* Each lane is two register numbers further on.  */
391  int regno = REGNO (operands[0]) + 2 * elem;
392
393  operands[0] = gen_rtx_REG (DImode, regno);
394
  /* The vld1 register list is written in braces, matching the other
     vld1/vst1 templates in this file (e.g. vec_extractv2di).  */
395  if (which_alternative == 0)
396    return "vld1.64\t{%P0}, %A1";
397  else
398    return "vmov\t%P0, %Q1, %R1";
399}
400  [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
401)
402
;; Expander for vec_set on the modes of the VDQ iterator.  Operand 2 is a
;; lane index; it is converted to the single-bit mask (1 << index) expected
;; by vec_set<mode>_internal.  Operand 0 is passed both as the destination
;; and as the vector being merged into (operand 3 of the internal pattern).
403(define_expand "vec_set<mode>"
404  [(match_operand:VDQ 0 "s_register_operand" "")
405   (match_operand:<V_elem> 1 "s_register_operand" "")
406   (match_operand:SI 2 "immediate_operand" "")]
407  "TARGET_NEON"
408{
409  HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
410  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
411					 GEN_INT (elem), operands[0]));
412  DONE;
413})
414
;; Extract one lane of a VD_LANE-mode register.  Operand 2 is the lane index
;; (not a mask).  Alternative 0 stores the lane to memory with vst1;
;; alternative 1 moves it to a core register with vmov.
415(define_insn "vec_extract<mode>"
416  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
417        (vec_select:<V_elem>
418          (match_operand:VD_LANE 1 "s_register_operand" "w,w")
419          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
420  "TARGET_NEON"
421{
  /* On big-endian the lane index is mirrored within the vector.  */
422  if (BYTES_BIG_ENDIAN)
423    {
424      int elt = INTVAL (operands[2]);
425      elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
426      operands[2] = GEN_INT (elt);
427    }
428
429  if (which_alternative == 0)
430    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
431  else
432    return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
433}
434  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
435)
436
;; Extract one lane of a VQ2-mode register.  Operand 2 is the lane index; it
;; is split into a half selector and a lane index within that half, and
;; operand 1 is rewritten to the <V_HALF>-mode register holding the selected
;; half before the template is returned.
437(define_insn "vec_extract<mode>"
438  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
439	(vec_select:<V_elem>
440          (match_operand:VQ2 1 "s_register_operand" "w,w")
441          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
442  "TARGET_NEON"
443{
444  int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
  /* Lane index within the selected half.  */
445  int elt = INTVAL (operands[2]) % half_elts;
  /* Register-number offset of the selected half: 0 or 2.  */
446  int hi = (INTVAL (operands[2]) / half_elts) * 2;
447  int regno = REGNO (operands[1]);
448
  /* On big-endian the lane index is mirrored within the half.  */
449  if (BYTES_BIG_ENDIAN)
450    elt = half_elts - 1 - elt;
451
452  operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
453  operands[2] = GEN_INT (elt);
454
455  if (which_alternative == 0)
456    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
457  else
458    return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
459}
460  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
461)
462
;; Extract one DImode lane of a V2DI register.  Operand 2 is the lane index;
;; operand 1 is rewritten to the DImode register holding that lane (two
;; register numbers per lane), which is then stored with vst1.64
;; (alternative 0) or moved to a core register pair with vmov (alternative 1).
463(define_insn "vec_extractv2di"
464  [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
465	(vec_select:DI
466          (match_operand:V2DI 1 "s_register_operand" "w,w")
467          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
468  "TARGET_NEON"
469{
470  int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
471
472  operands[1] = gen_rtx_REG (DImode, regno);
473
474  if (which_alternative == 0)
475    return "vst1.64\t{%P1}, %A0  @ v2di";
476  else
477    return "vmov\t%Q0, %R0, %P1  @ v2di";
478}
479  [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
480)
481
;; Expander for vector initialization in the modes of the VDQ iterator.  All
;; of the work is delegated to neon_expand_vector_init, which receives the
;; destination (operand 0) and the initializer (operand 1).
482(define_expand "vec_init<mode>"
483  [(match_operand:VDQ 0 "s_register_operand" "")
484   (match_operand 1 "" "")]
485  "TARGET_NEON"
486{
487  neon_expand_vector_init (operands[0], operands[1]);
488  DONE;
489})
490
491;; Doubleword and quadword arithmetic.
492
493;; NOTE: some other instructions also support 64-bit integer
494;; element size, which we could potentially use for "long long" operations.
495
;; Vector addition for the modes of the VDQ iterator, emitted as vadd.
;; Floating-point modes are accepted only with
;; flag_unsafe_math_optimizations.  The "type" attribute is chosen by
;; <Is_float_mode>.
496(define_insn "*add<mode>3_neon"
497  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
498        (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
499		  (match_operand:VDQ 2 "s_register_operand" "w")))]
500  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
501  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
502  [(set (attr "type")
503      (if_then_else (match_test "<Is_float_mode>")
504                    (const_string "neon_fp_addsub_s<q>")
505                    (const_string "neon_add<q>")))]
506)
507
;; DImode addition with NEON and core-register alternatives; clobbers the
;; condition-code register.  Alternatives 0 and 3 (all operands "w") emit
;; vadd.i64 and are selected through the "arch" attribute
;; (neon_for_64bits / avoid_neon_for_64bits).  The core-register
;; alternatives 1, 2, 4, 5 and 6 output "#".
;; NOTE(review): the "#" alternatives are presumably split by a pattern
;; outside this section -- confirm.
508(define_insn "adddi3_neon"
509  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
510        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
511                 (match_operand:DI 2 "arm_adddi_operand"     "w,r,0,w,r,Dd,Dd")))
512   (clobber (reg:CC CC_REGNUM))]
513  "TARGET_NEON"
514{
515  switch (which_alternative)
516    {
517    case 0: /* fall through */
518    case 3: return "vadd.i64\t%P0, %P1, %P2";
519    case 1: return "#";
520    case 2: return "#";
521    case 4: return "#";
522    case 5: return "#";
523    case 6: return "#";
524    default: gcc_unreachable ();
525    }
526}
527  [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
528		     multiple,multiple,multiple")
529   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
530   (set_attr "length" "*,8,8,*,8,8,8")
531   (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
532)
533
;; Vector subtraction for the modes of the VDQ iterator, emitted as vsub.
;; Floating-point modes are accepted only with
;; flag_unsafe_math_optimizations.  The "type" attribute is chosen by
;; <Is_float_mode>.
534(define_insn "*sub<mode>3_neon"
535  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
536        (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
537                   (match_operand:VDQ 2 "s_register_operand" "w")))]
538  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
539  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
540  [(set (attr "type")
541      (if_then_else (match_test "<Is_float_mode>")
542                    (const_string "neon_fp_addsub_s<q>")
543                    (const_string "neon_sub<q>")))]
544)
545
;; DImode subtraction with NEON and core-register alternatives; clobbers the
;; condition-code register.  Alternatives 0 and 4 (all operands "w") emit
;; vsub.i64 and are selected through the "arch" attribute.  The
;; core-register alternatives 1-3 emit a two-instruction subs/sbc sequence
;; (length 8, conds "clob").
546(define_insn "subdi3_neon"
547  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
548        (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
549                  (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
550   (clobber (reg:CC CC_REGNUM))]
551  "TARGET_NEON"
552{
553  switch (which_alternative)
554    {
555    case 0: /* fall through */
556    case 4: return "vsub.i64\t%P0, %P1, %P2";
557    case 1: /* fall through */
558    case 2: /* fall through */
559    case 3: return  "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
560    default: gcc_unreachable ();
561    }
562}
563  [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
564   (set_attr "conds" "*,clob,clob,clob,*")
565   (set_attr "length" "*,8,8,8,*")
566   (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
567)
568
;; Vector multiplication for the modes of the VDQW iterator, emitted as
;; vmul.  Floating-point modes are accepted only with
;; flag_unsafe_math_optimizations.  The "type" attribute is chosen by
;; <Is_float_mode>.
569(define_insn "*mul<mode>3_neon"
570  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
571        (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
572                   (match_operand:VDQW 2 "s_register_operand" "w")))]
573  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
574  "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
575  [(set (attr "type")
576      (if_then_else (match_test "<Is_float_mode>")
577		    (const_string "neon_fp_mul_s<q>")
578                    (const_string "neon_mul_<V_elem_ch><q>")))]
579)
580
;; Multiply-accumulate: operand 0 = operand 2 * operand 3 + operand 1, with
;; the accumulator (operand 1) tied to the destination.  Emitted as vmla.
;; Floating-point modes are accepted only with
;; flag_unsafe_math_optimizations.
581(define_insn "mul<mode>3add<mode>_neon"
582  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
583        (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
584                            (match_operand:VDQW 3 "s_register_operand" "w"))
585		  (match_operand:VDQW 1 "s_register_operand" "0")))]
586  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
587  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
588  [(set (attr "type")
589      (if_then_else (match_test "<Is_float_mode>")
590		    (const_string "neon_fp_mla_s<q>")
591		    (const_string "neon_mla_<V_elem_ch><q>")))]
592)
593
;; Multiply-subtract: operand 0 = operand 1 - operand 2 * operand 3, with
;; the accumulator (operand 1) tied to the destination.  Emitted as vmls.
;; Floating-point modes are accepted only with
;; flag_unsafe_math_optimizations.
594(define_insn "mul<mode>3neg<mode>add<mode>_neon"
595  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
596        (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
597                    (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
598                               (match_operand:VDQW 3 "s_register_operand" "w"))))]
599  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
600  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
601  [(set (attr "type")
602      (if_then_else (match_test "<Is_float_mode>")
603		    (const_string "neon_fp_mla_s<q>")
604		    (const_string "neon_mla_<V_elem_ch><q>")))]
605)
606
607;; Fused multiply-accumulate
608;; We define each insn twice here:
609;;    1: with flag_unsafe_math_optimizations, so that the widening multiply
610;;       phase can use it when converting to FMA.
611;;    2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add on VCVTF modes: operand 0 = operand 1 * operand 2 +
;; operand 3, with the addend (operand 3) tied to the destination.  This
;; named variant additionally requires flag_unsafe_math_optimizations.
612(define_insn "fma<VCVTF:mode>4"
613  [(set (match_operand:VCVTF 0 "register_operand" "=w")
614        (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
615		 (match_operand:VCVTF 2 "register_operand" "w")
616		 (match_operand:VCVTF 3 "register_operand" "0")))]
617  "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
618  "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619  [(set_attr "type" "neon_fp_mla_s<q>")]
620)
621
;; Same RTL and output as fma<VCVTF:mode>4, but without the
;; flag_unsafe_math_optimizations requirement in the insn condition.
622(define_insn "fma<VCVTF:mode>4_intrinsic"
623  [(set (match_operand:VCVTF 0 "register_operand" "=w")
624        (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
625		 (match_operand:VCVTF 2 "register_operand" "w")
626		 (match_operand:VCVTF 3 "register_operand" "0")))]
627  "TARGET_NEON && TARGET_FMA"
628  "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
629  [(set_attr "type" "neon_fp_mla_s<q>")]
630)
631
;; Fused multiply-subtract on VCVTF modes: operand 0 = (-operand 1) *
;; operand 2 + operand 3, with operand 3 tied to the destination.  Emitted
;; as vfms; requires flag_unsafe_math_optimizations.
632(define_insn "*fmsub<VCVTF:mode>4"
633  [(set (match_operand:VCVTF 0 "register_operand" "=w")
634        (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
635		   (match_operand:VCVTF 2 "register_operand" "w")
636		   (match_operand:VCVTF 3 "register_operand" "0")))]
637  "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
638  "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
639  [(set_attr "type" "neon_fp_mla_s<q>")]
640)
641
;; Same RTL and output as *fmsub<VCVTF:mode>4, but without the
;; flag_unsafe_math_optimizations requirement in the insn condition.
642(define_insn "fmsub<VCVTF:mode>4_intrinsic"
643  [(set (match_operand:VCVTF 0 "register_operand" "=w")
644        (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
645		   (match_operand:VCVTF 2 "register_operand" "w")
646		   (match_operand:VCVTF 3 "register_operand" "0")))]
647  "TARGET_NEON && TARGET_FMA"
648  "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
649  [(set_attr "type" "neon_fp_mla_s<q>")]
650)
651
;; Rounding instructions, one per NEON_VRINT unspec, on VCVTF modes.  The
;; variant letter comes from <nvrint_variant>; the template always uses the
;; .f32 suffix.  Requires TARGET_NEON and TARGET_FPU_ARMV8.
652(define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
653  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
654        (unspec:VCVTF [(match_operand:VCVTF 1
655		         "s_register_operand" "w")]
656		NEON_VRINT))]
657  "TARGET_NEON && TARGET_FPU_ARMV8"
658  "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
659  [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
660)
661
;; Float-to-integer conversions, one per NEON_VCVT unspec and FIXUORS code.
;; The source has a VCVTF mode and the result has mode <V_cmp_result>; the
;; template always uses the <su>32.f32 suffix.  Explicitly marked as not
;; predicable.  Requires TARGET_NEON and TARGET_FPU_ARMV8.
662(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
663  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
664	(FIXUORS:<V_cmp_result> (unspec:VCVTF
665			       [(match_operand:VCVTF 1 "register_operand" "w")]
666			       NEON_VCVT)))]
667  "TARGET_NEON && TARGET_FPU_ARMV8"
668  "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
669  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
670   (set_attr "predicable" "no")]
671)
672
;; Bitwise OR for the modes of the VDQ iterator.  Alternative 0 is the
;; register form (vorr).  Alternative 1 takes a "Dl" immediate with operand 1
;; tied to the destination; its text is produced by
;; neon_output_logic_immediate, which is passed "vorr", the address of
;; operand 2, the mode, 0, and whether the mode is a Q-register mode.
673(define_insn "ior<mode>3"
674  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
675	(ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
676		 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
677  "TARGET_NEON"
678{
679  switch (which_alternative)
680    {
681    case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
682    case 1: return neon_output_logic_immediate ("vorr", &operands[2],
683		     <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
684    default: gcc_unreachable ();
685    }
686}
687  [(set_attr "type" "neon_logic<q>")]
688)
689
690;; The concrete forms of the Neon immediate-logic instructions are vbic and
691;; vorr. We support the pseudo-instruction vand instead, because that
692;; corresponds to the canonical form the middle-end expects to use for
693;; immediate bitwise-ANDs.
694
;; Bitwise AND for the modes of the VDQ iterator.  Alternative 0 is the
;; register form (vand).  Alternative 1 takes a "DL" immediate with operand 1
;; tied to the destination; its text is produced by
;; neon_output_logic_immediate, which is passed "vand", the address of
;; operand 2, the mode, 1, and whether the mode is a Q-register mode.
695(define_insn "and<mode>3"
696  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
697	(and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
698		 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
699  "TARGET_NEON"
700{
701  switch (which_alternative)
702    {
703    case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
704    case 1: return neon_output_logic_immediate ("vand", &operands[2],
705    		     <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
706    default: gcc_unreachable ();
707    }
708}
709  [(set_attr "type" "neon_logic<q>")]
710)
711
;; OR-NOT for the modes of the VDQ iterator: operand 0 = operand 1 |
;; ~operand 2.  Note that the inverted operand is numbered 2 although it
;; appears first in the RTL.  Emitted as vorn.
712(define_insn "orn<mode>3_neon"
713  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
714	(ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
715		 (match_operand:VDQ 1 "s_register_operand" "w")))]
716  "TARGET_NEON"
717  "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
718  [(set_attr "type" "neon_logic<q>")]
719)
720
721;; TODO: investigate whether we should disable
722;; this and bicdi3_neon for the A8 in line with the other
723;; changes above.
;; DImode OR-NOT: operand 0 = operand 1 | ~operand 2.  Alternative 0 uses
;; NEON registers and emits vorn; the three core-register alternatives
;; output "#" and are split after reload when operand 0 is not a VFP
;; register.  The split preparation code has two paths:
;;   - TARGET_THUMB2: operands 0-5 are set to the SImode low and high parts
;;     so that the two SImode (ior (not ...)) sets of the split template
;;     are generated;
;;   - otherwise: gen_one_cmpldi2 and then gen_iordi3 are emitted directly
;;     and the split template is bypassed with DONE.
;; The "arch" attribute restricts alternative 1 to "a" and alternatives 2
;; and 3 to "t2".
724(define_insn_and_split "orndi3_neon"
725  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
726	(ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
727		(match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
728  "TARGET_NEON"
729  "@
730   vorn\t%P0, %P1, %P2
731   #
732   #
733   #"
734  "reload_completed &&
735   (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
736  [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
737   (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
738  "
739  {
740    if (TARGET_THUMB2)
741      {
742        operands[3] = gen_highpart (SImode, operands[0]);
743        operands[0] = gen_lowpart (SImode, operands[0]);
744        operands[4] = gen_highpart (SImode, operands[2]);
745        operands[2] = gen_lowpart (SImode, operands[2]);
746        operands[5] = gen_highpart (SImode, operands[1]);
747        operands[1] = gen_lowpart (SImode, operands[1]);
748      }
749    else
750      {
751        emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
752        emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
753        DONE;
754      }
755  }"
756  [(set_attr "type" "neon_logic,multiple,multiple,multiple")
757   (set_attr "length" "*,16,8,8")
758   (set_attr "arch" "any,a,t2,t2")]
759)
760
;; Bit clear for the modes of the VDQ iterator: operand 0 = operand 1 &
;; ~operand 2.  Note that the inverted operand is numbered 2 although it
;; appears first in the RTL.  Emitted as vbic.
761(define_insn "bic<mode>3_neon"
762  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
763	(and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
764		 (match_operand:VDQ 1 "s_register_operand" "w")))]
765  "TARGET_NEON"
766  "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
767  [(set_attr "type" "neon_logic<q>")]
768)
769
770;; Compare to *anddi_notdi_di.
;; DImode bit clear: operand 0 = operand 1 & ~operand 2.  Alternative 0 uses
;; NEON registers and emits vbic; the two core-register alternatives output
;; "#" with length 8.
;; NOTE(review): the "#" alternatives are presumably split by a pattern
;; outside this section -- confirm.
771(define_insn "bicdi3_neon"
772  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
773        (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
774		(match_operand:DI 1 "s_register_operand" "w,0,r")))]
775  "TARGET_NEON"
776  "@
777   vbic\t%P0, %P1, %P2
778   #
779   #"
780  [(set_attr "type" "neon_logic,multiple,multiple")
781   (set_attr "length" "*,8,8")]
782)
783
;; Bitwise exclusive OR for the modes of the VDQ iterator; register operands
;; only.  Emitted as veor.
784(define_insn "xor<mode>3"
785  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
786	(xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
787		 (match_operand:VDQ 2 "s_register_operand" "w")))]
788  "TARGET_NEON"
789  "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
790  [(set_attr "type" "neon_logic<q>")]
791)
792
;; Bitwise NOT for the modes of the VDQ iterator.  Emitted as vmvn and
;; classified with the neon_move<q> type.
793(define_insn "one_cmpl<mode>2"
794  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
795        (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
796  "TARGET_NEON"
797  "vmvn\t%<V_reg>0, %<V_reg>1"
798  [(set_attr "type" "neon_move<q>")]
799)
800
;; Absolute value for the modes of the VDQW iterator, emitted as vabs with
;; the <V_s_elem> suffix.  The "type" attribute is chosen by
;; <Is_float_mode>.
801(define_insn "abs<mode>2"
802  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
803	(abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
804  "TARGET_NEON"
805  "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
806  [(set (attr "type")
807      (if_then_else (match_test "<Is_float_mode>")
808                    (const_string "neon_fp_abs_s<q>")
809                    (const_string "neon_abs<q>")))]
810)
811
;; Negation for the modes of the VDQW iterator, emitted as vneg with the
;; <V_s_elem> suffix.  The "type" attribute is chosen by <Is_float_mode>.
812(define_insn "neg<mode>2"
813  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
814	(neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
815  "TARGET_NEON"
816  "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
817  [(set (attr "type")
818      (if_then_else (match_test "<Is_float_mode>")
819                    (const_string "neon_fp_neg_s<q>")
820                    (const_string "neon_neg<q>")))]
821)
822
;; DImode negation with a DImode scratch and a condition-code clobber.  The
;; pattern always outputs "#"; the two define_splits that follow handle the
;; VFP-register and core-register cases after reload.  Only alternative 1
;; requests a real scratch register (&w); the other alternatives use X.
823(define_insn "negdi2_neon"
824  [(set (match_operand:DI 0 "s_register_operand"	 "=&w, w,r,&r")
825	(neg:DI (match_operand:DI 1 "s_register_operand" "  w, w,0, r")))
826   (clobber (match_scratch:DI 2				 "= X,&w,X, X"))
827   (clobber (reg:CC CC_REGNUM))]
828  "TARGET_NEON"
829  "#"
830  [(set_attr "length" "8")
831   (set_attr "type" "multiple")]
832)
833
834; Split negdi2_neon for vfp registers
;; Applies after reload when the destination is a VFP register.  The
;; negation becomes "temp = 0; dest = temp - src", where temp is the scratch
;; (operand 2) if one was allocated and the destination itself otherwise.
835(define_split
836  [(set (match_operand:DI 0 "s_register_operand" "")
837	(neg:DI (match_operand:DI 1 "s_register_operand" "")))
838   (clobber (match_scratch:DI 2 ""))
839   (clobber (reg:CC CC_REGNUM))]
840  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
841  [(set (match_dup 2) (const_int 0))
842   (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
843	      (clobber (reg:CC CC_REGNUM))])]
844  {
    /* No scratch register was allocated: zero the destination instead.  */
845    if (!REG_P (operands[2]))
846      operands[2] = operands[0];
847  }
848)
849
850; Split negdi2_neon for core registers
;; Applies after reload on TARGET_32BIT when the destination satisfies
;; arm_general_register_operand.  The scratch clobber is dropped and the
;; negation is re-emitted as a neg:DI with only the condition-code clobber.
851(define_split
852  [(set (match_operand:DI 0 "s_register_operand" "")
853	(neg:DI (match_operand:DI 1 "s_register_operand" "")))
854   (clobber (match_scratch:DI 2 ""))
855   (clobber (reg:CC CC_REGNUM))]
856  "TARGET_32BIT && reload_completed
857   && arm_general_register_operand (operands[0], DImode)"
858  [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
859	      (clobber (reg:CC CC_REGNUM))])]
860  ""
861)
862
;; Unsigned minimum for the modes of the VDQIW iterator, emitted as vmin
;; with the <V_u_elem> suffix.
863(define_insn "*umin<mode>3_neon"
864  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
865	(umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
866		    (match_operand:VDQIW 2 "s_register_operand" "w")))]
867  "TARGET_NEON"
868  "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
869  [(set_attr "type" "neon_minmax<q>")]
870)
871
;; Unsigned maximum of two VDQIW-mode registers, emitted as vmax with the
;; unsigned element suffix <V_u_elem>.
872(define_insn "*umax<mode>3_neon"
873  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
874	(umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
875		    (match_operand:VDQIW 2 "s_register_operand" "w")))]
876  "TARGET_NEON"
877  "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
878  [(set_attr "type" "neon_minmax<q>")]
879)
880
;; Signed minimum of two VDQW-mode registers, emitted as vmin with the
;; signed element suffix <V_s_elem>.  The scheduling type is the
;; floating-point min/max type when <Is_float_mode> holds, the integer one
;; otherwise.
881(define_insn "*smin<mode>3_neon"
882  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
883	(smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
884		   (match_operand:VDQW 2 "s_register_operand" "w")))]
885  "TARGET_NEON"
886  "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
887  [(set (attr "type")
888      (if_then_else (match_test "<Is_float_mode>")
889                    (const_string "neon_fp_minmax_s<q>")
890                    (const_string "neon_minmax<q>")))]
891)
892
;; Signed maximum of two VDQW-mode registers, emitted as vmax with the
;; signed element suffix <V_s_elem>.  The scheduling type is the
;; floating-point min/max type when <Is_float_mode> holds, the integer one
;; otherwise.
893(define_insn "*smax<mode>3_neon"
894  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
895	(smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
896		   (match_operand:VDQW 2 "s_register_operand" "w")))]
897  "TARGET_NEON"
898  "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
899  [(set (attr "type")
900      (if_then_else (match_test "<Is_float_mode>")
901                    (const_string "neon_fp_minmax_s<q>")
902                    (const_string "neon_minmax<q>")))]
903)
904
905; TODO: V2DI shifts are currently disabled because there are bugs in the
906; generic vectorizer code.  It ends up creating a V2DI constructor with
907; SImode elements.
908
;; Vector shift left.  Alternative 0 takes the per-element shift counts in
;; a register and emits vshl directly; alternative 1 takes an immediate
;; ("Dn") and lets neon_output_shift_immediate build the "vshl" text.
;; NOTE(review): the trailing `true' argument presumably tells the helper
;; this is a left shift -- confirm against neon_output_shift_immediate.
909(define_insn "vashl<mode>3"
910  [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
911	(ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
912		      (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
913  "TARGET_NEON"
914  {
915    switch (which_alternative)
916      {
917        case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
918        case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
919                         			    <MODE>mode,
920						    VALID_NEON_QREG_MODE (<MODE>mode),
921						    true);
922        default: gcc_unreachable ();
923      }
924  }
925  [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
926)
927
;; Arithmetic shift right by an immediate.  The assembly text is produced
;; by neon_output_shift_immediate with mnemonic "vshr" and sign character
;; 's'.  Used by the vashr<mode>3 expander for non-register shift counts.
928(define_insn "vashr<mode>3_imm"
929  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
930	(ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
931			(match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
932  "TARGET_NEON"
933  {
934    return neon_output_shift_immediate ("vshr", 's', &operands[2],
935					<MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
936					false);
937  }
938  [(set_attr "type" "neon_shift_imm<q>")]
939)
940
;; Logical shift right by an immediate.  The assembly text is produced by
;; neon_output_shift_immediate with mnemonic "vshr" and sign character 'u'.
;; Used by the vlshr<mode>3 expander for non-register shift counts.
941(define_insn "vlshr<mode>3_imm"
942  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
943	(lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
944			(match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
945  "TARGET_NEON"
946  {
947    return neon_output_shift_immediate ("vshr", 'u', &operands[2],
948					<MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
949					false);
950  }
951  [(set_attr "type" "neon_shift_imm<q>")]
952)
953
954; Used for implementing arithmetic shift-right, which is a left-shift by a
955; negative amount, with signed operands. This is essentially the same as
956; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky
957; with negative shift amounts.
958
;; Register-count vshl on signed elements (<V_s_elem>), wrapped in
;; UNSPEC_ASHIFT_SIGNED rather than an ashift rtx.  The vashr<mode>3
;; expander calls this with a negated shift count.
959(define_insn "ashl<mode>3_signed"
960  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
961	(unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
962		      (match_operand:VDQI 2 "s_register_operand" "w")]
963		     UNSPEC_ASHIFT_SIGNED))]
964  "TARGET_NEON"
965  "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
966  [(set_attr "type" "neon_shift_reg<q>")]
967)
968
969; Used for implementing logical shift-right, which is a left-shift by a negative
970; amount, with unsigned operands.
; The operation is wrapped in UNSPEC_ASHIFT_UNSIGNED and emitted as vshl
; with the unsigned element suffix <V_u_elem>.  The vlshr<mode>3 expander
; calls this with a negated shift count.
971
972(define_insn "ashl<mode>3_unsigned"
973  [(set (match_operand:VDQI 0 "s_register_operand" "=w")
974	(unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
975		      (match_operand:VDQI 2 "s_register_operand" "w")]
976		     UNSPEC_ASHIFT_UNSIGNED))]
977  "TARGET_NEON"
978  "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
979  [(set_attr "type" "neon_shift_reg<q>")]
980)
981
;; Expander for vector arithmetic shift right.
;;  - Register shift count: negate the count into a fresh pseudo and emit
;;    the signed vshl unspec (ashl<mode>3_signed) with it.
;;  - Otherwise (immediate count): emit vashr<mode>3_imm directly.
982(define_expand "vashr<mode>3"
983  [(set (match_operand:VDQIW 0 "s_register_operand" "")
984	(ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
985			(match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
986  "TARGET_NEON"
987{
988  if (s_register_operand (operands[2], <MODE>mode))
989    {
      /* Right shift by N is done as a left shift by -N.  */
990      rtx neg = gen_reg_rtx (<MODE>mode);
991      emit_insn (gen_neg<mode>2 (neg, operands[2]));
992      emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
993    }
994  else
995    emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
996  DONE;
997})
998
;; Expander for vector logical shift right.
;;  - Register shift count: negate the count into a fresh pseudo and emit
;;    the unsigned vshl unspec (ashl<mode>3_unsigned) with it.
;;  - Otherwise (immediate count): emit vlshr<mode>3_imm directly.
999(define_expand "vlshr<mode>3"
1000  [(set (match_operand:VDQIW 0 "s_register_operand" "")
1001	(lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1002			(match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1003  "TARGET_NEON"
1004{
1005  if (s_register_operand (operands[2], <MODE>mode))
1006    {
      /* Right shift by N is done as a left shift by -N.  */
1007      rtx neg = gen_reg_rtx (<MODE>mode);
1008      emit_insn (gen_neg<mode>2 (neg, operands[2]));
1009      emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1010    }
1011  else
1012    emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1013  DONE;
1014})
1015
1016;; 64-bit shifts
1017
1018;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1019;; leaving the upper half uninitialized.  This is OK since the shift
1020;; instruction only looks at the low 8 bits anyway.  To avoid confusing
1021;; data flow analysis however, we pretend the full register is set
1022;; using an unspec.
;; Two alternatives, both writing lane 0 of the destination D register:
;;   0: SImode source in memory ("Um")     -> vld1.32 {Dd[0]}, [addr]
;;   1: SImode source in a core register   -> vmov.32 Dd[0], Rn
1023(define_insn "neon_load_count"
1024  [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1025        (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1026                   UNSPEC_LOAD_COUNT))]
1027  "TARGET_NEON"
1028  "@
1029   vld1.32\t{%P0[0]}, %A1
1030   vmov.32\t%P0[0], %1"
1031  [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1032)
1033
;; 64-bit left shift in a NEON D register without any clobbers.  Only
;; matches after reload.  Alternative 0 shifts by a constant, which must lie
;; in [0, 63]; alternative 1 shifts by a count held in a "w" register.
;; Emitted by the ashldi3_neon splitter.
1034(define_insn "ashldi3_neon_noclobber"
1035  [(set (match_operand:DI 0 "s_register_operand"	    "=w,w")
1036	(ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1037		   (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1038  "TARGET_NEON && reload_completed
1039   && (!CONST_INT_P (operands[2])
1040       || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1041  "@
1042   vshl.u64\t%P0, %P1, %2
1043   vshl.u64\t%P0, %P1, %P2"
1044  [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1045)
1046
;; 64-bit left shift that may be allocated to either NEON ("w") or core
;; ("r") registers; always split after reload.
;;
;; Operands: 0 = destination, 1 = value, 2 = SImode shift count,
;; 3/4 = SImode core scratches (only allocated for the register-count core
;; alternative), 5 = DImode NEON scratch (only allocated for the
;; register/memory-count NEON alternatives).
;;
;; Split behaviour when operand 0 is a VFP register:
;;   - constant count < 1        : emit a plain DImode move and stop;
;;   - constant count > 63       : the count is replaced by 63;
;;   - non-constant count        : load it into operand 5 with
;;                                 neon_load_count and shift by that;
;;   then emit ashldi3_neon_noclobber.
;; Split behaviour otherwise (core registers):
;;   - asserts that operands 0 and 1 either do not overlap or are the same
;;     register;
;;   - count == 1                : arm_ashldi3_1bit;
;;   - anything else             : arm_emit_coreregs_64bit_shift using
;;                                 scratches 3 and 4.
;;
;; The "arch" attribute tags alternatives 0-1 neon_for_64bits and 5-6
;; avoid_neon_for_64bits; "opt" tags the core alternatives 2-4 as speed.
1047(define_insn_and_split "ashldi3_neon"
1048  [(set (match_operand:DI 0 "s_register_operand"	    "= w, w, &r, r, &r, ?w,?w")
1049	(ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  r, 0w, w")
1050		   (match_operand:SI 2 "general_operand"    "rUm, i,  r, i,  i,rUm, i")))
1051   (clobber (match_scratch:SI 3				    "= X, X, &r, X,  X,  X, X"))
1052   (clobber (match_scratch:SI 4				    "= X, X, &r, X,  X,  X, X"))
1053   (clobber (match_scratch:DI 5				    "=&w, X,  X, X,  X, &w, X"))
1054   (clobber (reg:CC_C CC_REGNUM))]
1055  "TARGET_NEON"
1056  "#"
1057  "TARGET_NEON && reload_completed"
1058  [(const_int 0)]
1059  "
1060  {
1061    if (IS_VFP_REGNUM (REGNO (operands[0])))
1062      {
1063        if (CONST_INT_P (operands[2]))
1064	  {
1065	    if (INTVAL (operands[2]) < 1)
1066	      {
1067	        emit_insn (gen_movdi (operands[0], operands[1]));
1068		DONE;
1069	      }
1070	    else if (INTVAL (operands[2]) > 63)
1071	      operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1072	  }
1073	else
1074	  {
1075	    emit_insn (gen_neon_load_count (operands[5], operands[2]));
1076	    operands[2] = operands[5];
1077	  }
1078
1079	/* Ditch the unnecessary clobbers.  */
1080	emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1081					       operands[2]));
1082      }
1083    else
1084      {
1085	/* The shift expanders support either full overlap or no overlap.  */
1086	gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1087		    || REGNO (operands[0]) == REGNO (operands[1]));
1088
1089	if (operands[2] == CONST1_RTX (SImode))
1090	  /* This clobbers CC.  */
1091	  emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1092	else
1093	  arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1094					 operands[2], operands[3], operands[4]);
1095      }
1096    DONE;
1097  }"
1098  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1099   (set_attr "opt" "*,*,speed,speed,speed,*,*")
1100   (set_attr "type" "multiple")]
1101)
1102
1103; The shift amount needs to be negated for right-shifts
; 64-bit vshl.s64 with the count in a D register (operand 2), expressed as
; UNSPEC_ASHIFT_SIGNED.  Only matches after reload; emitted by the
; <shift>di3_neon splitter for register shift counts.
1104(define_insn "signed_shift_di3_neon"
1105  [(set (match_operand:DI 0 "s_register_operand"	     "=w")
1106	(unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1107		    (match_operand:DI 2 "s_register_operand" " w")]
1108		   UNSPEC_ASHIFT_SIGNED))]
1109  "TARGET_NEON && reload_completed"
1110  "vshl.s64\t%P0, %P1, %P2"
1111  [(set_attr "type" "neon_shift_reg")]
1112)
1113
1114; The shift amount needs to be negated for right-shifts
; 64-bit vshl.u64 with the count in a D register (operand 2), expressed as
; UNSPEC_ASHIFT_UNSIGNED.  Only matches after reload; emitted by the
; <shift>di3_neon splitter for register shift counts.
1115(define_insn "unsigned_shift_di3_neon"
1116  [(set (match_operand:DI 0 "s_register_operand"	     "=w")
1117	(unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1118		    (match_operand:DI 2 "s_register_operand" " w")]
1119		   UNSPEC_ASHIFT_UNSIGNED))]
1120  "TARGET_NEON && reload_completed"
1121  "vshl.u64\t%P0, %P1, %P2"
1122  [(set_attr "type" "neon_shift_reg")]
1123)
1124
;; 64-bit arithmetic shift right by a constant in a NEON D register, with
;; no clobbers.  Only matches after reload and only for counts in [1, 64].
1125(define_insn "ashrdi3_neon_imm_noclobber"
1126  [(set (match_operand:DI 0 "s_register_operand"	      "=w")
1127	(ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1128		     (match_operand:DI 2 "const_int_operand"  " i")))]
1129  "TARGET_NEON && reload_completed
1130   && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1131  "vshr.s64\t%P0, %P1, %2"
1132  [(set_attr "type" "neon_shift_imm")]
1133)
1134
;; 64-bit logical shift right by a constant in a NEON D register, with no
;; clobbers.  Only matches after reload and only for counts in [1, 64].
1135(define_insn "lshrdi3_neon_imm_noclobber"
1136  [(set (match_operand:DI 0 "s_register_operand"	      "=w")
1137	(lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1138		     (match_operand:DI 2 "const_int_operand"  " i")))]
1139  "TARGET_NEON && reload_completed
1140   && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1141  "vshr.u64\t%P0, %P1, %2"
1142  [(set_attr "type" "neon_shift_imm")]
1143)
1144
1145;; ashrdi3_neon
1146;; lshrdi3_neon
;; 64-bit right shifts (one pattern per RSHIFTS code) that may be allocated
;; to NEON ("w") or core ("r") registers; always split after reload.
;;
;; Operands: 0 = destination, 1 = value, 2 = SImode shift count,
;; 3 = SImode core scratch (for the NEON register-count alternatives its
;; constraint "2r" lets it share the count register), 4 = second SImode
;; core scratch, 5 = DImode NEON scratch.
;;
;; Split behaviour when operand 0 is a VFP register:
;;   - constant count < 1   : emit a plain DImode move and stop;
;;   - constant count > 64  : the count is replaced by 64;
;;   - other constants      : emit <shift>di3_neon_imm_noclobber;
;;   - register count       : negate it into operand 3, load that into
;;                            operand 5 with neon_load_count, and emit the
;;                            <shifttype>_shift_di3_neon vshl.
;; Split behaviour otherwise (core registers):
;;   - asserts that operands 0 and 1 either do not overlap or are the same
;;     register;
;;   - count == 1           : arm_<shift>di3_1bit;
;;   - anything else        : arm_emit_coreregs_64bit_shift using scratches
;;                            3 and 4.
1147(define_insn_and_split "<shift>di3_neon"
1148  [(set (match_operand:DI 0 "s_register_operand"	     "= w, w, &r, r, &r,?w,?w")
1149	(RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0,  r,0w, w")
1150		    (match_operand:SI 2 "reg_or_int_operand" "  r, i,  r, i,  i, r, i")))
1151   (clobber (match_scratch:SI 3				     "=2r, X, &r, X,  X,2r, X"))
1152   (clobber (match_scratch:SI 4				     "= X, X, &r, X,  X, X, X"))
1153   (clobber (match_scratch:DI 5				     "=&w, X,  X, X, X,&w, X"))
1154   (clobber (reg:CC CC_REGNUM))]
1155  "TARGET_NEON"
1156  "#"
1157  "TARGET_NEON && reload_completed"
1158  [(const_int 0)]
1159  "
1160  {
1161    if (IS_VFP_REGNUM (REGNO (operands[0])))
1162      {
1163	if (CONST_INT_P (operands[2]))
1164	  {
1165	    if (INTVAL (operands[2]) < 1)
1166	      {
1167	        emit_insn (gen_movdi (operands[0], operands[1]));
1168		DONE;
1169	      }
1170	    else if (INTVAL (operands[2]) > 64)
1171	      operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1172
1173	    /* Ditch the unnecessary clobbers.  */
1174	    emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1175							  operands[1],
1176							  operands[2]));
1177	  }
1178	else
1179	  {
1180	    /* We must use a negative left-shift.  */
1181	    emit_insn (gen_negsi2 (operands[3], operands[2]));
1182	    emit_insn (gen_neon_load_count (operands[5], operands[3]));
1183	    emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1184						       operands[5]));
1185	  }
1186      }
1187    else
1188      {
1189	/* The shift expanders support either full overlap or no overlap.  */
1190	gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1191		    || REGNO (operands[0]) == REGNO (operands[1]));
1192
1193	if (operands[2] == CONST1_RTX (SImode))
1194	  /* This clobbers CC.  */
1195	  emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1196	else
1197	  /* This clobbers CC (ASHIFTRT by register only).  */
1198	  arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1199				 	 operands[2], operands[3], operands[4]);
1200      }
1201
1202    DONE;
1203  }"
1204  [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1205   (set_attr "opt" "*,*,speed,speed,speed,*,*")
1206   (set_attr "type" "multiple")]
1207)
1208
1209;; Widening operations
1210
;; Widening signed accumulate:
;;     operand 0 = sign_extend (operand 1) + operand 2
;; emitted as vaddw with the wide accumulator (operand 2) as the first
;; source and the narrow vector (operand 1) as the second.
;; Operand 1 carries no "%" modifier: "%" would declare operands 1 and 2
;; interchangeable, but they have different modes (VW vs. <V_widen>) and
;; only operand 1 sits under the sign_extend, so they must never be swapped.
1211(define_insn "widen_ssum<mode>3"
1212  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1213	(plus:<V_widen> (sign_extend:<V_widen>
1214			  (match_operand:VW 1 "s_register_operand" "w"))
1215		        (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1216  "TARGET_NEON"
1217  "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1218  [(set_attr "type" "neon_add_widen")]
1219)
1220
;; Widening unsigned accumulate:
;;     operand 0 = zero_extend (operand 1) + operand 2
;; emitted as vaddw with the wide accumulator (operand 2) as the first
;; source and the narrow vector (operand 1) as the second.
;; Operand 1 carries no "%" modifier: "%" would declare operands 1 and 2
;; interchangeable, but they have different modes (VW vs. <V_widen>) and
;; only operand 1 sits under the zero_extend, so they must never be swapped.
1221(define_insn "widen_usum<mode>3"
1222  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1223	(plus:<V_widen> (zero_extend:<V_widen>
1224			  (match_operand:VW 1 "s_register_operand" "w"))
1225		        (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1226  "TARGET_NEON"
1227  "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1228  [(set_attr "type" "neon_add_widen")]
1229)
1230
1231;; Helpers for quad-word reduction operations
1232
1233; Add (or smin, smax...) the low N/2 elements of the N-element vector
1234; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1235; N/2-element vector.
1236
;; V4SI instance: combines elements {0,1} of operand 1 with elements {2,3}
;; using the VQH_OPS operation and writes the V2SI result to operand 0.
;; The two halves are printed with the %e / %f operand modifiers.
1237(define_insn "quad_halves_<code>v4si"
1238  [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1239        (VQH_OPS:V2SI
1240          (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1241                           (parallel [(const_int 0) (const_int 1)]))
1242          (vec_select:V2SI (match_dup 1)
1243                           (parallel [(const_int 2) (const_int 3)]))))]
1244  "TARGET_NEON"
1245  "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1246  [(set_attr "vqh_mnem" "<VQH_mnem>")
1247   (set_attr "type" "neon_reduc_<VQH_type>_q")]
1248)
1249
;; V4SF instance: combines elements {0,1} of operand 1 with elements {2,3}
;; using the VQHS_OPS operation and writes the V2SF result to operand 0.
;; Only enabled under flag_unsafe_math_optimizations.
1250(define_insn "quad_halves_<code>v4sf"
1251  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1252        (VQHS_OPS:V2SF
1253          (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1254                           (parallel [(const_int 0) (const_int 1)]))
1255          (vec_select:V2SF (match_dup 1)
1256                           (parallel [(const_int 2) (const_int 3)]))))]
1257  "TARGET_NEON && flag_unsafe_math_optimizations"
1258  "<VQH_mnem>.f32\t%P0, %e1, %f1"
1259  [(set_attr "vqh_mnem" "<VQH_mnem>")
1260   (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1261)
1262
;; V8HI instance: combines elements {0..3} of operand 1 with elements
;; {4..7} using the VQH_OPS operation and writes the V4HI result to
;; operand 0.  Operand 0 is a pure output ("=w"): the RTL never reads it,
;; and this matches the V4SI / V4SF variants of the same pattern.
1263(define_insn "quad_halves_<code>v8hi"
1264  [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1265        (VQH_OPS:V4HI
1266          (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1267                           (parallel [(const_int 0) (const_int 1)
1268				      (const_int 2) (const_int 3)]))
1269          (vec_select:V4HI (match_dup 1)
1270                           (parallel [(const_int 4) (const_int 5)
1271				      (const_int 6) (const_int 7)]))))]
1272  "TARGET_NEON"
1273  "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1274  [(set_attr "vqh_mnem" "<VQH_mnem>")
1275   (set_attr "type" "neon_reduc_<VQH_type>_q")]
1276)
1277
;; V16QI instance: combines elements {0..7} of operand 1 with elements
;; {8..15} using the VQH_OPS operation and writes the V8QI result to
;; operand 0.  Operand 0 is a pure output ("=w"): the RTL never reads it,
;; and this matches the V4SI / V4SF variants of the same pattern.
1278(define_insn "quad_halves_<code>v16qi"
1279  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1280        (VQH_OPS:V8QI
1281          (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1282                           (parallel [(const_int 0) (const_int 1)
1283				      (const_int 2) (const_int 3)
1284				      (const_int 4) (const_int 5)
1285				      (const_int 6) (const_int 7)]))
1286          (vec_select:V8QI (match_dup 1)
1287                           (parallel [(const_int 8) (const_int 9)
1288				      (const_int 10) (const_int 11)
1289				      (const_int 12) (const_int 13)
1290				      (const_int 14) (const_int 15)]))))]
1291  "TARGET_NEON"
1292  "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1293  [(set_attr "vqh_mnem" "<VQH_mnem>")
1294   (set_attr "type" "neon_reduc_<VQH_type>_q")]
1295)
1296
;; Store the <V_HALF>-mode operand 1 into the part of the 128-bit operand 0
;; that starts at byte offset GET_MODE_SIZE (<V_HALF>mode), using an
;; ordinary move into a subreg.  The other half of operand 0 is not written.
1297(define_expand "move_hi_quad_<mode>"
1298 [(match_operand:ANY128 0 "s_register_operand" "")
1299  (match_operand:<V_HALF> 1 "s_register_operand" "")]
1300 "TARGET_NEON"
1301{
1302  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1303				       GET_MODE_SIZE (<V_HALF>mode)),
1304		  operands[1]);
1305  DONE;
1306})
1307
;; Store the <V_HALF>-mode operand 1 into the part of the 128-bit operand 0
;; that starts at byte offset 0, using an ordinary move into a subreg.  The
;; other half of operand 0 is not written.
1308(define_expand "move_lo_quad_<mode>"
1309 [(match_operand:ANY128 0 "s_register_operand" "")
1310  (match_operand:<V_HALF> 1 "s_register_operand" "")]
1311 "TARGET_NEON"
1312{
1313  emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1314				       <MODE>mode, 0),
1315		  operands[1]);
1316  DONE;
1317})
1318
1319;; Reduction operations
1320
;; Sum all elements of a VD-mode vector into a scalar.  neon_pairwise_reduce
;; is driven with the vpadd generator, then element 0 of the resulting
;; vector is extracted into operand 0.  Floating-point modes are only
;; accepted under flag_unsafe_math_optimizations.
1321(define_expand "reduc_plus_scal_<mode>"
1322  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1323   (match_operand:VD 1 "s_register_operand" "")]
1324  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1325{
1326  rtx vec = gen_reg_rtx (<MODE>mode);
1327  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1328			&gen_neon_vpadd_internal<mode>);
1329  /* The same result is actually computed into every element.  */
1330  emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1331  DONE;
1332})
1333
;; Sum all elements of a VQ-mode vector into a scalar, in two steps: add the
;; two halves together with quad_halves_plus<mode>, then reduce the
;; half-width result with the <V_half> reduc_plus_scal expander.
;; Little-endian only; floating-point modes additionally require
;; flag_unsafe_math_optimizations.
1334(define_expand "reduc_plus_scal_<mode>"
1335  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1336   (match_operand:VQ 1 "s_register_operand" "")]
1337  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1338   && !BYTES_BIG_ENDIAN"
1339{
1340  rtx step1 = gen_reg_rtx (<V_HALF>mode);
1341
1342  emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1343  emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1344
1345  DONE;
1346})
1347
;; Sum the two DImode elements of a V2DI vector into a scalar:
;; arm_reduc_plus_internal_v2di computes the sum into a fresh V2DI register
;; and element 0 of that register is extracted into operand 0.
;; Little-endian only.  Operand 0 has an empty constraint string like every
;; other reduction expander here; constraints play no role in a
;; define_expand.
1348(define_expand "reduc_plus_scal_v2di"
1349  [(match_operand:DI 0 "nonimmediate_operand" "")
1350   (match_operand:V2DI 1 "s_register_operand" "")]
1351  "TARGET_NEON && !BYTES_BIG_ENDIAN"
1352{
1353  rtx vec = gen_reg_rtx (V2DImode);
1354
1355  emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1356  emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
1357
1358  DONE;
1359})
1360
;; Helper for reduc_plus_scal_v2di, expressed as UNSPEC_VPADD.  A single
;; vadd.i64 adds the %e and %f halves of operand 1 and writes the %e half
;; of operand 0; the %f half of operand 0 is not written by this
;; instruction.  Little-endian only.
1361(define_insn "arm_reduc_plus_internal_v2di"
1362  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1363	(unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1364		     UNSPEC_VPADD))]
1365  "TARGET_NEON && !BYTES_BIG_ENDIAN"
1366  "vadd.i64\t%e0, %e1, %f1"
1367  [(set_attr "type" "neon_add_q")]
1368)
1369
;; Signed minimum over all elements of a VD-mode vector.
;; neon_pairwise_reduce is driven with the vpsmin generator, then element 0
;; of the result is extracted into operand 0.  Floating-point modes are
;; only accepted under flag_unsafe_math_optimizations.
1370(define_expand "reduc_smin_scal_<mode>"
1371  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1372   (match_operand:VD 1 "s_register_operand" "")]
1373  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1374{
1375  rtx vec = gen_reg_rtx (<MODE>mode);
1376
1377  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1378			&gen_neon_vpsmin<mode>);
1379  /* The result is computed into every element of the vector.  */
1380  emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1381  DONE;
1382})
1383
;; Signed minimum over all elements of a VQ-mode vector, in two steps: take
;; the minimum of the two halves with quad_halves_smin<mode>, then reduce
;; the half-width result with the <V_half> reduc_smin_scal expander.
;; Little-endian only; floating-point modes additionally require
;; flag_unsafe_math_optimizations.
1384(define_expand "reduc_smin_scal_<mode>"
1385  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1386   (match_operand:VQ 1 "s_register_operand" "")]
1387  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1388   && !BYTES_BIG_ENDIAN"
1389{
1390  rtx step1 = gen_reg_rtx (<V_HALF>mode);
1391
1392  emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1393  emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1394
1395  DONE;
1396})
1397
;; Signed maximum over all elements of a VD-mode vector.
;; neon_pairwise_reduce is driven with the vpsmax generator, then element 0
;; of the result is extracted into operand 0.  Floating-point modes are
;; only accepted under flag_unsafe_math_optimizations.
1398(define_expand "reduc_smax_scal_<mode>"
1399  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1400   (match_operand:VD 1 "s_register_operand" "")]
1401  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1402{
1403  rtx vec = gen_reg_rtx (<MODE>mode);
1404  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1405			&gen_neon_vpsmax<mode>);
1406  /* The result is computed into every element of the vector.  */
1407  emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1408  DONE;
1409})
1410
;; Signed maximum over all elements of a VQ-mode vector, in two steps: take
;; the maximum of the two halves with quad_halves_smax<mode>, then reduce
;; the half-width result with the <V_half> reduc_smax_scal expander.
;; Little-endian only; floating-point modes additionally require
;; flag_unsafe_math_optimizations.
1411(define_expand "reduc_smax_scal_<mode>"
1412  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1413   (match_operand:VQ 1 "s_register_operand" "")]
1414  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1415   && !BYTES_BIG_ENDIAN"
1416{
1417  rtx step1 = gen_reg_rtx (<V_HALF>mode);
1418
1419  emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1420  emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1421
1422  DONE;
1423})
1424
;; Unsigned minimum over all elements of a VDI-mode vector.
;; neon_pairwise_reduce is driven with the vpumin generator, then element 0
;; of the result is extracted into operand 0.
1425(define_expand "reduc_umin_scal_<mode>"
1426  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1427   (match_operand:VDI 1 "s_register_operand" "")]
1428  "TARGET_NEON"
1429{
1430  rtx vec = gen_reg_rtx (<MODE>mode);
1431  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1432			&gen_neon_vpumin<mode>);
1433  /* The result is computed into every element of the vector.  */
1434  emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1435  DONE;
1436})
1437
;; Unsigned minimum over all elements of a VQI-mode vector, in two steps:
;; take the minimum of the two halves with quad_halves_umin<mode>, then
;; reduce the half-width result with the <V_half> reduc_umin_scal expander.
;; Little-endian only.
1438(define_expand "reduc_umin_scal_<mode>"
1439  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1440   (match_operand:VQI 1 "s_register_operand" "")]
1441  "TARGET_NEON && !BYTES_BIG_ENDIAN"
1442{
1443  rtx step1 = gen_reg_rtx (<V_HALF>mode);
1444
1445  emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1446  emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1447
1448  DONE;
1449})
1450
;; Unsigned maximum over all elements of a VDI-mode vector.
;; neon_pairwise_reduce is driven with the vpumax generator, then element 0
;; of the result is extracted into operand 0.
1451(define_expand "reduc_umax_scal_<mode>"
1452  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1453   (match_operand:VDI 1 "s_register_operand" "")]
1454  "TARGET_NEON"
1455{
1456  rtx vec = gen_reg_rtx (<MODE>mode);
1457  neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1458			&gen_neon_vpumax<mode>);
1459  /* The result is computed into every element of the vector.  */
1460  emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1461  DONE;
1462})
1463
;; Unsigned maximum over all elements of a VQI-mode vector, in two steps:
;; take the maximum of the two halves with quad_halves_umax<mode>, then
;; reduce the half-width result with the <V_half> reduc_umax_scal expander.
;; Little-endian only.
1464(define_expand "reduc_umax_scal_<mode>"
1465  [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1466   (match_operand:VQI 1 "s_register_operand" "")]
1467  "TARGET_NEON && !BYTES_BIG_ENDIAN"
1468{
1469  rtx step1 = gen_reg_rtx (<V_HALF>mode);
1470
1471  emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1472  emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1473
1474  DONE;
1475})
1476
;; Pairwise add of two VD-mode registers (vpadd), expressed as UNSPEC_VPADD.
;; Passed to neon_pairwise_reduce by the VD reduc_plus_scal expander.  The
;; scheduling type is the floating-point reduction type when
;; <Is_float_mode> holds, the integer one otherwise.
1477(define_insn "neon_vpadd_internal<mode>"
1478  [(set (match_operand:VD 0 "s_register_operand" "=w")
1479	(unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1480		    (match_operand:VD 2 "s_register_operand" "w")]
1481                   UNSPEC_VPADD))]
1482  "TARGET_NEON"
1483  "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1484  ;; Assume this schedules like vadd.
1485  [(set (attr "type")
1486      (if_then_else (match_test "<Is_float_mode>")
1487                    (const_string "neon_fp_reduc_add_s<q>")
1488                    (const_string "neon_reduc_add<q>")))]
1489)
1490
;; Pairwise signed minimum of two VD-mode registers (vpmin with the signed
;; element suffix), expressed as UNSPEC_VPSMIN.  Passed to
;; neon_pairwise_reduce by the VD reduc_smin_scal expander.
1491(define_insn "neon_vpsmin<mode>"
1492  [(set (match_operand:VD 0 "s_register_operand" "=w")
1493	(unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1494		    (match_operand:VD 2 "s_register_operand" "w")]
1495                   UNSPEC_VPSMIN))]
1496  "TARGET_NEON"
1497  "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1498  [(set (attr "type")
1499      (if_then_else (match_test "<Is_float_mode>")
1500                    (const_string "neon_fp_reduc_minmax_s<q>")
1501                    (const_string "neon_reduc_minmax<q>")))]
1502)
1503
;; Pairwise signed maximum of two VD-mode registers (vpmax with the signed
;; element suffix), expressed as UNSPEC_VPSMAX.  Passed to
;; neon_pairwise_reduce by the VD reduc_smax_scal expander.
1504(define_insn "neon_vpsmax<mode>"
1505  [(set (match_operand:VD 0 "s_register_operand" "=w")
1506	(unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1507		    (match_operand:VD 2 "s_register_operand" "w")]
1508                   UNSPEC_VPSMAX))]
1509  "TARGET_NEON"
1510  "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1511  [(set (attr "type")
1512      (if_then_else (match_test "<Is_float_mode>")
1513                    (const_string "neon_fp_reduc_minmax_s<q>")
1514                    (const_string "neon_reduc_minmax<q>")))]
1515)
1516
;; Pairwise unsigned minimum of two VDI-mode registers (vpmin with the
;; unsigned element suffix), expressed as UNSPEC_VPUMIN.  Passed to
;; neon_pairwise_reduce by the VDI reduc_umin_scal expander.
1517(define_insn "neon_vpumin<mode>"
1518  [(set (match_operand:VDI 0 "s_register_operand" "=w")
1519	(unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1520		     (match_operand:VDI 2 "s_register_operand" "w")]
1521                   UNSPEC_VPUMIN))]
1522  "TARGET_NEON"
1523  "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1524  [(set_attr "type" "neon_reduc_minmax<q>")]
1525)
1526
;; Pairwise unsigned maximum of two VDI-mode registers (vpmax with the
;; unsigned element suffix), expressed as UNSPEC_VPUMAX.  Passed to
;; neon_pairwise_reduce by the VDI reduc_umax_scal expander.
1527(define_insn "neon_vpumax<mode>"
1528  [(set (match_operand:VDI 0 "s_register_operand" "=w")
1529	(unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1530		     (match_operand:VDI 2 "s_register_operand" "w")]
1531                   UNSPEC_VPUMAX))]
1532  "TARGET_NEON"
1533  "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1534  [(set_attr "type" "neon_reduc_minmax<q>")]
1535)
1536
1537;; Saturating arithmetic
1538
1539; NOTE: Neon supports many more saturating variants of instructions than the
1540; following, but these are all GCC currently understands.
1541; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1542; yet either, although these patterns may be used by intrinsics when they're
1543; added.
1544
;; Signed saturating add of two VD-mode registers (ss_plus), emitted as
;; vqadd with the signed element suffix.
1545(define_insn "*ss_add<mode>_neon"
1546  [(set (match_operand:VD 0 "s_register_operand" "=w")
1547       (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1548                   (match_operand:VD 2 "s_register_operand" "w")))]
1549  "TARGET_NEON"
1550  "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1551  [(set_attr "type" "neon_qadd<q>")]
1552)
1553
;; Unsigned saturating add of two VD-mode registers (us_plus), emitted as
;; vqadd with the unsigned element suffix.
1554(define_insn "*us_add<mode>_neon"
1555  [(set (match_operand:VD 0 "s_register_operand" "=w")
1556       (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1557                   (match_operand:VD 2 "s_register_operand" "w")))]
1558  "TARGET_NEON"
1559  "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1560  [(set_attr "type" "neon_qadd<q>")]
1561)
1562
;; Signed saturating subtract (operand 1 - operand 2) of two VD-mode
;; registers (ss_minus), emitted as vqsub with the signed element suffix.
1563(define_insn "*ss_sub<mode>_neon"
1564  [(set (match_operand:VD 0 "s_register_operand" "=w")
1565       (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1566                    (match_operand:VD 2 "s_register_operand" "w")))]
1567  "TARGET_NEON"
1568  "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1569  [(set_attr "type" "neon_qsub<q>")]
1570)
1571
;; Unsigned saturating subtract (operand 1 - operand 2) of two VD-mode
;; registers (us_minus), emitted as vqsub with the unsigned element suffix.
1572(define_insn "*us_sub<mode>_neon"
1573  [(set (match_operand:VD 0 "s_register_operand" "=w")
1574       (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1575                    (match_operand:VD 2 "s_register_operand" "w")))]
1576  "TARGET_NEON"
1577  "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1578  [(set_attr "type" "neon_qsub<q>")]
1579)
1580
1581;; Conditional instructions.  These are comparisons with conditional moves for
1582;; vectors.  They perform the assignment:
1583;;
1584;;     Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1585;;
1586;; where op3 is <, <=, ==, !=, >= or >.  Operations are performed
1587;; element-wise.
1588
;; Expander for the signed / floating-point vector conditional select.
;; It builds a per-element mask in a fresh <V_cmp_result> register using
;; vcge / vcgt / vceq (or the vclt / vcle compare-against-zero forms) and
;; then picks between operands 1 and 2 with neon_vbsl.  For the
;; unordered-aware codes and NE it emits the opposite comparison and swaps
;; the two data operands of the vbsl instead of inverting the mask.
;; Floating-point modes are only handled under
;; flag_unsafe_math_optimizations.
1589(define_expand "vcond<mode><mode>"
1590  [(set (match_operand:VDQW 0 "s_register_operand" "")
1591	(if_then_else:VDQW
1592	  (match_operator 3 "comparison_operator"
1593	    [(match_operand:VDQW 4 "s_register_operand" "")
1594	     (match_operand:VDQW 5 "nonmemory_operand" "")])
1595	  (match_operand:VDQW 1 "s_register_operand" "")
1596	  (match_operand:VDQW 2 "s_register_operand" "")))]
1597  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1598{
1599  int inverse = 0;
1600  int use_zero_form = 0;
1601  int swap_bsl_operands = 0;
1602  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1603  rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1604
1605  rtx (*base_comparison) (rtx, rtx, rtx);
1606  rtx (*complimentary_comparison) (rtx, rtx, rtx);
1607
  /* Step 1: a zero operand 5 is kept as-is for the codes that have a
     compare-against-zero form; in every other case operand 5 is forced
     into a register.  */
1608  switch (GET_CODE (operands[3]))
1609    {
1610    case GE:
1611    case GT:
1612    case LE:
1613    case LT:
1614    case EQ:
1615      if (operands[5] == CONST0_RTX (<MODE>mode))
1616	{
1617	  use_zero_form = 1;
1618	  break;
1619	}
1620      /* Fall through.  */
1621    default:
1622      if (!REG_P (operands[5]))
1623	operands[5] = force_reg (<MODE>mode, operands[5]);
1624    }
1625
  /* Step 2: pick the comparison generator for the code as written
     (base_comparison) and the one to use with swapped operands
     (complimentary_comparison).  INVERSE is set for the "less than"
     family.  */
1626  switch (GET_CODE (operands[3]))
1627    {
1628    case LT:
1629    case UNLT:
1630      inverse = 1;
1631      /* Fall through.  */
1632    case GE:
1633    case UNGE:
1634    case ORDERED:
1635    case UNORDERED:
1636      base_comparison = gen_neon_vcge<mode>;
1637      complimentary_comparison = gen_neon_vcgt<mode>;
1638      break;
1639    case LE:
1640    case UNLE:
1641      inverse = 1;
1642      /* Fall through.  */
1643    case GT:
1644    case UNGT:
1645      base_comparison = gen_neon_vcgt<mode>;
1646      complimentary_comparison = gen_neon_vcge<mode>;
1647      break;
1648    case EQ:
1649    case NE:
1650    case UNEQ:
1651      base_comparison = gen_neon_vceq<mode>;
1652      complimentary_comparison = gen_neon_vceq<mode>;
1653      break;
1654    default:
1655      gcc_unreachable ();
1656    }
1657
  /* Step 3: emit the comparison(s) that compute MASK.  */
1658  switch (GET_CODE (operands[3]))
1659    {
1660    case LT:
1661    case LE:
1662    case GT:
1663    case GE:
1664    case EQ:
1665      /* The easy case.  Here we emit one of vcge, vcgt or vceq.
1666	 As a LT b <=> b GT a && a LE b <=> b GE a.  Our transformations are:
1667	 a GE b -> a GE b
1668	 a GT b -> a GT b
1669	 a LE b -> b GE a
1670	 a LT b -> b GT a
1671	 a EQ b -> a EQ b
1672	 Note that there also exist direct comparison against 0 forms,
1673	 so catch those as a special case.  */
1674      if (use_zero_form)
1675	{
1676	  inverse = 0;
1677	  switch (GET_CODE (operands[3]))
1678	    {
1679	    case LT:
1680	      base_comparison = gen_neon_vclt<mode>;
1681	      break;
1682	    case LE:
1683	      base_comparison = gen_neon_vcle<mode>;
1684	      break;
1685	    default:
1686	      /* Do nothing, other zero form cases already have the correct
1687		 base_comparison.  */
1688	      break;
1689	    }
1690	}
1691
1692      if (!inverse)
1693	emit_insn (base_comparison (mask, operands[4], operands[5]));
1694      else
1695	emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1696      break;
1697    case UNLT:
1698    case UNLE:
1699    case UNGT:
1700    case UNGE:
1701    case NE:
1702      /* Vector compare returns false for lanes which are unordered, so if we use
1703	 the inverse of the comparison we actually want to emit, then
1704	 swap the operands to BSL, we will end up with the correct result.
1705	 Note that a NE NaN and NaN NE b are true for all a, b.
1706
1707	 Our transformations are:
1708	 a UNGE b -> !(b GT a)
1709	 a UNGT b -> !(b GE a)
1710	 a UNLE b -> !(a GT b)
1711	 a UNLT b -> !(a GE b)
1712	 a NE b -> !(a EQ b)  */
1713
1714      if (inverse)
1715	emit_insn (base_comparison (mask, operands[4], operands[5]));
1716      else
1717	emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1718
1719      swap_bsl_operands = 1;
1720      break;
1721    case UNEQ:
1722      /* We check (a > b ||  b > a).  Combining these comparisons gives us
1723	 true iff (a != b && a ORDERED b); swapping the operands to BSL
1724	 will then give us (a == b ||  a UNORDERED b) as intended.  */
1725
1726      emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1727      emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1728      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1729      swap_bsl_operands = 1;
1730      break;
1731    case UNORDERED:
1732       /* Operands are ORDERED iff (a > b || b >= a).
1733	 Swapping the operands to BSL will give the UNORDERED case.  */
1734     swap_bsl_operands = 1;
1735     /* Fall through.  */
1736    case ORDERED:
1737      emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1738      emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1739      emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1740      break;
1741    default:
1742      gcc_unreachable ();
1743    }
1744
  /* Step 4: select with the mask; the data operands are exchanged when the
     mask holds the negation of the requested condition.  */
1745  if (swap_bsl_operands)
1746    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1747				    operands[1]));
1748  else
1749    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1750				    operands[2]));
1751  DONE;
1752})
1753
;; Expand an unsigned integer vector conditional select:
;;   operands[0] = (operands[4] <cmp> operands[5]) ? operands[1] : operands[2]
;; where <cmp> is the rtx code of operands[3].  A per-lane mask is computed
;; with vcgeu/vcgtu/vceq and then passed to vbsl.  LEU and LTU are handled
;; by swapping the compare operands of vcgeu/vcgtu; NE computes EQ and swaps
;; the two value operands of vbsl instead.
;; The signed compare-with-zero forms (vcle/vclt) are deliberately not used
;; here: they give the wrong answer for unsigned lanes with the top bit set.
1754(define_expand "vcondu<mode><mode>"
1755  [(set (match_operand:VDQIW 0 "s_register_operand" "")
1756	(if_then_else:VDQIW
1757	  (match_operator 3 "arm_comparison_operator"
1758	    [(match_operand:VDQIW 4 "s_register_operand" "")
1759	     (match_operand:VDQIW 5 "s_register_operand" "")])
1760	  (match_operand:VDQIW 1 "s_register_operand" "")
1761	  (match_operand:VDQIW 2 "s_register_operand" "")))]
1762  "TARGET_NEON"
1763{
1764  rtx mask;
1765  int inverse = 0;
1766
1767  mask = gen_reg_rtx (<V_cmp_result>mode);
1768
  /* The unsigned compare patterns only accept register operands, so make
     sure the second comparison operand lives in a register.  */
1771  if (!REG_P (operands[5]))
1772    operands[5] = force_reg (<MODE>mode, operands[5]);
1773
1774  switch (GET_CODE (operands[3]))
1775    {
1776    case GEU:
1777      emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
1778      break;
1779
1780    case GTU:
1781      emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
1782      break;
1783
1784    case EQ:
1785      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1786      break;
1787
1788    case LEU:
      /* a LEU b -> b GEU a.  */
1792      emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
1793      break;
1794
1795    case LTU:
      /* a LTU b -> b GTU a.  */
1799      emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
1800      break;
1801
1802    case NE:
      /* a NE b -> !(a EQ b); the inversion is done by swapping the
	 value operands of vbsl below.  */
1803      emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1804      inverse = 1;
1805      break;
1806
1807    default:
1808      gcc_unreachable ();
1809    }
1810
1811  if (inverse)
1812    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1813				    operands[1]));
1814  else
1815    emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1816				    operands[2]));
1817
1818  DONE;
1819})
1820
1821;; Patterns for builtins.
1822
1823; good for plain vadd, vaddq.
1824
;; Builtin expander for vadd/vaddq.  Non-float modes, or any mode when
;; flag_unsafe_math_optimizations is set, use the generic add<mode>3
;; pattern; otherwise neon_vadd<mode>_unspec is emitted.
1825(define_expand "neon_vadd<mode>"
1826  [(match_operand:VCVTF 0 "s_register_operand" "=w")
1827   (match_operand:VCVTF 1 "s_register_operand" "w")
1828   (match_operand:VCVTF 2 "s_register_operand" "w")]
1829  "TARGET_NEON"
1830{
1831  if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1832    emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1833  else
1834    emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1835					   operands[2]));
1836  DONE;
1837})
1838
1839; Note that NEON operations don't support the full IEEE 754 standard: in
1840; particular, denormal values are flushed to zero.  This means that GCC cannot
1841; use those instructions for autovectorization, etc. unless
1842; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1843; behavior is permissible).  Intrinsic operations (provided by the arm_neon.h
1844; header) must work in either case: if -funsafe-math-optimizations is given,
1845; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1846; expand to unspecs (which may potentially limit the extent to which they might
1847; be optimized by generic code).
1848
1849; Used for intrinsics when flag_unsafe_math_optimizations is false.
1850
;; vadd expressed as UNSPEC_VADD.  The "type" attribute is the FP add/sub
;; type for float modes and the integer add type otherwise.
1851(define_insn "neon_vadd<mode>_unspec"
1852  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1853        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1854		      (match_operand:VCVTF 2 "s_register_operand" "w")]
1855                     UNSPEC_VADD))]
1856  "TARGET_NEON"
1857  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1858  [(set (attr "type")
1859      (if_then_else (match_test "<Is_float_mode>")
1860                    (const_string "neon_fp_addsub_s<q>")
1861                    (const_string "neon_add<q>")))]
1862)
1863
;; vaddl: add two VDI operands (printed with %P), producing a <V_widen>
;; result (printed with %q).  Iterates over the VADDL unspecs.
1864(define_insn "neon_vaddl<sup><mode>"
1865  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1866        (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1867		           (match_operand:VDI 2 "s_register_operand" "w")]
1868                          VADDL))]
1869  "TARGET_NEON"
1870  "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
1871  [(set_attr "type" "neon_add_long")]
1872)
1873
;; vaddw: add a VDI operand (operand 2) to a <V_widen> operand (operand 1),
;; producing a <V_widen> result.  Iterates over the VADDW unspecs.
1874(define_insn "neon_vaddw<sup><mode>"
1875  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1876        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1877		           (match_operand:VDI 2 "s_register_operand" "w")]
1878                          VADDW))]
1879  "TARGET_NEON"
1880  "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
1881  [(set_attr "type" "neon_add_widen")]
1882)
1883
1884; vhadd and vrhadd.
1885
;; vhadd/vrhadd on VDQIW operands, iterating over the VHADD unspecs.
;; The "type" attribute uses <q> so that D-register and Q-register modes
;; get distinct scheduling types, matching neon_vhsub<sup><mode>.
1886(define_insn "neon_v<r>hadd<sup><mode>"
1887  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1888        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1889		       (match_operand:VDQIW 2 "s_register_operand" "w")]
1890		      VHADD))]
1891  "TARGET_NEON"
1892  "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1893  [(set_attr "type" "neon_add_halve<q>")]
1894)
1895
;; vqadd on VDQIX operands, iterating over the VQADD unspecs.
1896(define_insn "neon_vqadd<sup><mode>"
1897  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1898        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1899		       (match_operand:VDQIX 2 "s_register_operand" "w")]
1900                     VQADD))]
1901  "TARGET_NEON"
1902  "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1903  [(set_attr "type" "neon_qadd<q>")]
1904)
1905
;; vaddhn/vraddhn: two VN operands (printed with %q) produce a <V_narrow>
;; result (printed with %P).  Iterates over the VADDHN unspecs.
1906(define_insn "neon_v<r>addhn<mode>"
1907  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1908        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1909		            (match_operand:VN 2 "s_register_operand" "w")]
1910                           VADDHN))]
1911  "TARGET_NEON"
1912  "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
1913  [(set_attr "type" "neon_add_halve_narrow_q")]
1914)
1915
1916;; Polynomial and Float multiplication.
;; vmul over the VPF modes, expressed as UNSPEC_VMUL.  The "type" attribute
;; is the FP multiply type for float modes and the integer multiply type
;; otherwise.
1917(define_insn "neon_vmul<pf><mode>"
1918  [(set (match_operand:VPF 0 "s_register_operand" "=w")
1919        (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
1920		      (match_operand:VPF 2 "s_register_operand" "w")]
1921		     UNSPEC_VMUL))]
1922  "TARGET_NEON"
1923  "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1924  [(set (attr "type")
1925      (if_then_else (match_test "<Is_float_mode>")
1926                    (const_string "neon_fp_mul_s<q>")
1927                    (const_string "neon_mul_<V_elem_ch><q>")))]
1928)
1929
;; Builtin expander for vmla.  Non-float modes, or any mode when
;; flag_unsafe_math_optimizations is set, use mul<mode>3add<mode>_neon;
;; otherwise neon_vmla<mode>_unspec is emitted.
1930(define_expand "neon_vmla<mode>"
1931  [(match_operand:VDQW 0 "s_register_operand" "=w")
1932   (match_operand:VDQW 1 "s_register_operand" "0")
1933   (match_operand:VDQW 2 "s_register_operand" "w")
1934   (match_operand:VDQW 3 "s_register_operand" "w")]
1935  "TARGET_NEON"
1936{
1937  if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1938    emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1939				             operands[2], operands[3]));
1940  else
1941    emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1942					   operands[2], operands[3]));
1943  DONE;
1944})
1945
;; Builtin expander for vfma (requires TARGET_FMA).  Forwards to
;; fma<mode>4_intrinsic with the operands reordered: operands 2 and 3 come
;; first and operand 1 is passed last.
;; NOTE(review): operand 1 is presumably the accumulator -- confirm against
;; fma<mode>4_intrinsic.
1946(define_expand "neon_vfma<VCVTF:mode>"
1947  [(match_operand:VCVTF 0 "s_register_operand")
1948   (match_operand:VCVTF 1 "s_register_operand")
1949   (match_operand:VCVTF 2 "s_register_operand")
1950   (match_operand:VCVTF 3 "s_register_operand")]
1951  "TARGET_NEON && TARGET_FMA"
1952{
1953  emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
1954				       operands[1]));
1955  DONE;
1956})
1957
;; Builtin expander for vfms (requires TARGET_FMA).  Forwards to
;; fmsub<mode>4_intrinsic with the operands reordered: operands 2 and 3
;; come first and operand 1 is passed last.
;; NOTE(review): operand 1 is presumably the accumulator -- confirm against
;; fmsub<mode>4_intrinsic.
1958(define_expand "neon_vfms<VCVTF:mode>"
1959  [(match_operand:VCVTF 0 "s_register_operand")
1960   (match_operand:VCVTF 1 "s_register_operand")
1961   (match_operand:VCVTF 2 "s_register_operand")
1962   (match_operand:VCVTF 3 "s_register_operand")]
1963  "TARGET_NEON && TARGET_FMA"
1964{
1965  emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
1966					 operands[1]));
1967  DONE;
1968})
1969
1970; Used for intrinsics when flag_unsafe_math_optimizations is false.
1971
;; vmla expressed as UNSPEC_VMLA.  Operand 1 is tied to the destination
;; (constraint "0"), so only operands 2 and 3 appear as sources in the
;; assembly template.
1972(define_insn "neon_vmla<mode>_unspec"
1973  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1974	(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
1975		      (match_operand:VDQW 2 "s_register_operand" "w")
1976		      (match_operand:VDQW 3 "s_register_operand" "w")]
1977		    UNSPEC_VMLA))]
1978  "TARGET_NEON"
1979  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
1980  [(set (attr "type")
1981      (if_then_else (match_test "<Is_float_mode>")
1982                    (const_string "neon_fp_mla_s<q>")
1983                    (const_string "neon_mla_<V_elem_ch><q>")))]
1984)
1985
;; vmlal: VW operands 2 and 3 with a <V_widen> operand 1 that is tied to the
;; <V_widen> destination.  Iterates over the VMLAL unspecs.
1986(define_insn "neon_vmlal<sup><mode>"
1987  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1988        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
1989		           (match_operand:VW 2 "s_register_operand" "w")
1990		           (match_operand:VW 3 "s_register_operand" "w")]
1991                          VMLAL))]
1992  "TARGET_NEON"
1993  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
1994  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
1995)
1996
;; Builtin expander for vmls.  Non-float modes, or any mode when
;; flag_unsafe_math_optimizations is set, use
;; mul<mode>3neg<mode>add<mode>_neon; otherwise neon_vmls<mode>_unspec is
;; emitted.
1997(define_expand "neon_vmls<mode>"
1998  [(match_operand:VDQW 0 "s_register_operand" "=w")
1999   (match_operand:VDQW 1 "s_register_operand" "0")
2000   (match_operand:VDQW 2 "s_register_operand" "w")
2001   (match_operand:VDQW 3 "s_register_operand" "w")]
2002  "TARGET_NEON"
2003{
2004  if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2005    emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2006		 operands[1], operands[2], operands[3]));
2007  else
2008    emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2009					   operands[2], operands[3]));
2010  DONE;
2011})
2012
2013; Used for intrinsics when flag_unsafe_math_optimizations is false.
2014
;; vmls expressed as UNSPEC_VMLS.  Operand 1 is tied to the destination
;; (constraint "0"), so only operands 2 and 3 appear as sources in the
;; assembly template.
2015(define_insn "neon_vmls<mode>_unspec"
2016  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2017	(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2018		      (match_operand:VDQW 2 "s_register_operand" "w")
2019		      (match_operand:VDQW 3 "s_register_operand" "w")]
2020		    UNSPEC_VMLS))]
2021  "TARGET_NEON"
2022  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2023  [(set (attr "type")
2024      (if_then_else (match_test "<Is_float_mode>")
2025                    (const_string "neon_fp_mla_s<q>")
2026                    (const_string "neon_mla_<V_elem_ch><q>")))]
2027)
2028
;; vmlsl: VW operands 2 and 3 with a <V_widen> operand 1 that is tied to the
;; <V_widen> destination.  Iterates over the VMLSL unspecs.
2029(define_insn "neon_vmlsl<sup><mode>"
2030  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2031        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2032		           (match_operand:VW 2 "s_register_operand" "w")
2033		           (match_operand:VW 3 "s_register_operand" "w")]
2034                          VMLSL))]
2035  "TARGET_NEON"
2036  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2037  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2038)
2039
2040;; vqdmulh, vqrdmulh
;; vqdmulh/vqrdmulh on VMDQI operands, iterating over the VQDMULH unspecs.
2041(define_insn "neon_vq<r>dmulh<mode>"
2042  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2043        (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2044		       (match_operand:VMDQI 2 "s_register_operand" "w")]
2045                      VQDMULH))]
2046  "TARGET_NEON"
2047  "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2048  [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2049)
2050
2051;; vqrdmlah, vqrdmlsh
;; vqrdmlah/vqrdmlsh on VMDQI operands (requires TARGET_NEON_RDMA).
;; Operand 1 is tied to the destination; operands 2 and 3 are the sources
;; printed in the assembly template.
;; NOTE(review): the "type" attribute uses a "_long" type although the
;; result mode is the same as the source mode -- confirm this is intended.
2052(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2053  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2054	(unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2055		       (match_operand:VMDQI 2 "s_register_operand" "w")
2056		       (match_operand:VMDQI 3 "s_register_operand" "w")]
2057		      VQRDMLH_AS))]
2058  "TARGET_NEON_RDMA"
2059  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2060  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2061)
2062
;; vqdmlal: VMDI operands 2 and 3 with a <V_widen> operand 1 that is tied
;; to the <V_widen> destination (UNSPEC_VQDMLAL).
2063(define_insn "neon_vqdmlal<mode>"
2064  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2065        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2066		           (match_operand:VMDI 2 "s_register_operand" "w")
2067		           (match_operand:VMDI 3 "s_register_operand" "w")]
2068                          UNSPEC_VQDMLAL))]
2069  "TARGET_NEON"
2070  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2071  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2072)
2073
;; vqdmlsl: VMDI operands 2 and 3 with a <V_widen> operand 1 that is tied
;; to the <V_widen> destination (UNSPEC_VQDMLSL).
2074(define_insn "neon_vqdmlsl<mode>"
2075  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2076        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2077		           (match_operand:VMDI 2 "s_register_operand" "w")
2078		           (match_operand:VMDI 3 "s_register_operand" "w")]
2079                          UNSPEC_VQDMLSL))]
2080  "TARGET_NEON"
2081  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2082  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2083)
2084
;; vmull: two VW operands produce a <V_widen> result.  Iterates over the
;; VMULL unspecs.
2085(define_insn "neon_vmull<sup><mode>"
2086  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2087        (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2088		           (match_operand:VW 2 "s_register_operand" "w")]
2089                          VMULL))]
2090  "TARGET_NEON"
2091  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2092  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
2093)
2094
;; vqdmull: two VMDI operands produce a <V_widen> result (UNSPEC_VQDMULL).
2095(define_insn "neon_vqdmull<mode>"
2096  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2097        (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2098		           (match_operand:VMDI 2 "s_register_operand" "w")]
2099                          UNSPEC_VQDMULL))]
2100  "TARGET_NEON"
2101  "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2102  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
2103)
2104
;; Builtin expander for vsub.  Non-float modes, or any mode when
;; flag_unsafe_math_optimizations is set, use the generic sub<mode>3
;; pattern; otherwise neon_vsub<mode>_unspec is emitted.
2105(define_expand "neon_vsub<mode>"
2106  [(match_operand:VCVTF 0 "s_register_operand" "=w")
2107   (match_operand:VCVTF 1 "s_register_operand" "w")
2108   (match_operand:VCVTF 2 "s_register_operand" "w")]
2109  "TARGET_NEON"
2110{
2111  if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2112    emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2113  else
2114    emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2115					   operands[2]));
2116  DONE;
2117})
2118
2119; Used for intrinsics when flag_unsafe_math_optimizations is false.
2120
;; vsub expressed as UNSPEC_VSUB.  The "type" attribute is the FP add/sub
;; type for float modes and the integer sub type otherwise.
2121(define_insn "neon_vsub<mode>_unspec"
2122  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2123        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2124		      (match_operand:VCVTF 2 "s_register_operand" "w")]
2125                     UNSPEC_VSUB))]
2126  "TARGET_NEON"
2127  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2128  [(set (attr "type")
2129      (if_then_else (match_test "<Is_float_mode>")
2130                    (const_string "neon_fp_addsub_s<q>")
2131                    (const_string "neon_sub<q>")))]
2132)
2133
;; vsubl: two VDI operands produce a <V_widen> result.  Iterates over the
;; VSUBL unspecs.
2134(define_insn "neon_vsubl<sup><mode>"
2135  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2136        (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2137		           (match_operand:VDI 2 "s_register_operand" "w")]
2138                          VSUBL))]
2139  "TARGET_NEON"
2140  "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2141  [(set_attr "type" "neon_sub_long")]
2142)
2143
;; vsubw: a <V_widen> operand 1 and a VDI operand 2 produce a <V_widen>
;; result.  Iterates over the VSUBW unspecs.
2144(define_insn "neon_vsubw<sup><mode>"
2145  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2146        (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2147		           (match_operand:VDI 2 "s_register_operand" "w")]
2148			  VSUBW))]
2149  "TARGET_NEON"
2150  "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2151  [(set_attr "type" "neon_sub_widen")]
2152)
2153
;; vqsub on VDQIX operands, iterating over the VQSUB unspecs.
2154(define_insn "neon_vqsub<sup><mode>"
2155  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2156        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2157		       (match_operand:VDQIX 2 "s_register_operand" "w")]
2158		      VQSUB))]
2159  "TARGET_NEON"
2160  "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2161  [(set_attr "type" "neon_qsub<q>")]
2162)
2163
;; vhsub on VDQIW operands, iterating over the VHSUB unspecs.
2164(define_insn "neon_vhsub<sup><mode>"
2165  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2166        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2167		       (match_operand:VDQIW 2 "s_register_operand" "w")]
2168		      VHSUB))]
2169  "TARGET_NEON"
2170  "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2171  [(set_attr "type" "neon_sub_halve<q>")]
2172)
2173
;; vsubhn/vrsubhn: two VN operands (printed with %q) produce a <V_narrow>
;; result (printed with %P).  Iterates over the VSUBHN unspecs.
2174(define_insn "neon_v<r>subhn<mode>"
2175  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2176        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2177		            (match_operand:VN 2 "s_register_operand" "w")]
2178                           VSUBHN))]
2179  "TARGET_NEON"
2180  "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2181  [(set_attr "type" "neon_sub_halve_narrow_q")]
2182)
2183
2184;; These may expand to an UNSPEC pattern when a floating point mode is used
2185;; without unsafe math optimizations.
;; Builtin expander for the vector comparisons in COMPARISONS over VDQW.
;; Float vector modes without flag_unsafe_math_optimizations dispatch by
;; mode to the V2SF/V4SF *_insn_unspec patterns; every other case emits
;; neon_vc<cmp_op><mode>_insn.  Operand 2 may be a register or zero.
2186(define_expand "neon_vc<cmp_op><mode>"
2187  [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2188     (neg:<V_cmp_result>
2189       (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2190                         (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2191  "TARGET_NEON"
2192  {
2193    /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2194       are enabled.  */
2195    if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2196        && !flag_unsafe_math_optimizations)
2197      {
2198        /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2199           we define gen_neon_vceq<mode>_insn_unspec only for float modes
2200           whereas this expander iterates over the integer modes as well,
2201           but we will never expand to UNSPECs for the integer comparisons.  */
2202        switch (<MODE>mode)
2203          {
2204            case V2SFmode:
2205              emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2206                                                              operands[1],
2207                                                              operands[2]));
2208              break;
2209            case V4SFmode:
2210              emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2211                                                              operands[1],
2212                                                              operands[2]));
2213              break;
2214            default:
2215              gcc_unreachable ();
2216          }
2217      }
2218    else
2219      emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
2220                                                 operands[1],
2221                                                 operands[2]));
2222    DONE;
2223  }
2224)
2225
;; Vector comparison as canonical RTL: the negated comparison result.
;; Disabled for float vector modes unless flag_unsafe_math_optimizations
;; is set.  The assembly template is built at output time: the element type
;; prefix is "f" for float vector modes and <cmp_type> otherwise, and the
;; last operand is either the register (alternative 0) or the literal "#0"
;; (alternative 1).
2226(define_insn "neon_vc<cmp_op><mode>_insn"
2227  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2228        (neg:<V_cmp_result>
2229          (COMPARISONS:<V_cmp_result>
2230            (match_operand:VDQW 1 "s_register_operand" "w,w")
2231            (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2232  "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2233                    && !flag_unsafe_math_optimizations)"
2234  {
2235    char pattern[100];
2236    sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2237                      " %%<V_reg>1, %s",
2238                       GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2239                         ? "f" : "<cmp_type>",
2240                       which_alternative == 0
2241                         ? "%<V_reg>2" : "#0");
2242    output_asm_insn (pattern, operands);
2243    return "";
2244  }
2245  [(set (attr "type")
2246        (if_then_else (match_operand 2 "zero_operand")
2247                      (const_string "neon_compare_zero<q>")
2248                      (const_string "neon_compare<q>")))]
2249)
2250
;; Float vector comparison expressed as an unspec (NEON_VCMP iterator).
;; The template is built at output time; the last operand is either the
;; register (alternative 0) or the literal "#0" (alternative 1).
2251(define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2252  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2253        (unspec:<V_cmp_result>
2254	  [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2255	   (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2256          NEON_VCMP))]
2257  "TARGET_NEON"
2258  {
2259    char pattern[100];
2260    sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2261                       " %%<V_reg>1, %s",
2262                       which_alternative == 0
2263                         ? "%<V_reg>2" : "#0");
2264    output_asm_insn (pattern, operands);
2265    return "";
2266}
2267  [(set_attr "type" "neon_fp_compare_s<q>")]
2268)
2269
;; Unsigned vector comparisons (GTUGEU iterator) on VDQIW register
;; operands, emitted with the ".u" element type.  Both inputs must be
;; registers; there is no compare-with-zero alternative.
2270(define_insn "neon_vc<cmp_op>u<mode>"
2271  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2272        (neg:<V_cmp_result>
2273          (GTUGEU:<V_cmp_result>
2274	    (match_operand:VDQIW 1 "s_register_operand" "w")
2275	    (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2276  "TARGET_NEON"
2277  "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2278  [(set_attr "type" "neon_compare<q>")]
2279)
2280
;; Builtin expander for the absolute-value comparisons (GTGE iterator) on
;; VCVTF operands.  With flag_unsafe_math_optimizations the canonical-RTL
;; _insn pattern is emitted; otherwise the _insn_unspec pattern is used.
2281(define_expand "neon_vca<cmp_op><mode>"
2282  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2283        (neg:<V_cmp_result>
2284          (GTGE:<V_cmp_result>
2285            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2286            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2287  "TARGET_NEON"
2288  {
2289    if (flag_unsafe_math_optimizations)
2290      emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2291                                                  operands[2]));
2292    else
2293      emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
2294                                                         operands[1],
2295                                                         operands[2]));
2296    DONE;
2297  }
2298)
2299
;; vacgt/vacge as canonical RTL: the negated comparison of the absolute
;; values of operands 1 and 2.  Only enabled with
;; flag_unsafe_math_optimizations.
2300(define_insn "neon_vca<cmp_op><mode>_insn"
2301  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2302        (neg:<V_cmp_result>
2303          (GTGE:<V_cmp_result>
2304            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2305            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2306  "TARGET_NEON && flag_unsafe_math_optimizations"
2307  "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2308  [(set_attr "type" "neon_fp_compare_s<q>")]
2309)
2310
;; vac<cmp_op_unsp> expressed as an unspec (NEON_VACMP iterator) on VCVTF
;; operands.
2311(define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2312  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2313        (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2314		                (match_operand:VCVTF 2 "s_register_operand" "w")]
2315                               NEON_VACMP))]
2316  "TARGET_NEON"
2317  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2318  [(set_attr "type" "neon_fp_compare_s<q>")]
2319)
2320
;; vtst on two VDQIW operands (UNSPEC_VTST).
2321(define_insn "neon_vtst<mode>"
2322  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2323        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2324		       (match_operand:VDQIW 2 "s_register_operand" "w")]
2325		      UNSPEC_VTST))]
2326  "TARGET_NEON"
2327  "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2328  [(set_attr "type" "neon_tst<q>")]
2329)
2330
;; vabd on VDQIW operands, iterating over the VABD unspecs.
2331(define_insn "neon_vabd<sup><mode>"
2332  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2333        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2334		      (match_operand:VDQIW 2 "s_register_operand" "w")]
2335		     VABD))]
2336  "TARGET_NEON"
2337  "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2338  [(set_attr "type" "neon_abd<q>")]
2339)
2340
;; vabd on VCVTF operands (UNSPEC_VABD_F).
2341(define_insn "neon_vabdf<mode>"
2342  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2343        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2344		      (match_operand:VCVTF 2 "s_register_operand" "w")]
2345		     UNSPEC_VABD_F))]
2346  "TARGET_NEON"
2347  "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2348  [(set_attr "type" "neon_fp_abd_s<q>")]
2349)
2350
;; vabdl: two VW operands produce a <V_widen> result.  Iterates over the
;; VABDL unspecs.
2351(define_insn "neon_vabdl<sup><mode>"
2352  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2353        (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2354		           (match_operand:VW 2 "s_register_operand" "w")]
2355                          VABDL))]
2356  "TARGET_NEON"
2357  "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2358  [(set_attr "type" "neon_abd_long")]
2359)
2360
;; vaba: the VABD unspec of operands 2 and 3 plus operand 1, which is tied
;; to the destination register.
2361(define_insn "neon_vaba<sup><mode>"
2362  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2363        (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2364		                   (match_operand:VDQIW 3 "s_register_operand" "w")]
2365		                  VABD)
2366		    (match_operand:VDQIW 1 "s_register_operand" "0")))]
2367  "TARGET_NEON"
2368  "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2369  [(set_attr "type" "neon_arith_acc<q>")]
2370)
2371
;; vabal: the VABDL unspec of VW operands 2 and 3 plus the <V_widen>
;; operand 1, which is tied to the destination register.
2372(define_insn "neon_vabal<sup><mode>"
2373  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2374        (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2375                                           (match_operand:VW 3 "s_register_operand" "w")]
2376					   VABDL)
2377			 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2378  "TARGET_NEON"
2379  "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2380  [(set_attr "type" "neon_arith_acc<q>")]
2381)
2382
;; vmax/vmin on VDQIW operands, iterating over the VMAXMIN unspecs.
2383(define_insn "neon_v<maxmin><sup><mode>"
2384  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2385        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2386		      (match_operand:VDQIW 2 "s_register_operand" "w")]
2387                     VMAXMIN))]
2388  "TARGET_NEON"
2389  "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2390  [(set_attr "type" "neon_minmax<q>")]
2391)
2392
;; vmax/vmin on VCVTF operands, iterating over the VMAXMINF unspecs.
2393(define_insn "neon_v<maxmin>f<mode>"
2394  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2395        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2396		      (match_operand:VCVTF 2 "s_register_operand" "w")]
2397                     VMAXMINF))]
2398  "TARGET_NEON"
2399  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2400  [(set_attr "type" "neon_fp_minmax_s<q>")]
2401)
2402
2403;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Standard-named pattern over the VMAXMINFNM unspecs on VCVTF operands;
;; emits <fmaxmin_op>.  Requires TARGET_FPU_ARMV8 in addition to NEON.
2404(define_insn "<fmaxmin><mode>3"
2405  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2406	(unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2407		       (match_operand:VCVTF 2 "s_register_operand" "w")]
2408		       VMAXMINFNM))]
2409  "TARGET_NEON && TARGET_FPU_ARMV8"
2410  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2411  [(set_attr "type" "neon_fp_minmax_s<q>")]
2412)
2413
;; Builtin expander for vpadd on VD modes; forwards its three operands
;; unchanged to neon_vpadd_internal<mode>.
2414(define_expand "neon_vpadd<mode>"
2415  [(match_operand:VD 0 "s_register_operand" "=w")
2416   (match_operand:VD 1 "s_register_operand" "w")
2417   (match_operand:VD 2 "s_register_operand" "w")]
2418  "TARGET_NEON"
2419{
2420  emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
2421					    operands[2]));
2422  DONE;
2423})
2424
;; vpaddl: one VDQIW operand produces a <V_double_width> result.  Iterates
;; over the VPADDL unspecs.
2425(define_insn "neon_vpaddl<sup><mode>"
2426  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2427        (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2428                                 VPADDL))]
2429  "TARGET_NEON"
2430  "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2431  [(set_attr "type" "neon_reduc_add_long")]
2432)
2433
;; vpadal: a VDQIW operand 2 with a <V_double_width> operand 1 that is tied
;; to the destination register.  Iterates over the VPADAL unspecs.
2434(define_insn "neon_vpadal<sup><mode>"
2435  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2436        (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2437                                  (match_operand:VDQIW 2 "s_register_operand" "w")]
2438                                 VPADAL))]
2439  "TARGET_NEON"
2440  "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2441  [(set_attr "type" "neon_reduc_add_acc")]
2442)
2443
;; vpmax/vpmin on VDI operands, iterating over the VPMAXMIN unspecs.
2444(define_insn "neon_vp<maxmin><sup><mode>"
2445  [(set (match_operand:VDI 0 "s_register_operand" "=w")
2446        (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2447		    (match_operand:VDI 2 "s_register_operand" "w")]
2448                   VPMAXMIN))]
2449  "TARGET_NEON"
2450  "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2451  [(set_attr "type" "neon_reduc_minmax<q>")]
2452)
2453
;; vpmax/vpmin on VCVTF operands, iterating over the VPMAXMINF unspecs.
2454(define_insn "neon_vp<maxmin>f<mode>"
2455  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2456        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2457		    (match_operand:VCVTF 2 "s_register_operand" "w")]
2458                   VPMAXMINF))]
2459  "TARGET_NEON"
2460  "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2461  [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
2462)
2463
;; vrecps on VCVTF operands (UNSPEC_VRECPS).
2464(define_insn "neon_vrecps<mode>"
2465  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2466        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2467		       (match_operand:VCVTF 2 "s_register_operand" "w")]
2468                      UNSPEC_VRECPS))]
2469  "TARGET_NEON"
2470  "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2471  [(set_attr "type" "neon_fp_recps_s<q>")]
2472)
2473
;; vrsqrts on VCVTF operands (UNSPEC_VRSQRTS).
2474(define_insn "neon_vrsqrts<mode>"
2475  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2476        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2477		       (match_operand:VCVTF 2 "s_register_operand" "w")]
2478                      UNSPEC_VRSQRTS))]
2479  "TARGET_NEON"
2480  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2481  [(set_attr "type" "neon_fp_rsqrts_s<q>")]
2482)
2483
;; Builtin expander for vabs on VDQW modes; forwards to the generic
;; abs<mode>2 pattern.
2484(define_expand "neon_vabs<mode>"
2485  [(match_operand:VDQW 0 "s_register_operand" "")
2486   (match_operand:VDQW 1 "s_register_operand" "")]
2487  "TARGET_NEON"
2488{
2489  emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
2490  DONE;
2491})
2492
;; vqabs on a VDQIW operand (UNSPEC_VQABS).
2493(define_insn "neon_vqabs<mode>"
2494  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2495	(unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2496		      UNSPEC_VQABS))]
2497  "TARGET_NEON"
2498  "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2499  [(set_attr "type" "neon_qabs<q>")]
2500)
2501
;; bswap on VDQHSD operands, emitted as vrev<V_sz_elem>.8.
2502(define_insn "neon_bswap<mode>"
2503  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2504        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2505  "TARGET_NEON"
2506  "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2507  [(set_attr "type" "neon_rev<q>")]
2508)
2509
;; Builtin expander for vneg on VDQW modes; forwards to the generic
;; neg<mode>2 pattern.
2510(define_expand "neon_vneg<mode>"
2511  [(match_operand:VDQW 0 "s_register_operand" "")
2512   (match_operand:VDQW 1 "s_register_operand" "")]
2513  "TARGET_NEON"
2514{
2515  emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
2516  DONE;
2517})
2518
;; Vector copysign for the VCVTF modes, implemented as a bitwise select.
;;   operand 0: destination
;;   operand 1: first source, passed to neon_vbsl as the last input
;;   operand 2: second source, copied into operand 0 before the select
;; A mask with only bit 31 set in every element is built in the integer
;; vector mode <V_cmp_result>, reinterpreted in the float vector mode via a
;; subreg, and handed to neon_vbsl<mode>.
;; NOTE(review): presumably vbsl takes the masked bit (the sign) from
;; operand 2 and all other bits from operand 1 -- confirm against the
;; neon_vbsl<mode> pattern.
(define_expand "neon_copysignf<mode>"
  [(match_operand:VCVTF 0 "register_operand")
   (match_operand:VCVTF 1 "register_operand")
   (match_operand:VCVTF 2 "register_operand")]
  "TARGET_NEON"
  "{
     rtx v_bitmask_cast;
     rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
     int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
     rtvec v = rtvec_alloc (n_elt);
     /* Bit 31 only.  gen_int_mode yields the canonical (sign-extended)
	CONST_INT for a 32-bit element; a plain GEN_INT of 0x80000000 is
	not canonical when HOST_WIDE_INT is wider than 32 bits.  */
     rtx sign_bit = gen_int_mode (0x80000000, SImode);

     /* Create bitmask for vector select.  */
     for (i = 0; i < n_elt; ++i)
       RTVEC_ELT (v, i) = sign_bit;

     emit_move_insn (v_bitmask,
		     gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
     /* Seed the destination with operand 2 so that it can serve as one of
	the two select inputs below.  */
     emit_move_insn (operands[0], operands[2]);
     v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
					   <VCVTF:V_cmp_result>mode, 0);
     emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
				     operands[1]));

     DONE;
  }"
)
2545
;; vqneg over the VDQIW modes.  Operand 1 is wrapped in UNSPEC_VQNEG and the
;; insn is printed as vqneg.<V_s_elem> dst, src.
(define_insn "neon_vqneg<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
	(unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
		      UNSPEC_VQNEG))]
  "TARGET_NEON"
  "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_qneg<q>")]
)
2554
;; vcls over the VDQIW modes.  Operand 1 is wrapped in UNSPEC_VCLS and the
;; insn is printed as vcls.<V_s_elem> dst, src.
(define_insn "neon_vcls<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
	(unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
		      UNSPEC_VCLS))]
  "TARGET_NEON"
  "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cls<q>")]
)
2563
;; Standard-named count-leading-zeros pattern for the VDQIW vector modes,
;; expressed with the generic clz RTL code and output as vclz.<V_if_elem>.
(define_insn "clz<mode>2"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)
2571
;; Intrinsic-named expander for vclz over the VDQIW modes.  It forwards
;; operands 0 (destination) and 1 (source) to the clz<mode>2 pattern.
(define_expand "neon_vclz<mode>"
  [(match_operand:VDQIW 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
  DONE;
})
2580
;; Standard-named population-count pattern for the VE vector modes,
;; expressed with the generic popcount RTL code and output as
;; vcnt.<V_sz_elem>.
(define_insn "popcount<mode>2"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)
2588
;; Intrinsic-named expander for vcnt over the VE modes.  It forwards
;; operands 0 (destination) and 1 (source) to the popcount<mode>2 pattern.
(define_expand "neon_vcnt<mode>"
  [(match_operand:VE 0 "s_register_operand" "=w")
   (match_operand:VE 1 "s_register_operand" "w")]
  "TARGET_NEON"
{
  emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
  DONE;
})
2597
;; vrecpe over the V32 modes.  Operand 1 is wrapped in UNSPEC_VRECPE and the
;; insn is printed as vrecpe.<V_u_elem> dst, src.
(define_insn "neon_vrecpe<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
	(unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
                    UNSPEC_VRECPE))]
  "TARGET_NEON"
  "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_recpe_s<q>")]
)
2606
;; vrsqrte over the V32 modes.  Operand 1 is wrapped in UNSPEC_VRSQRTE and
;; the insn is printed as vrsqrte.<V_u_elem> dst, src.
(define_insn "neon_vrsqrte<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
	(unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
                    UNSPEC_VRSQRTE))]
  "TARGET_NEON"
  "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
)
2615
;; Intrinsic-named expander for vmvn over the VDQIW modes.  It forwards
;; operands 0 (destination) and 1 (source) to the one_cmpl<mode>2 pattern.
(define_expand "neon_vmvn<mode>"
  [(match_operand:VDQIW 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")]
  "TARGET_NEON"
{
  emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
  DONE;
})
2624
;; Move one lane of a VD-mode vector (operand 1) into a core register
;; (operand 0), sign-extending the element to SImode.  Operand 2 is the
;; immediate lane index used by the vec_select.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
	(sign_extend:SI
	  (vec_select:<V_elem>
	    (match_operand:VD 1 "s_register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  /* On big-endian targets the lane index printed in the assembly is the
     RTL index mirrored within the vector.  */
  if (BYTES_BIG_ENDIAN)
    {
      const int last_lane = GET_MODE_NUNITS (<MODE>mode) - 1;
      const int rtl_lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (last_lane - rtl_lane);
    }

  return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
2643
;; Move one lane of a VD-mode vector (operand 1) into a core register
;; (operand 0), zero-extending the element to SImode.  Operand 2 is the
;; immediate lane index used by the vec_select.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
	(zero_extend:SI
	  (vec_select:<V_elem>
	    (match_operand:VD 1 "s_register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  /* On big-endian targets the lane index printed in the assembly is the
     RTL index mirrored within the vector.  */
  if (BYTES_BIG_ENDIAN)
    {
      const int last_lane = GET_MODE_NUNITS (<MODE>mode) - 1;
      const int rtl_lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (last_lane - rtl_lane);
    }

  return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
2662
;; Quad-register (VQ2) form of the sign-extending lane extract.  The lane
;; index (operand 2) is split into a half selector and a lane within that
;; half; the vmov is then printed against a <V_HALF>-mode register built
;; from the hard register number of operand 1.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
	(sign_extend:SI
	  (vec_select:<V_elem>
	    (match_operand:VQ2 1 "s_register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  const unsigned int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  const unsigned int lane = INTVAL (operands[2]);
  const unsigned int half = lane / lanes_per_half;
  unsigned int lane_in_half = lane % lanes_per_half;
  const int q_regno = REGNO (operands[1]);
  rtx xops[3];

  /* Big-endian: mirror the lane within its half.  */
  if (BYTES_BIG_ENDIAN)
    lane_in_half = lanes_per_half - 1 - lane_in_half;

  /* The upper half lives two register numbers above the lower half.  */
  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (<V_HALF>mode, q_regno + 2 * half);
  xops[2] = GEN_INT (lane_in_half);
  output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", xops);

  /* Everything has already been printed.  */
  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
2689
;; Quad-register (VQ2) form of the zero-extending lane extract.  The lane
;; index (operand 2) is split into a half selector and a lane within that
;; half; the vmov is then printed against a <V_HALF>-mode register built
;; from the hard register number of operand 1.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
	(zero_extend:SI
	  (vec_select:<V_elem>
	    (match_operand:VQ2 1 "s_register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  const unsigned int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  const unsigned int lane = INTVAL (operands[2]);
  const unsigned int half = lane / lanes_per_half;
  unsigned int lane_in_half = lane % lanes_per_half;
  const int q_regno = REGNO (operands[1]);
  rtx xops[3];

  /* Big-endian: mirror the lane within its half.  */
  if (BYTES_BIG_ENDIAN)
    lane_in_half = lanes_per_half - 1 - lane_in_half;

  /* The upper half lives two register numbers above the lower half.  */
  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (<V_HALF>mode, q_regno + 2 * half);
  xops[2] = GEN_INT (lane_in_half);
  output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", xops);

  /* Everything has already been printed.  */
  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
2716
;; Intrinsic expander: extract lane operand 2 of the VDQW vector operand 1
;; into operand 0.  Modes with 32-bit elements go through
;; vec_extract<mode>; all others use the sign-extending internal pattern.
(define_expand "neon_vget_lane<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand" "")
   (match_operand:VDQW 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  const bool elt_is_32_bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32;

  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics number lanes in vldm (memory) order, while generic
	 RTL numbers them in array order.  Translate the intrinsic lane
	 number into the RTL one by flipping it within a 64-bit
	 register.  */
      unsigned int lanes_per_dreg
	= 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      unsigned int lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (lane ^ (lanes_per_dreg - 1));
    }

  if (elt_is_32_bits)
    emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
  else
    emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
						       operands[1],
						       operands[2]));
  DONE;
})
2745
;; Unsigned counterpart of the lane-extract expander, for the VDQIW modes.
;; Modes with 32-bit elements go through vec_extract<mode>; all others use
;; the zero-extending internal pattern.
(define_expand "neon_vget_laneu<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand" "")
   (match_operand:VDQIW 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  const bool elt_is_32_bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32;

  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics number lanes in vldm (memory) order, while generic
	 RTL numbers them in array order.  Translate the intrinsic lane
	 number into the RTL one by flipping it within a 64-bit
	 register.  */
      unsigned int lanes_per_dreg
	= 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      unsigned int lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (lane ^ (lanes_per_dreg - 1));
    }

  if (elt_is_32_bits)
    emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
  else
    emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
						       operands[1],
						       operands[2]));
  DONE;
})
2774
;; Lane extract for a DImode source.  Operand 2 (the lane number) is not
;; read by the body: the source has a single 64-bit element, so the expander
;; just moves operand 1 into operand 0.
(define_expand "neon_vget_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=r")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})
2784
;; Lane extract from a V2DI vector: lane 0 is the low DImode part of
;; operand 1 and lane 1 the high part.  Any other lane number trips the
;; assertion.
(define_expand "neon_vget_lanev2di"
  [(match_operand:DI 0 "s_register_operand" "")
   (match_operand:V2DI 1 "s_register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_NEON"
{
  int lane;

  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics number lanes in vldm (memory) order, while generic
	 RTL numbers them in array order.  With two lanes the translation
	 is a flip of the low bit of the lane number.  */
      unsigned int flipped = INTVAL (operands[2]);

      flipped ^= 1;
      operands[2] = GEN_INT (flipped);
    }

  lane = INTVAL (operands[2]);
  gcc_assert (lane == 0 || lane == 1);

  if (lane == 0)
    emit_move_insn (operands[0], gen_lowpart (DImode, operands[1]));
  else
    emit_move_insn (operands[0], gen_highpart (DImode, operands[1]));
  DONE;
})
2813
;; Intrinsic expander: insert scalar operand 1 into lane operand 3 of the
;; VDQ vector operand 2, producing operand 0.  The lane is passed to
;; vec_set<mode>_internal as a one-hot mask (1 << lane).
(define_expand "neon_vset_lane<mode>"
  [(match_operand:VDQ 0 "s_register_operand" "=w")
   (match_operand:<V_elem> 1 "s_register_operand" "r")
   (match_operand:VDQ 2 "s_register_operand" "0")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_NEON"
{
  unsigned int lane = INTVAL (operands[3]);
  rtx lane_mask;

  /* Big-endian: flip the lane number within a 64-bit register.  */
  if (BYTES_BIG_ENDIAN)
    {
      unsigned int lanes_per_dreg
	= 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);

      lane ^= lanes_per_dreg - 1;
    }

  lane_mask = GEN_INT (1 << lane);
  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
					 lane_mask, operands[2]));
  DONE;
})
2834
; Operands 2 (the previous vector value) and 3 (the lane number) are ignored:
; a DImode "vector" has a single lane, so setting it replaces the whole value.
2836
;; Lane set for a DImode destination.  The body reads only operands 0 and 1
;; and emits a plain move of operand 1 into operand 0.
(define_expand "neon_vset_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "r")
   (match_operand:DI 2 "s_register_operand" "0")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})
2847
;; Build a VD_RE-mode vector from a DImode value: operand 1 is reinterpreted
;; in the vector mode with gen_lowpart and then moved into operand 0.
(define_expand "neon_vcreate<mode>"
  [(match_operand:VD_RE 0 "s_register_operand" "")
   (match_operand:DI 1 "general_operand" "")]
  "TARGET_NEON"
{
  rtx src = gen_lowpart (<MODE>mode, operands[1]);
  emit_move_insn (operands[0], src);
  DONE;
})
2857
;; Broadcast a scalar held in a core register (operand 1, constraint "r")
;; to every lane of a VX-mode vector, expressed as vec_duplicate and output
;; as vdup.<V_sz_elem>.
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.<V_sz_elem>\t%<V_reg>0, %1"
  [(set_attr "type" "neon_from_gp<q>")]
)
2865
;; Broadcast an HFmode scalar held in a core register to all lanes of a
;; V4HF vector; output as vdup.16 with a D-register destination (%P0).
(define_insn "neon_vdup_nv4hf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%P0, %1"
  [(set_attr "type" "neon_from_gp")]
)
2873
;; Broadcast an HFmode scalar held in a core register to all lanes of a
;; V8HF vector; output as vdup.16 with a Q-register destination (%q0).
(define_insn "neon_vdup_nv8hf"
  [(set (match_operand:V8HF 0 "s_register_operand" "=w")
        (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%q0, %1"
  [(set_attr "type" "neon_from_gp_q")]
)
2881
;; Broadcast for the V32 modes, with two alternatives for the scalar source:
;;   0: constraint "r", printed as a plain register operand (%1);
;;   1: constraint "t", printed with the %y1 operand modifier.
;; The "type" attribute differs per alternative accordingly.
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w,w")
        (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
  "TARGET_NEON"
  "@
  vdup.<V_sz_elem>\t%<V_reg>0, %1
  vdup.<V_sz_elem>\t%<V_reg>0, %y1"
  [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
)
2891
;; Broadcast for a DImode destination: with a single 64-bit element this is
;; emitted as a plain move of operand 1 into operand 0.
(define_expand "neon_vdup_ndi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "r")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
}
)
2901
;; Broadcast a DImode value to both lanes of a V2DI vector.  Each
;; alternative outputs two vmov instructions (hence length 8), one per
;; destination half (%e0 and %f0):
;;   0: source in core registers ("r"), printed as the pair %Q1, %R1;
;;   1: source in a "w"-class register, printed as %P1.
(define_insn "neon_vdup_nv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
        (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
  "TARGET_NEON"
  "@
  vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
  vmov\t%e0, %P1\;vmov\t%f0, %P1"
  [(set_attr "length" "8")
   (set_attr "type" "multiple")]
)
2912
;; Broadcast lane operand 2 of the <V_double_vector_mode> vector operand 1
;; to every lane of the VDQW destination.  The destination is printed as a
;; D or Q register depending on <Is_d_reg>; the source is always printed
;; with %P.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
	(vec_duplicate:VDQW
	  (vec_select:<V_elem>
	    (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  /* On big-endian targets the lane index printed in the assembly is the
     RTL index mirrored within the source vector.  */
  if (BYTES_BIG_ENDIAN)
    {
      const int last_lane
	= GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1;
      const int rtl_lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (last_lane - rtl_lane);
    }

  return (<Is_d_reg>
	  ? "vdup.<V_sz_elem>\t%P0, %P1[%c2]"
	  : "vdup.<V_sz_elem>\t%q0, %P1[%c2]");
}
  [(set_attr "type" "neon_dup<q>")]
)
2934
;; Intrinsic expander for vdup_lane: adjusts the lane number on big-endian
;; targets and then emits the _internal pattern with the same operands.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VDQW 0 "s_register_operand" "=w")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Flip the lane number within a 64-bit register.  */
      unsigned int lanes_per_dreg
	= 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
      unsigned int lane = INTVAL (operands[2]);

      operands[2] = GEN_INT (lane ^ (lanes_per_dreg - 1));
    }

  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
						operands[2]));
  DONE;
})
2953
2954; Scalar index is ignored, since only zero is valid here.
;; DImode vdup_lane: the body does not read operand 2 and emits a plain
;; move of operand 1 into operand 0.
(define_expand "neon_vdup_lanedi"
  [(match_operand:DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})
2964
2965; Likewise for v2di, as the DImode second operand has only a single element.
;; V2DI vdup_lane from a DImode source: the body does not read operand 2
;; and forwards operands 0 and 1 to neon_vdup_nv2di.
(define_expand "neon_vdup_lanev2di"
  [(match_operand:V2DI 0 "s_register_operand" "=w")
   (match_operand:DI 1 "s_register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
  DONE;
})
2975
2976; Disabled before reload because we don't want combine doing something silly,
2977; but used by the post-reload expansion of neon_vcombine.
;; Register swap for the VDQX modes, modelled as two parallel sets that
;; exchange operands 0 and 1; both operands are read-write ("+w").  The
;; condition includes reload_completed, so the pattern only matches after
;; reload.  Output as a single vswp.
(define_insn "*neon_vswp<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "+w")
	(match_operand:VDQX 1 "s_register_operand" "+w"))
   (set (match_dup 1) (match_dup 0))]
  "TARGET_NEON && reload_completed"
  "vswp\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_permute<q>")]
)
2986
2987;; In this insn, operand 1 should be low, and operand 2 the high part of the
2988;; dest vector.
2989;; FIXME: A different implementation of this builtin could make it much
2990;; more likely that we wouldn't actually need to output anything (we could make
2991;; it so that the reg allocator puts things in the right places magically
2992;; instead). Lack of subregs for vectors makes that tricky though, I think.
2993
;; Concatenate two VDX vectors (operand 1, operand 2) into a <V_DOUBLE>
;; vector via vec_concat.  The insn has no assembly of its own (template
;; "#"): once reload has completed it is always split, and the replacement
;; sequence is produced entirely by neon_split_vcombine.
(define_insn_and_split "neon_vcombine<mode>"
  [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
        (vec_concat:<V_DOUBLE>
	  (match_operand:VDX 1 "s_register_operand" "w")
	  (match_operand:VDX 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  neon_split_vcombine (operands);
  DONE;
}
[(set_attr "type" "multiple")]
)
3009
;; Extract one half of a VQX vector as a <V_HALF> value: a subreg of
;; operand 1 at byte offset GET_MODE_SIZE (<V_HALF>mode), i.e. the half that
;; starts one half-vector into the register, moved into operand 0.
(define_expand "neon_vget_high<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0],
		  simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
				       GET_MODE_SIZE (<V_HALF>mode)));
  DONE;
})
3020
;; Extract one half of a VQX vector as a <V_HALF> value: a subreg of
;; operand 1 at byte offset 0, moved into operand 0.
(define_expand "neon_vget_low<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0],
		  simplify_gen_subreg (<V_HALF>mode, operands[1],
				       <MODE>mode, 0));
  DONE;
})
3031
;; Standard-named signed integer to float vector conversion (VCVTI source,
;; <V_CVTTO> result), output as vcvt.f32.s32.  Disabled when
;; flag_rounding_math is set.
(define_insn "float<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3039
;; Standard-named unsigned integer to float vector conversion (VCVTI source,
;; <V_CVTTO> result), output as vcvt.f32.u32.  Disabled when
;; flag_rounding_math is set.
(define_insn "floatuns<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3047
;; Standard-named float to signed integer vector conversion (VCVTF source,
;; <V_CVTTO> result) using the fix RTL code; output as vcvt.s32.f32.
(define_insn "fix_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3055
;; Standard-named float to unsigned integer vector conversion (VCVTF source,
;; <V_CVTTO> result) using the unsigned_fix RTL code; output as
;; vcvt.u32.f32.
(define_insn "fixuns_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3063
;; Intrinsic float to integer conversion (VCVTF source), one pattern per
;; member of the VCVT_US unspec iterator.  <sup> supplies the signedness
;; letter in both the pattern name and the vcvt.<sup>32.f32 mnemonic.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
	(unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
			  VCVT_US))]
  "TARGET_NEON"
  "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3072
;; Intrinsic integer to float conversion (VCVTI source), one pattern per
;; member of the VCVT_US unspec iterator.  <sup> supplies the signedness
;; letter in both the pattern name and the vcvt.f32.<sup>32 mnemonic.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
	(unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
			  VCVT_US))]
  "TARGET_NEON"
  "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3081
;; Widening conversion from a V4HF source (printed as a D register) to a
;; V4SF result (printed as a Q register); output as vcvt.f32.f16.  Requires
;; TARGET_FP16 in addition to TARGET_NEON.
(define_insn "neon_vcvtv4sfv4hf"
  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
	(unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
			  UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f32.f16\t%q0, %P1"
  [(set_attr "type" "neon_fp_cvt_widen_h")]
)
3090
;; Narrowing conversion from a V4SF source (printed as a Q register) to a
;; V4HF result (printed as a D register); output as vcvt.f16.f32.  Requires
;; TARGET_FP16 in addition to TARGET_NEON.
(define_insn "neon_vcvtv4hfv4sf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
	(unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
			  UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f16.f32\t%P0, %q1"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)
3099
;; Float to integer conversion with an extra immediate (operand 2), VCVTF
;; source, one pattern per member of VCVT_US_N.  The immediate is printed as
;; the third assembly operand.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
	(unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
			   (match_operand:SI 2 "immediate_operand" "i")]
			  VCVT_US_N))]
  "TARGET_NEON"
{
  /* Range-check the immediate at output time.  NOTE(review): presumably
     the upper bound is exclusive, i.e. 1..32 is accepted -- confirm against
     neon_const_bounds.  */
  neon_const_bounds (operands[2], 1, 33);
  return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3112
;; Integer to float conversion with an extra immediate (operand 2), VCVTI
;; source, one pattern per member of VCVT_US_N.  The immediate is printed as
;; the third assembly operand.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
	(unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
			   (match_operand:SI 2 "immediate_operand" "i")]
			  VCVT_US_N))]
  "TARGET_NEON"
{
  /* Range-check the immediate at output time.  NOTE(review): presumably
     the upper bound is exclusive, i.e. 1..32 is accepted -- confirm against
     neon_const_bounds.  */
  neon_const_bounds (operands[2], 1, 33);
  return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3125
;; Narrowing move: a VN-mode source (printed as a Q register) produces a
;; <V_narrow> result (printed as a D register) through UNSPEC_VMOVN; output
;; as vmovn.<V_if_elem>.
(define_insn "neon_vmovn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
	(unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
                           UNSPEC_VMOVN))]
  "TARGET_NEON"
  "vmovn.<V_if_elem>\t%P0, %q1"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
3134
;; vqmovn: a VN-mode source (Q register) narrowed to <V_narrow>
;; (D register), one pattern per member of the VQMOVN unspec iterator;
;; <sup> supplies the signedness letter of the mnemonic.
(define_insn "neon_vqmovn<sup><mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
	(unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
                           VQMOVN))]
  "TARGET_NEON"
  "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3143
;; vqmovun: a VN-mode source (Q register) narrowed to <V_narrow>
;; (D register) through UNSPEC_VQMOVUN; output as vqmovun.<V_s_elem>.
(define_insn "neon_vqmovun<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
	(unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
                           UNSPEC_VQMOVUN))]
  "TARGET_NEON"
  "vqmovun.<V_s_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3152
;; Widening move: a VW-mode source (printed as a D register) produces a
;; <V_widen> result (printed as a Q register), one pattern per member of
;; the VMOVL unspec iterator; <sup> supplies the signedness letter.
(define_insn "neon_vmovl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
                          VMOVL))]
  "TARGET_NEON"
  "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
  [(set_attr "type" "neon_shift_imm_long")]
)
3161
;; Multiply a VMD vector (operand 1) by one lane of another VMD vector
;; (operand 2, lane index in operand 3).  The scalar operand uses the
;; mode-dependent <scalar_mul_constraint>.  All registers are printed as
;; D registers.  The "type" attribute distinguishes float from integer
;; modes via <Is_float_mode>.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
	(unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
		     (match_operand:VMD 2 "s_register_operand"
                                        "<scalar_mul_constraint>")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
{
  return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mul_s_scalar<q>")
                   (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)
3178
;; Quad-register form of the multiply-by-lane pattern: a VMQ vector
;; (operand 1) times one lane of a <V_HALF> vector (operand 2, lane index in
;; operand 3).  Destination and operand 1 are printed as Q registers, the
;; scalar source as a D register.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
	(unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
		     (match_operand:<V_HALF> 2 "s_register_operand"
                                             "<scalar_mul_constraint>")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
{
  return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mul_s_scalar<q>")
                   (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)
3195
;; Widening multiply by lane: a VMDI vector (operand 1) times one lane of a
;; VMDI vector (operand 2, lane index in operand 3), producing a <V_widen>
;; result printed as a Q register.  One pattern per member of VMULL_LANE;
;; <sup> supplies the signedness letter.
(define_insn "neon_vmull<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
		           (match_operand:VMDI 2 "s_register_operand"
					       "<scalar_mul_constraint>")
                           (match_operand:SI 3 "immediate_operand" "i")]
                          VMULL_LANE))]
  "TARGET_NEON"
{
  return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
}
  [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
)
3209
;; vqdmull by lane: a VMDI vector (operand 1) and one lane of a VMDI vector
;; (operand 2, lane index in operand 3) combined through
;; UNSPEC_VQDMULL_LANE into a <V_widen> result printed as a Q register.
(define_insn "neon_vqdmull_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
		           (match_operand:VMDI 2 "s_register_operand"
					       "<scalar_mul_constraint>")
                           (match_operand:SI 3 "immediate_operand" "i")]
                          UNSPEC_VQDMULL_LANE))]
  "TARGET_NEON"
{
  return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
}
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
)
3223
;; vqdmulh / vqrdmulh by lane, quad-register form: a VMQI vector
;; (operand 1) and one lane of a <V_HALF> vector (operand 2, lane index in
;; operand 3).  One pattern per member of VQDMULH_LANE; <r> selects the
;; mnemonic variant.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
	(unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
		      (match_operand:<V_HALF> 2 "s_register_operand"
					      "<scalar_mul_constraint>")
                      (match_operand:SI 3 "immediate_operand" "i")]
                      VQDMULH_LANE))]
  "TARGET_NEON"
{
  return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
}
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
)
3237
;; vqdmulh / vqrdmulh by lane, double-register form: all vector operands are
;; VMDI and are printed as D registers.  One pattern per member of
;; VQDMULH_LANE; <r> selects the mnemonic variant.
;; NOTE(review): the "type" attribute carries the _q suffix even though
;; this form prints D registers -- confirm whether that is intended.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
	(unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
		      (match_operand:VMDI 2 "s_register_operand"
					  "<scalar_mul_constraint>")
                      (match_operand:SI 3 "immediate_operand" "i")]
                      VQDMULH_LANE))]
  "TARGET_NEON"
{
  return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
}
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
)
3251
3252;; vqrdmlah_lane, vqrdmlsh_lane
;; Quad-register form.  Operand 1 is the accumulator and is tied to the
;; destination (constraint "0"), so it does not appear in the assembly;
;; operand 2 is the vector multiplicand and operand 3 / operand 4 give the
;; scalar source register and lane.  Enabled only under TARGET_NEON_RDMA.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
	(unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
		      (match_operand:VMQI 2 "s_register_operand" "w")
		      (match_operand:<V_HALF> 3 "s_register_operand"
					  "<scalar_mul_constraint>")
		      (match_operand:SI 4 "immediate_operand" "i")]
		     VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
{
  return
   "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
}
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
)
3268
;; vqrdmlah / vqrdmlsh by lane, double-register form: all vector operands
;; are VMDI and printed as D registers.  Operand 1 is the accumulator, tied
;; to the destination (constraint "0"); operands 3 and 4 give the scalar
;; source register and lane.  Enabled only under TARGET_NEON_RDMA.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
	(unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
		      (match_operand:VMDI 2 "s_register_operand" "w")
		      (match_operand:VMDI 3 "s_register_operand"
					  "<scalar_mul_constraint>")
		      (match_operand:SI 4 "immediate_operand" "i")]
		     VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
{
  return
   "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
)
3284
;; Multiply-accumulate by lane, double-register (VMD) form.  Operand 1 is
;; the accumulator, tied to the destination (constraint "0") and therefore
;; absent from the assembly; operand 2 is the vector multiplicand and
;; operands 3 / 4 give the scalar source register and lane.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
	(unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
		     (match_operand:VMD 2 "s_register_operand" "w")
                     (match_operand:VMD 3 "s_register_operand"
					"<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                     UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
{
  return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3302
;; Multiply-accumulate by lane, quad-register (VMQ) form.  Operand 1 is the
;; accumulator, tied to the destination; operand 2 is the vector
;; multiplicand (Q register) and operand 3 the <V_HALF> scalar source
;; (D register) indexed by operand 4.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
	(unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
		     (match_operand:VMQ 2 "s_register_operand" "w")
                     (match_operand:<V_HALF> 3 "s_register_operand"
					     "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                     UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
{
  return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3320
;; Widening multiply-accumulate by lane.  Operand 1 is the <V_widen>
;; accumulator, tied to the destination; operands 2 and 3 are VMDI sources
;; printed as D registers, with operand 4 selecting the lane of operand 3.
;; One pattern per member of VMLAL_LANE; <sup> supplies the signedness
;; letter.
(define_insn "neon_vmlal<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
			   (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
					       "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          VMLAL_LANE))]
  "TARGET_NEON"
{
  return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)
3335
;; vqdmlal by lane.  Operand 1 is the <V_widen> accumulator, tied to the
;; destination; operands 2 and 3 are VMDI sources printed as D registers,
;; with operand 4 selecting the lane of operand 3.
(define_insn "neon_vqdmlal_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
			   (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
					       "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          UNSPEC_VQDMLAL_LANE))]
  "TARGET_NEON"
{
  return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)
3350
;; Multiply-subtract by lane, double-register (VMD) form.  Operand 1 is the
;; accumulator, tied to the destination (constraint "0"); operand 2 is the
;; vector multiplicand and operands 3 / 4 give the scalar source register
;; and lane.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
	(unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
		     (match_operand:VMD 2 "s_register_operand" "w")
                     (match_operand:VMD 3 "s_register_operand"
					"<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                    UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
{
  return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3368
;; Multiply-subtract by lane, quad-register (VMQ) form.  Operand 1 is the
;; accumulator, tied to the destination; operand 2 is the vector
;; multiplicand (Q register) and operand 3 the <V_HALF> scalar source
;; (D register) indexed by operand 4.
(define_insn "neon_vmls_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
	(unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
		     (match_operand:VMQ 2 "s_register_operand" "w")
                     (match_operand:<V_HALF> 3 "s_register_operand"
					     "<scalar_mul_constraint>")
                     (match_operand:SI 4 "immediate_operand" "i")]
                    UNSPEC_VMLS_LANE))]
  "TARGET_NEON"
{
  return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
}
  [(set (attr "type")
     (if_then_else (match_test "<Is_float_mode>")
                   (const_string "neon_fp_mla_s_scalar<q>")
                   (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
3386
;; Widening multiply-subtract by lane.  Operand 1 is the <V_widen>
;; accumulator, tied to the destination; operands 2 and 3 are VMDI sources
;; printed as D registers, with operand 4 selecting the lane of operand 3.
;; One pattern per member of VMLSL_LANE; <sup> supplies the signedness
;; letter.
(define_insn "neon_vmlsl<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
			   (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
					       "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          VMLSL_LANE))]
  "TARGET_NEON"
{
  return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
)
3401
;; VQDMLSL by scalar.  Operand layout matches the vmlsl lane pattern above:
;; <V_widen> destination with tied accumulator (operand 1), VMDI multiplicand
;; (operand 2), VMDI scalar vector (operand 3) and constant lane (operand 4).
;; The element suffix comes from <V_s_elem>.
(define_insn "neon_vqdmlsl_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
			   (match_operand:VMDI 2 "s_register_operand" "w")
                           (match_operand:VMDI 3 "s_register_operand"
					       "<scalar_mul_constraint>")
                           (match_operand:SI 4 "immediate_operand" "i")]
                          UNSPEC_VQDMLSL_LANE))]
  "TARGET_NEON"
{
  return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
}
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
)
3416
; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
; core register into a temp register, then use a scalar taken from that. This
; isn't an optimal solution if e.g. the scalar has just been read from memory
; or extracted from another vector. In the latter case it's currently better to
; use the "_lane" variant, and the former case can probably be implemented
; using vld1_lane, but that hasn't been done yet.
3423
;; vmul_n for the VMD modes.  The scalar (operand 2) is written to lane 0 of
;; a scratch vector, and the operation is then emitted through the
;; neon_vmul_lane pattern with lane 0 selected.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Scratch vector; only lane 0 is written before it is used.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
				       lane0));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
				       lane0));
  DONE;
})
3436
;; vmul_n for the VMQ modes.  The scalar (operand 2) is written to lane 0 of
;; a scratch vector in the <V_HALF> mode, which then feeds the
;; neon_vmul_lane pattern with lane 0 selected.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Half-width scratch vector; only lane 0 is written before use.  */
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
					 lane0));
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], scalar_vec,
				       lane0));
  DONE;
})
3449
;; vmulls_n: <V_widen> result (operand 0) from a VMDI vector (operand 1) and
;; a scalar (operand 2).  The scalar is written to lane 0 of a scratch VMDI
;; vector and the neon_vmulls_lane pattern is used with lane 0.
(define_expand "neon_vmulls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Scratch vector; only lane 0 is written before it is used.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
				       lane0));
  emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1],
					 scalar_vec, lane0));
  DONE;
})
3462
;; vmullu_n: <V_widen> result (operand 0) from a VMDI vector (operand 1) and
;; a scalar (operand 2).  The scalar is written to lane 0 of a scratch VMDI
;; vector and the neon_vmullu_lane pattern is used with lane 0.
(define_expand "neon_vmullu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Scratch vector; only lane 0 is written before it is used.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
				       lane0));
  emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1],
					 scalar_vec, lane0));
  DONE;
})
3475
;; vqdmull_n: <V_widen> result (operand 0) from a VMDI vector (operand 1) and
;; a scalar (operand 2).  The scalar is written to lane 0 of a scratch VMDI
;; vector and the neon_vqdmull_lane pattern is used with lane 0.
(define_expand "neon_vqdmull_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Scratch vector; only lane 0 is written before it is used.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
				       lane0));
  emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1],
					  scalar_vec, lane0));
  DONE;
})
3488
;; vqdmulh_n for the VMDI modes.  The scalar (operand 2) is written to lane 0
;; of a scratch vector and the neon_vqdmulh_lane pattern is used with lane 0.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Scratch vector; only lane 0 is written before it is used.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
				       lane0));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
					  scalar_vec, lane0));
  DONE;
})
3501
;; vqrdmulh_n for the VMDI modes.  The scalar (operand 2) is written to lane 0
;; of a scratch vector and the neon_vqrdmulh_lane pattern is used with lane 0.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand" "")
   (match_operand:VMDI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Scratch vector; only lane 0 is written before it is used.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2], scalar_vec,
				       lane0));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
					   scalar_vec, lane0));
  DONE;
})
3514
;; vqdmulh_n for the VMQI modes.  The scalar (operand 2) is written to lane 0
;; of a scratch vector in the <V_HALF> mode, which then feeds the
;; neon_vqdmulh_lane pattern with lane 0 selected.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand" "")
   (match_operand:VMQI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Half-width scratch vector; only lane 0 is written before use.  */
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
					 lane0));
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
					  scalar_vec, lane0));
  DONE;
})
3527
;; vqrdmulh_n for the VMQI modes.  The scalar (operand 2) is written to lane 0
;; of a scratch vector in the <V_HALF> mode, which then feeds the
;; neon_vqrdmulh_lane pattern with lane 0 selected.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand" "")
   (match_operand:VMQI 1 "s_register_operand" "")
   (match_operand:<V_elem> 2 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Half-width scratch vector; only lane 0 is written before use.  */
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2], scalar_vec,
					 lane0));
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
					   scalar_vec, lane0));
  DONE;
})
3540
;; vmla_n for the VMD modes.  Operands 1 and 2 are passed through unchanged;
;; the scalar (operand 3) is written to lane 0 of a scratch vector that is
;; handed to the neon_vmla_lane pattern with lane 0 selected.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:VMD 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Scratch vector; only lane 0 is written before it is used.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
				       lane0));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
				       scalar_vec, lane0));
  DONE;
})
3554
;; vmla_n for the VMQ modes.  Operands 1 and 2 are passed through unchanged;
;; the scalar (operand 3) is written to lane 0 of a <V_HALF> mode scratch
;; vector that is handed to the neon_vmla_lane pattern with lane 0 selected.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:VMQ 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Half-width scratch vector; only lane 0 is written before use.  */
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
					 lane0));
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
				       scalar_vec, lane0));
  DONE;
})
3568
;; vmlals_n: <V_widen> result (operand 0) and <V_widen> operand 1, VMDI
;; operand 2 and a scalar (operand 3).  The scalar is written to lane 0 of a
;; scratch VMDI vector and the neon_vmlals_lane pattern is used with lane 0.
(define_expand "neon_vmlals_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Scratch vector; only lane 0 is written before it is used.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
				       lane0));
  emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1],
					 operands[2], scalar_vec, lane0));
  DONE;
})
3582
;; vmlalu_n: <V_widen> result (operand 0) and <V_widen> operand 1, VMDI
;; operand 2 and a scalar (operand 3).  The scalar is written to lane 0 of a
;; scratch VMDI vector and the neon_vmlalu_lane pattern is used with lane 0.
(define_expand "neon_vmlalu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Scratch vector; only lane 0 is written before it is used.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
				       lane0));
  emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1],
					 operands[2], scalar_vec, lane0));
  DONE;
})
3596
;; vqdmlal_n: <V_widen> result (operand 0) and <V_widen> operand 1, VMDI
;; operand 2 and a scalar (operand 3).  The scalar is written to lane 0 of a
;; scratch VMDI vector and the neon_vqdmlal_lane pattern is used with lane 0.
(define_expand "neon_vqdmlal_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Scratch vector; only lane 0 is written before it is used.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
				       lane0));
  emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1],
					  operands[2], scalar_vec, lane0));
  DONE;
})
3610
;; vmls_n for the VMD modes.  Operands 1 and 2 are passed through unchanged;
;; the scalar (operand 3) is written to lane 0 of a scratch vector that is
;; handed to the neon_vmls_lane pattern with lane 0 selected.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMD 0 "s_register_operand" "")
   (match_operand:VMD 1 "s_register_operand" "")
   (match_operand:VMD 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Scratch vector; only lane 0 is written before it is used.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
				       lane0));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
				       scalar_vec, lane0));
  DONE;
})
3624
;; vmls_n for the VMQ modes.  Operands 1 and 2 are passed through unchanged;
;; the scalar (operand 3) is written to lane 0 of a <V_HALF> mode scratch
;; vector that is handed to the neon_vmls_lane pattern with lane 0 selected.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand" "")
   (match_operand:VMQ 1 "s_register_operand" "")
   (match_operand:VMQ 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Half-width scratch vector; only lane 0 is written before use.  */
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3], scalar_vec,
					 lane0));
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
				       scalar_vec, lane0));
  DONE;
})
3638
;; vmlsls_n: <V_widen> result (operand 0) and <V_widen> operand 1, VMDI
;; operand 2 and a scalar (operand 3).  The scalar is written to lane 0 of a
;; scratch VMDI vector and the neon_vmlsls_lane pattern is used with lane 0.
(define_expand "neon_vmlsls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Scratch vector; only lane 0 is written before it is used.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
				       lane0));
  emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1],
					 operands[2], scalar_vec, lane0));
  DONE;
})
3652
;; vmlslu_n: <V_widen> result (operand 0) and <V_widen> operand 1, VMDI
;; operand 2 and a scalar (operand 3).  The scalar is written to lane 0 of a
;; scratch VMDI vector and the neon_vmlslu_lane pattern is used with lane 0.
(define_expand "neon_vmlslu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Scratch vector; only lane 0 is written before it is used.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
				       lane0));
  emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1],
					 operands[2], scalar_vec, lane0));
  DONE;
})
3666
;; vqdmlsl_n: <V_widen> result (operand 0) and <V_widen> operand 1, VMDI
;; operand 2 and a scalar (operand 3).  The scalar is written to lane 0 of a
;; scratch VMDI vector and the neon_vqdmlsl_lane pattern is used with lane 0.
(define_expand "neon_vqdmlsl_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand" "")
   (match_operand:<V_widen> 1 "s_register_operand" "")
   (match_operand:VMDI 2 "s_register_operand" "")
   (match_operand:<V_elem> 3 "s_register_operand" "")]
  "TARGET_NEON"
{
  /* Scratch vector; only lane 0 is written before it is used.  */
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);
  rtx lane0 = const0_rtx;

  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3], scalar_vec,
				       lane0));
  emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1],
					  operands[2], scalar_vec, lane0));
  DONE;
})
3680
;; VEXT on two VDQX registers (operands 1 and 2) with a constant element
;; offset in operand 3.  The offset is checked with neon_const_bounds against
;; 0 and the number of units of the mode before the instruction is printed.
(define_insn "neon_vext<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
	(unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
		      (match_operand:VDQX 2 "s_register_operand" "w")
                      (match_operand:SI 3 "immediate_operand" "i")]
                     UNSPEC_VEXT))]
  "TARGET_NEON"
{
  /* NOTE(review): presumably the upper bound is exclusive, giving the range
     0 .. nunits - 1 -- confirm against neon_const_bounds.  */
  neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
  return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_ext<q>")]
)
3694
;; VREV64 for the VDQ modes: single source register (operand 1), result in
;; operand 0, element size suffix taken from <V_sz_elem>.
(define_insn "neon_vrev64<mode>"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
	(unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
                    UNSPEC_VREV64))]
  "TARGET_NEON"
  "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
3703
;; VREV32 for the VX modes: single source register (operand 1), result in
;; operand 0, element size suffix taken from <V_sz_elem>.
(define_insn "neon_vrev32<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
	(unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
                   UNSPEC_VREV32))]
  "TARGET_NEON"
  "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
3712
;; VREV16 for the VE modes: single source register (operand 1), result in
;; operand 0, element size suffix taken from <V_sz_elem>.
(define_insn "neon_vrev16<mode>"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
	(unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
                   UNSPEC_VREV16))]
  "TARGET_NEON"
  "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_rev<q>")]
)
3721
3722; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
3723; allocation. For an intrinsic of form:
3724;   rD = vbsl_* (rS, rN, rM)
3725; We can use any of:
3726;   vbsl rS, rN, rM  (if D = S)
3727;   vbit rD, rN, rS  (if D = M, so 1-bits in rS choose bits from rN, else rM)
3728;   vbif rD, rM, rS  (if D = N, so 0-bits in rS choose bits from rM, else rN)
3729
;; Bitwise select with three constraint alternatives, one per input that may
;; be tied to the destination:
;;   alternative 0: operand 1 tied to operand 0 -> vbsl %0, %2, %3
;;   alternative 1: operand 3 tied to operand 0 -> vbit %0, %2, %1
;;   alternative 2: operand 2 tied to operand 0 -> vbif %0, %3, %1
(define_insn "neon_vbsl<mode>_internal"
  [(set (match_operand:VDQX 0 "s_register_operand"		 "=w,w,w")
	(unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
		      (match_operand:VDQX 2 "s_register_operand" " w,w,0")
                      (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
                     UNSPEC_VBSL))]
  "TARGET_NEON"
  "@
  vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
  vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
  vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
  [(set_attr "type" "neon_bsl<q>")]
)
3743
;; Expander for the vbsl builtins.  The selector (operand 1) arrives in the
;; <V_cmp_result> mode and is converted with gen_lowpart to the data mode, so
;; that the resulting UNSPEC_VBSL RTL has all three inputs in the same mode,
;; as the neon_vbsl<mode>_internal insn declares them.
(define_expand "neon_vbsl<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "")
        (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
                      (match_operand:VDQX 2 "s_register_operand" "")
                      (match_operand:VDQX 3 "s_register_operand" "")]
                     UNSPEC_VBSL))]
  "TARGET_NEON"
{
  /* We can't alias operands together if they have different modes.  */
  operands[1] = gen_lowpart (<MODE>mode, operands[1]);
})
3755
;; vshl, vrshl
;; Shift of operand 1 by the per-element counts held in the vector register
;; operand 2.  <shift_op> and <sup> come from the VSHL iterator and supply the
;; mnemonic stem and the signedness letter.
;; NOTE(review): operand 2 is a register, yet the type attribute is
;; neon_shift_imm<q> -- confirm this scheduling class is intentional.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
		       (match_operand:VDQIX 2 "s_register_operand" "w")]
                      VSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
3766
;; vqshl, vqrshl
;; Same operand layout as the vshl/vrshl pattern (operand 1 shifted by the
;; counts in register operand 2), iterating over VQSHL instead of VSHL.
;; NOTE(review): operand 2 is a register, yet the type attribute is
;; neon_sat_shift_imm<q> -- confirm this scheduling class is intentional.
(define_insn "neon_v<shift_op><sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
		       (match_operand:VDQIX 2 "s_register_operand" "w")]
                      VQSHL))]
  "TARGET_NEON"
  "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
3777
;; vshr_n, vrshr_n
;; Right shift of operand 1 by the constant in operand 2.  The constant is
;; checked with neon_const_bounds against 1 and element-bits + 1 before the
;; instruction is printed.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
		       (match_operand:SI 2 "immediate_operand" "i")]
                      VSHR_N))]
  "TARGET_NEON"
{
  /* NOTE(review): presumably the upper bound is exclusive, giving the range
     1 .. element size -- confirm against neon_const_bounds.  */
  neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
3791
;; vshrn_n, vrshrn_n
;; Narrowing right shift: the VN source (operand 1, printed with %q) is
;; shifted by the constant operand 2 and the result is produced in the
;; <V_narrow> mode (operand 0, printed with %P).  The constant is checked
;; against 1 and half the source element width + 1.
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
	(unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
			    (match_operand:SI 2 "immediate_operand" "i")]
                           VSHRN_N))]
  "TARGET_NEON"
{
  neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
  return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
3805
;; vqshrn_n, vqrshrn_n
;; Narrowing right shift over the VQSHRN_N iterator: VN source (operand 1,
;; printed with %q), constant shift (operand 2), <V_narrow> result (operand 0,
;; printed with %P).  <sup> supplies the signedness letter of the element
;; suffix.  The constant is checked against 1 and half the source element
;; width + 1.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
	(unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
			    (match_operand:SI 2 "immediate_operand" "i")]
                           VQSHRN_N))]
  "TARGET_NEON"
{
  neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3819
;; vqshrun_n, vqrshrun_n
;; Narrowing right shift over the VQSHRUN_N iterator: VN source (operand 1,
;; printed with %q), constant shift (operand 2), <V_narrow> result (operand 0,
;; printed with %P).  The element suffix comes from <V_s_elem>.  The constant
;; is checked against 1 and half the source element width + 1.
(define_insn "neon_v<shift_op>_n<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
	(unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
			    (match_operand:SI 2 "immediate_operand" "i")]
                           VQSHRUN_N))]
  "TARGET_NEON"
{
  neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
  return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3833
;; vshl_n: left shift of operand 1 by the constant in operand 2.  The constant
;; is checked with neon_const_bounds against 0 and the element width.
(define_insn "neon_vshl_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
		       (match_operand:SI 2 "immediate_operand" "i")]
                      UNSPEC_VSHL_N))]
  "TARGET_NEON"
{
  /* NOTE(review): presumably the upper bound is exclusive, giving the range
     0 .. element size - 1 -- confirm against neon_const_bounds.  */
  neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
  return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_shift_imm<q>")]
)
3846
;; vqshl_n (one pattern per <sup> value of the VQSHL_N iterator): VQSHL of
;; operand 1 by the constant in operand 2, which is checked against 0 and
;; the element width.
(define_insn "neon_vqshl_<sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
		       (match_operand:SI 2 "immediate_operand" "i")]
                      VQSHL_N))]
  "TARGET_NEON"
{
  neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
  return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
3859
;; vqshlu_n: VQSHLU of operand 1 by the constant in operand 2, which is
;; checked against 0 and the element width.  The element suffix comes from
;; <V_s_elem>.
(define_insn "neon_vqshlu_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
		       (match_operand:SI 2 "immediate_operand" "i")]
                      UNSPEC_VQSHLU_N))]
  "TARGET_NEON"
{
  neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
  return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
3872
;; vshll_n: VSHLL of the VW source (operand 1, printed with %P) by the
;; constant in operand 2, producing a <V_widen> result (operand 0, printed
;; with %q).  <sup> supplies the signedness letter.
(define_insn "neon_vshll<sup>_n<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
	(unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
			   (match_operand:SI 2 "immediate_operand" "i")]
			  VSHLL_N))]
  "TARGET_NEON"
{
  /* The boundaries are: 0 < imm <= size.  */
  /* NOTE(review): the lower bound passed below is 0, which looks as if it
     also admits imm == 0, unlike the strict inequality stated above --
     confirm which of the two is intended.  */
  neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
  return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
}
  [(set_attr "type" "neon_shift_imm_long")]
)
3886
;; vsra_n, vrsra_n
;; Shift-and-accumulate: operand 1 is tied to the destination (constraint
;; "0"), operand 2 is the vector to shift and operand 3 the constant shift
;; amount, checked against 1 and element-bits + 1.  Only %0, %2 and %3 appear
;; in the template because the accumulator shares a register with the result.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
		       (match_operand:VDQIX 2 "s_register_operand" "w")
                       (match_operand:SI 3 "immediate_operand" "i")]
                      VSRA_N))]
  "TARGET_NEON"
{
  neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_acc<q>")]
)
3901
;; vsri_n: VSRI with operand 1 tied to the destination (constraint "0"),
;; operand 2 as the source vector and operand 3 as the constant shift amount,
;; checked against 1 and element-bits + 1.
;; NOTE(review): the shift amount is an immediate, yet the type attribute is
;; neon_shift_reg<q> -- confirm this scheduling class is intentional.
(define_insn "neon_vsri_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
        	       (match_operand:VDQIX 2 "s_register_operand" "w")
                       (match_operand:SI 3 "immediate_operand" "i")]
                      UNSPEC_VSRI))]
  "TARGET_NEON"
{
  neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
  return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
3915
;; vsli_n: VSLI with operand 1 tied to the destination (constraint "0"),
;; operand 2 as the source vector and operand 3 as the constant shift amount,
;; checked against 0 and the element width.
;; NOTE(review): the shift amount is an immediate, yet the type attribute is
;; neon_shift_reg<q> -- confirm this scheduling class is intentional.
(define_insn "neon_vsli_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
	(unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
        	       (match_operand:VDQIX 2 "s_register_operand" "w")
                       (match_operand:SI 3 "immediate_operand" "i")]
                      UNSPEC_VSLI))]
  "TARGET_NEON"
{
  neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
  return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
3929
;; VTBL with a one-register table: operand 1 is the V8QI table, printed as a
;; single-entry register list, and operand 2 is the V8QI index vector.
(define_insn "neon_vtbl1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
	(unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
		      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
  "vtbl.8\t%P0, {%P1}, %P2"
  [(set_attr "type" "neon_tbl1")]
)
3939
;; VTBL with a two-register table.  Operand 1 is the TImode table and operand
;; 2 the V8QI index vector.  The table is printed as a list of two V8QI
;; registers whose hard register numbers are REGNO (table) and REGNO + 2.
(define_insn "neon_vtbl2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
	(unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
		      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  rtx asm_ops[4];
  int table_regno = REGNO (operands[1]);
  int i;

  /* Slot 0 is the result, slots 1-2 the table list, slot 3 the indices.  */
  asm_ops[0] = operands[0];
  for (i = 0; i < 2; i++)
    asm_ops[i + 1] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[3] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
3960
;; VTBL with a three-register table.  Operand 1 is the EImode table and
;; operand 2 the V8QI index vector.  The table is printed as a list of three
;; V8QI registers at hard register numbers REGNO (table) + 0, 2 and 4.
(define_insn "neon_vtbl3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
	(unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
		      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  rtx asm_ops[5];
  int table_regno = REGNO (operands[1]);
  int i;

  /* Slot 0 is the result, slots 1-3 the table list, slot 4 the indices.  */
  asm_ops[0] = operands[0];
  for (i = 0; i < 3; i++)
    asm_ops[i + 1] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[4] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
3982
;; VTBL with a four-register table.  Operand 1 is the OImode table and
;; operand 2 the V8QI index vector.  The table is printed as a list of four
;; V8QI registers at hard register numbers REGNO (table) + 0, 2, 4 and 6.
(define_insn "neon_vtbl4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
	(unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
		      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  rtx asm_ops[6];
  int table_regno = REGNO (operands[1]);
  int i;

  /* Slot 0 is the result, slots 1-4 the table list, slot 5 the indices.  */
  asm_ops[0] = operands[0];
  for (i = 0; i < 4; i++)
    asm_ops[i + 1] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[5] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
4005
4006;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup with a single V16QI table (operand 1) and a V16QI
;; selector (operand 2).  The result is earlyclobbered.  The insn is output
;; as "#" and split after reload into two neon_vtbl2v8qi lookups, one for the
;; low and one for the high V8QI half of the result and selector; both use
;; the whole table viewed as TImode.
(define_insn_and_split "neon_vtbl1v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
	(unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
		       (match_operand:V16QI 2 "s_register_operand" "w")]
		      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx table = gen_lowpart (TImode, operands[1]);
  rtx sel = operands[2];
  rtx dest_half, sel_half;
  unsigned half_ofs;

  /* Low halves of the result and of the selector.  */
  half_ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  dest_half = simplify_subreg (V8QImode, dest, V16QImode, half_ofs);
  sel_half = simplify_subreg (V8QImode, sel, V16QImode, half_ofs);
  emit_insn (gen_neon_vtbl2v8qi (dest_half, table, sel_half));

  /* High halves of the result and of the selector.  */
  half_ofs = subreg_highpart_offset (V8QImode, V16QImode);
  dest_half = simplify_subreg (V8QImode, dest, V16QImode, half_ofs);
  sel_half = simplify_subreg (V8QImode, sel, V16QImode, half_ofs);
  emit_insn (gen_neon_vtbl2v8qi (dest_half, table, sel_half));
  DONE;
}
  [(set_attr "type" "multiple")]
)
4037
;; V16QI table lookup with a two-vector table held in an OImode register
;; group (operand 1) and a V16QI selector (operand 2).  The result is
;; earlyclobbered.  The insn is output as "#" and split after reload into two
;; V8QI lookups, one for the low and one for the high half of the result and
;; selector; both use the whole OImode table.
(define_insn_and_split "neon_vtbl2v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
	(unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
		       (match_operand:V16QI 2 "s_register_operand" "w")]
		      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op0, op1, op2, part0, part2;
  unsigned ofs;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];

  /* The table is OImode, so build the lookups with the vtbl4 generator,
     whose table operand is declared OImode.  The vtbl2 generator declares a
     TImode table and does not describe this operand.  */
  ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));

  ofs = subreg_highpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
  DONE;
}
  [(set_attr "type" "multiple")]
)
4068
4069;; ??? Logically we should extend the regular neon_vcombine pattern to
4070;; handle quad-word input modes, producing octa-word output modes.  But
4071;; that requires us to add support for octa-word vector modes in moves.
4072;; That seems overkill for this one use in vec_perm.
;; Combine two V16QI registers (operands 1 and 2) into one OImode register
;; group (operand 0).  Output as "#" and split after reload by calling
;; neon_split_vcombine on the operands.
(define_insn_and_split "neon_vcombinev16qi"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
	(unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
		    (match_operand:V16QI 2 "s_register_operand" "w")]
		   UNSPEC_VCONCAT))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  neon_split_vcombine (operands);
  DONE;
}
[(set_attr "type" "multiple")]
)
4088
;; VTBX with a one-register table.  Operand 1 is tied to the destination
;; (constraint "0"), operand 2 is the V8QI table printed as a single-entry
;; register list, and operand 3 is the V8QI index vector.
(define_insn "neon_vtbx1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
	(unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
		      (match_operand:V8QI 2 "s_register_operand" "w")
		      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
  "vtbx.8\t%P0, {%P2}, %P3"
  [(set_attr "type" "neon_tbl1")]
)
4099
;; VTBX with a two-register table.  Operand 1 is tied to the destination
;; (constraint "0"), operand 2 is the TImode table and operand 3 the V8QI
;; index vector.  The table is printed as a list of two V8QI registers whose
;; hard register numbers are REGNO (table) and REGNO + 2.
(define_insn "neon_vtbx2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
	(unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
		      (match_operand:TI 2 "s_register_operand" "w")
		      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  rtx asm_ops[4];
  int table_regno = REGNO (operands[2]);
  int i;

  /* Slot 0 is the result, slots 1-2 the table list, slot 3 the indices.  */
  asm_ops[0] = operands[0];
  for (i = 0; i < 2; i++)
    asm_ops[i + 1] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[3] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
4121
;; VTBX with a three-register table.  Operand 1 is tied to the destination
;; (constraint "0"), operand 2 is the EImode table and operand 3 the V8QI
;; index vector.  The table is printed as a list of three V8QI registers at
;; hard register numbers REGNO (table) + 0, 2 and 4.
(define_insn "neon_vtbx3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
	(unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
		      (match_operand:EI 2 "s_register_operand" "w")
		      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  rtx asm_ops[5];
  int table_regno = REGNO (operands[2]);
  int i;

  /* Slot 0 is the result, slots 1-3 the table list, slot 4 the indices.  */
  asm_ops[0] = operands[0];
  for (i = 0; i < 3; i++)
    asm_ops[i + 1] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[4] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
4144
;; VTBX with a four-register table.  Operand 1 is tied to the destination
;; (constraint "0"), operand 2 is the OImode table and operand 3 the V8QI
;; index vector.  The table is printed as a list of four V8QI registers at
;; hard register numbers REGNO (table) + 0, 2, 4 and 6.
(define_insn "neon_vtbx4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
	(unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
		      (match_operand:OI 2 "s_register_operand" "w")
		      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  rtx asm_ops[6];
  int table_regno = REGNO (operands[2]);
  int i;

  /* Slot 0 is the result, slots 1-4 the table list, slot 5 the indices.  */
  asm_ops[0] = operands[0];
  for (i = 0; i < 4; i++)
    asm_ops[i + 1] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[5] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
4168
;; Expander that builds the two-output VTRN parallel: operand 0 receives
;; UNSPEC_VTRN1 and operand 3 receives UNSPEC_VTRN2 of the same two inputs
;; (operands 1 and 2).  There is no preparation code; the pattern is emitted
;; as written.
(define_expand "neon_vtrn<mode>_internal"
  [(parallel
    [(set (match_operand:VDQW 0 "s_register_operand" "")
	  (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
			(match_operand:VDQW 2 "s_register_operand" "")]
	   UNSPEC_VTRN1))
     (set (match_operand:VDQW 3 "s_register_operand" "")
          (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
  "TARGET_NEON"
  ""
)
4180
;; Note: Different operand numbering to handle tied registers correctly.
;; Here the outputs are operands 0 and 2 (both earlyclobber), and the inputs
;; are operand 1, tied to output 0, and operand 3, tied to output 2.  Because
;; each input shares a register with an output, the template names only %0
;; and %2.
(define_insn "*neon_vtrn<mode>_insn"
  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
                      (match_operand:VDQW 3 "s_register_operand" "2")]
                     UNSPEC_VTRN1))
   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
         (unspec:VDQW [(match_dup 1) (match_dup 3)]
                     UNSPEC_VTRN2))]
  "TARGET_NEON"
  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_permute<q>")]
)
4194
;; Expander that builds the two-output VZIP parallel: operand 0 receives
;; UNSPEC_VZIP1 and operand 3 receives UNSPEC_VZIP2 of the same two inputs
;; (operands 1 and 2).  There is no preparation code; the pattern is emitted
;; as written.
(define_expand "neon_vzip<mode>_internal"
  [(parallel
    [(set (match_operand:VDQW 0 "s_register_operand" "")
	  (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
	  	        (match_operand:VDQW 2 "s_register_operand" "")]
		       UNSPEC_VZIP1))
    (set (match_operand:VDQW 3 "s_register_operand" "")
	 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
  "TARGET_NEON"
  ""
)
4206
;; Note: Different operand numbering to handle tied registers correctly.
;; Here the outputs are operands 0 and 2 (both earlyclobber), and the inputs
;; are operand 1, tied to output 0, and operand 3, tied to output 2.  Because
;; each input shares a register with an output, the template names only %0
;; and %2.
(define_insn "*neon_vzip<mode>_insn"
  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
                      (match_operand:VDQW 3 "s_register_operand" "2")]
                     UNSPEC_VZIP1))
   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
        (unspec:VDQW [(match_dup 1) (match_dup 3)]
                     UNSPEC_VZIP2))]
  "TARGET_NEON"
  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
4220
;; Expander that builds the two-output VUZP parallel: operand 0 receives
;; UNSPEC_VUZP1 and operand 3 receives UNSPEC_VUZP2 of the same two inputs
;; (operands 1 and 2).  There is no preparation code; the pattern is emitted
;; as written.
(define_expand "neon_vuzp<mode>_internal"
  [(parallel
    [(set (match_operand:VDQW 0 "s_register_operand" "")
	  (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
			(match_operand:VDQW 2 "s_register_operand" "")]
	   UNSPEC_VUZP1))
     (set (match_operand:VDQW 3 "s_register_operand" "")
	  (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
  "TARGET_NEON"
  ""
)
4232
;; Note: Different operand numbering to handle tied registers correctly.
;; Compared with neon_vuzp<mode>_internal, the second input is operand 3
;; and the second output is operand 2.  Inputs 1 and 3 are tied to outputs
;; 0 and 2 ("0" and "2" constraints), both outputs are early-clobber, and
;; the template prints only the two output registers.
(define_insn "*neon_vuzp<mode>_insn"
  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
                      (match_operand:VDQW 3 "s_register_operand" "2")]
                     UNSPEC_VUZP1))
   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
        (unspec:VDQW [(match_dup 1) (match_dup 3)]
                     UNSPEC_VUZP2))]
  "TARGET_NEON"
  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
4246
;; Single-vector load expressed as UNSPEC_VLD1 of a structure memory
;; operand; the RTL has the same shape as the neon_vld1<mode> insn.
(define_expand "vec_load_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
                     UNSPEC_VLD1))]
  "TARGET_NEON")
4252
;; Whole-vector vld1 from a "Um" memory operand into a NEON register.
(define_insn "neon_vld1<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
                    UNSPEC_VLD1))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)
4261
;; By the time the RTL reaches this pattern the lane number is in GCC lane
;; order, because arm_expand_neon_args flipped it.  The output code below
;; converts it back to architectural lane order before printing.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:VDX 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  /* Replace the lane operand with its architectural lane number.  */
  const HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  operands[3] = GEN_INT (arch_lane);

  /* A single-element vector is loaded without a lane index.  */
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  return (nunits == 1
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
4283
;; Quad-register form.  See the comment on the VDX neon_vld1_lane pattern
;; for why the lane number is converted on big-endian targets.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:VQX 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  int half_regno = REGNO (operands[0]);

  /* Lanes in the upper half are addressed through the half-width
     register two hard register numbers further on, with the lane index
     rebased to that half.  */
  if (arch_lane >= half)
    {
      arch_lane -= half;
      half_regno += 2;
    }
  operands[3] = GEN_INT (arch_lane);
  operands[0] = gen_rtx_REG (<V_HALF>mode, half_regno);

  /* With two elements each half holds exactly one, so no lane index is
     printed.  */
  return (nunits == 2
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
4312
;; Load one element from memory and duplicate it into every lane of a
;; double-word vector (vec_duplicate), using the all-lanes "[]" syntax.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
        (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%P0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
4320
;; Special case for DImode.  Treat it exactly like a simple load: the
;; expander produces the same UNSPEC_VLD1 RTL shape that neon_vld1<mode>
;; matches, and its preparation statement is empty.
(define_expand "neon_vld1_dupdi"
  [(set (match_operand:DI 0 "s_register_operand" "")
        (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
		   UNSPEC_VLD1))]
  "TARGET_NEON"
  ""
)
4329
;; Quad-register duplicate load: one element is loaded from memory and
;; replicated into all lanes of both registers printed by %e0 and %f0.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w")
        (vec_duplicate:VQ2
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
4339
;; V2DI duplicate load.  Never output directly (template "#"); after
;; reload it is split into a DImode load of the low half followed by a
;; copy of that low half into the high half.
(define_insn_and_split "neon_vld1_dupv2di"
   [(set (match_operand:V2DI 0 "s_register_operand" "=w")
    (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
   "TARGET_NEON"
   "#"
   "&& reload_completed"
   [(const_int 0)]
   {
    /* Load the 64-bit value into the low half of the destination.  */
    rtx low_half = gen_lowpart (DImode, operands[0]);
    emit_insn (gen_neon_vld1_dupdi (low_half, operands[1]));

    /* Replicate it into the high half.  */
    rtx high_half = gen_highpart (DImode, operands[0]);
    emit_move_insn (high_half, low_half);
    DONE;
    }
  [(set_attr "length" "8")
   (set_attr "type" "neon_load1_all_lanes_q")]
)
4356
;; Single-vector store expressed as UNSPEC_VST1 into a structure memory
;; operand; the RTL has the same shape as the neon_vst1<mode> insn.
(define_expand "vec_store_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand")
	(unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
		     UNSPEC_VST1))]
  "TARGET_NEON")
4362
;; Whole-vector vst1 from a NEON register to a "Um" memory operand.
(define_insn "neon_vst1<mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
	(unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
		     UNSPEC_VST1))]
  "TARGET_NEON"
  "vst1.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])
4370
;; Store one lane of a double-word vector.  See the comment on
;; neon_vld1_lane for why the lane number is converted on big-endian
;; targets.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
	(unspec:<V_elem>
	  [(match_operand:VDX 1 "s_register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	  UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  /* Replace the lane operand with its architectural lane number.  */
  const HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  operands[2] = GEN_INT (arch_lane);

  /* A single-element vector is stored without a lane index.  */
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  return (nunits == 1
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
4391
;; Store one lane of a quad-word vector.  See the comment on
;; neon_vld1_lane for why the lane number is converted on big-endian
;; targets.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
	(unspec:<V_elem>
	  [(match_operand:VQX 1 "s_register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	  UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  int half_regno = REGNO (operands[1]);

  /* Lanes in the upper half are addressed through the half-width
     register two hard register numbers further on, with the lane index
     rebased to that half.  */
  if (arch_lane >= half)
    {
      arch_lane -= half;
      half_regno += 2;
    }
  operands[2] = GEN_INT (arch_lane);
  operands[1] = gen_rtx_REG (<V_HALF>mode, half_regno);

  /* With two elements each half holds exactly one, so no lane index is
     printed.  */
  return (nunits == 2
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
4419
;; Two-vector structure load into a TImode register group.  The dummy
;; UNSPEC_VSTRUCTDUMMY operand only carries the element vector mode (VDX);
;; the RTL has the same shape as the TImode neon_vld2<mode> insn.
(define_expand "vec_load_lanesti<mode>"
  [(set (match_operand:TI 0 "s_register_operand")
        (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_VLD2))]
  "TARGET_NEON")
4426
;; Two-vector structure load into a TImode register group.  For 64-bit
;; elements a vld1.64 is emitted instead of vld2, and the "type" attribute
;; follows the same split.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON"
{
  /* Pick the mnemonic from the element size.  */
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld2.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_2reg<q>")
                    (const_string "neon_load2_2reg<q>")))]
)
4444
;; Two-vector structure load of quad-word vectors into an OImode register
;; group.  The dummy UNSPEC_VSTRUCTDUMMY operand only carries the element
;; vector mode (VQ2); the RTL has the same shape as the OImode
;; neon_vld2<mode> insn.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_VLD2))]
  "TARGET_NEON")
4451
;; Two-vector structure load of quad-word vectors into an OImode register
;; group, emitted as a single vld2.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON"
  "vld2.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load2_2reg_q")])
4460
;; Load one lane of each vector in a two-vector TImode group.  See the
;; comment on neon_vld1_lane for why the lane number is converted on
;; big-endian targets.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:TI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  const int first = REGNO (operands[0]);
  rtx ops[4];

  /* The two DImode registers sit two hard register numbers apart.  */
  for (int i = 0; i < 2; i++)
    ops[i] = gen_rtx_REG (DImode, first + 2 * i);
  ops[2] = operands[1];
  ops[3] = GEN_INT (arch_lane);
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
4484
;; Load one lane of each vector in a two-vector OImode group of quad-word
;; vectors.  See the comment on neon_vld1_lane for why the lane number is
;; converted on big-endian targets.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  int first = REGNO (operands[0]);
  rtx ops[4];

  /* Upper-half lanes use the registers two hard register numbers
     further on, with the lane index rebased to that half.  */
  if (arch_lane >= half)
    {
      arch_lane -= half;
      first += 2;
    }

  /* The two DImode registers sit four hard register numbers apart.  */
  for (int i = 0; i < 2; i++)
    ops[i] = gen_rtx_REG (DImode, first + 4 * i);
  ops[2] = operands[1];
  ops[3] = GEN_INT (arch_lane);
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
4514
;; Load a two-element structure and duplicate it across all lanes of a
;; TImode register group.  Single-element vector modes are emitted as a
;; plain vld1 instead, and the "type" attribute follows the same split.
(define_insn "neon_vld2_dup<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_DUP))]
  "TARGET_NEON"
{
  /* Nothing to duplicate when the vector has a single element.  */
  if (!(GET_MODE_NUNITS (<MODE>mode) > 1))
    return "vld1.<V_sz_elem>\t%h0, %A1";

  return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load2_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))]
)
4532
;; Two-vector structure store from a TImode register group.  The dummy
;; UNSPEC_VSTRUCTDUMMY operand only carries the element vector mode (VDX);
;; the RTL has the same shape as the TImode neon_vst2<mode> insn.
(define_expand "vec_store_lanesti<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand")
	(unspec:TI [(match_operand:TI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON")
4539
;; Two-vector structure store from a TImode register group.  For 64-bit
;; elements a vst1.64 is emitted instead of vst2.
;;
;; The "type" attribute mirrors neon_vld2<mode>: the vst2 form stores
;; whole registers, so it is classified as neon_store2_2reg<q> (the store
;; counterpart of neon_load2_2reg<q>) rather than as a single-lane store.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
        (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst2.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_2reg<q>")
                    (const_string "neon_store2_2reg<q>")))]
)
4557
;; Two-vector structure store of quad-word vectors from an OImode register
;; group.  The dummy UNSPEC_VSTRUCTDUMMY operand only carries the element
;; vector mode (VQ2); the RTL has the same shape as the OImode
;; neon_vst2<mode> insn.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
	(unspec:OI [(match_operand:OI 1 "s_register_operand")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON")
4564
;; Two-vector structure store of quad-word vectors from an OImode register
;; group, emitted as a single vst2.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
	(unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
		    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_VST2))]
  "TARGET_NEON"
  "vst2.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store2_4reg<q>")]
)
4574
;; Store one lane of each vector in a two-vector TImode group.  See the
;; comment on neon_vld1_lane for why the lane number is converted on
;; big-endian targets.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
	(unspec:<V_two_elem>
	  [(match_operand:TI 1 "s_register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")
	   (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  const int first = REGNO (operands[1]);
  rtx ops[4];

  ops[0] = operands[0];
  /* The two DImode registers sit two hard register numbers apart.  */
  for (int i = 0; i < 2; i++)
    ops[1 + i] = gen_rtx_REG (DImode, first + 2 * i);
  ops[3] = GEN_INT (arch_lane);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
4598
;; Store one lane of each vector in a two-vector OImode group of
;; quad-word vectors.  See the comment on neon_vld1_lane for why the lane
;; number is converted on big-endian targets.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
           [(match_operand:OI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  int first = REGNO (operands[1]);
  rtx ops[4];

  /* Upper-half lanes use the registers two hard register numbers
     further on, with the lane index rebased to that half.  */
  if (arch_lane >= half)
    {
      arch_lane -= half;
      first += 2;
    }

  ops[0] = operands[0];
  /* The two DImode registers sit four hard register numbers apart.  */
  for (int i = 0; i < 2; i++)
    ops[1 + i] = gen_rtx_REG (DImode, first + 4 * i);
  ops[3] = GEN_INT (arch_lane);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
4628
;; Three-vector structure load into an EImode register group.  The dummy
;; UNSPEC_VSTRUCTDUMMY operand only carries the element vector mode (VDX);
;; the RTL has the same shape as the EImode neon_vld3<mode> insn.
(define_expand "vec_load_lanesei<mode>"
  [(set (match_operand:EI 0 "s_register_operand")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_VLD3))]
  "TARGET_NEON")
4635
;; Three-vector structure load into an EImode register group.  For 64-bit
;; elements a vld1.64 is emitted instead of vld3, and the "type" attribute
;; follows the same split.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON"
{
  /* Pick the mnemonic from the element size.  */
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld3.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_3reg<q>")
                    (const_string "neon_load3_3reg<q>")))]
)
4653
;; Three-vector structure load of quad-word vectors into a CImode register
;; group.  Simply forwards to the neon_vld3 expander for the same mode.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld3<mode> (dest, src));
  DONE;
})
4663
;; Three-vector structure load of quad-word vectors.  The CImode memory
;; operand is split into two consecutive EImode pieces: the first is
;; loaded by neon_vld3qa, the second by neon_vld3qb, which also takes the
;; partially written destination as an input.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First EImode piece, at offset 0 of the source.  */
  rtx first_piece = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (operands[0], first_piece));

  /* Second EImode piece, immediately after the first.  */
  rtx second_piece
    = adjust_address (first_piece, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vld3qb<mode> (operands[0], second_piece, operands[0]));
  DONE;
})
4678
;; First half of a quad-word vld3: loads an EImode piece into the DImode
;; registers at offsets 0, 4 and 8 from the first hard register of the
;; CImode destination.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx ops[4];

  for (int i = 0; i < 3; i++)
    ops[i] = gen_rtx_REG (DImode, base + 4 * i);
  ops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
4697
;; Second half of a quad-word vld3: loads an EImode piece into the DImode
;; registers at offsets 2, 6 and 10 from the first hard register of the
;; CImode destination.  Operand 2 is the previous value of the destination,
;; tied to operand 0.
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]) + 2;
  rtx ops[4];

  for (int i = 0; i < 3; i++)
    ops[i] = gen_rtx_REG (DImode, base + 4 * i);
  ops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
4717
;; Load one lane of each vector in a three-vector EImode group.  See the
;; comment on neon_vld1_lane for why the lane number is converted on
;; big-endian targets.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:EI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  const int first = REGNO (operands[0]);
  rtx ops[5];

  /* The three DImode registers sit two hard register numbers apart.  */
  for (int i = 0; i < 3; i++)
    ops[i] = gen_rtx_REG (DImode, first + 2 * i);
  ops[3] = operands[1];
  ops[4] = GEN_INT (arch_lane);
  /* The memory operand is printed with plain %3 here, not %A3.  */
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
4743
;; Load one lane of each vector in a three-vector CImode group of
;; quad-word vectors.  See the comment on neon_vld1_lane for why the lane
;; number is converted on big-endian targets.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  int first = REGNO (operands[0]);
  rtx ops[5];

  /* Upper-half lanes use the registers two hard register numbers
     further on, with the lane index rebased to that half.  */
  if (arch_lane >= half)
    {
      arch_lane -= half;
      first += 2;
    }

  /* The three DImode registers sit four hard register numbers apart.  */
  for (int i = 0; i < 3; i++)
    ops[i] = gen_rtx_REG (DImode, first + 4 * i);
  ops[3] = operands[1];
  ops[4] = GEN_INT (arch_lane);
  /* The memory operand is printed with plain %3 here, not %A3.  */
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
4775
;; Load a three-element structure and duplicate it across all lanes of an
;; EImode register group.  Single-element vector modes are emitted as a
;; plain vld1 instead, and the "type" attribute follows the same split.
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  /* Nothing to duplicate when the vector has a single element.  */
  if (!(GET_MODE_NUNITS (<MODE>mode) > 1))
    return "vld1.<V_sz_elem>\t%h0, %A1";

  const int first = REGNO (operands[0]);
  rtx ops[4];

  /* The three DImode registers sit two hard register numbers apart.  */
  for (int i = 0; i < 3; i++)
    ops[i] = gen_rtx_REG (DImode, first + 2 * i);
  ops[3] = operands[1];
  /* The memory operand is printed with plain %3 here, not %A3.  */
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
  return "";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load3_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))])
4801
;; Three-vector structure store from an EImode register group.  The dummy
;; UNSPEC_VSTRUCTDUMMY operand only carries the element vector mode (VDX);
;; the RTL has the same shape as the EImode neon_vst3<mode> insn.
(define_expand "vec_store_lanesei<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand")
	(unspec:EI [(match_operand:EI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON")
4808
;; Three-vector structure store from an EImode register group.  For 64-bit
;; elements a vst1.64 is emitted instead of vst3.
;;
;; The "type" attribute mirrors neon_vld3<mode>: the vst3 form stores
;; whole registers, so it is classified as neon_store3_3reg<q> (the type
;; neon_vst3qa and neon_vst3qb also use) rather than as a single-lane
;; store.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst3.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_3reg<q>")
                    (const_string "neon_store3_3reg<q>")))])
4825
;; Three-vector structure store of quad-word vectors from a CImode
;; register group.  Simply forwards to the neon_vst3 expander for the same
;; mode.
(define_expand "vec_store_lanesci<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vst3<mode> (dest, src));
  DONE;
})
4835
;; Three-vector structure store of quad-word vectors.  The CImode memory
;; operand is split into two consecutive EImode pieces: the first is
;; written by neon_vst3qa and the second by neon_vst3qb, both reading the
;; same CImode source register group.
(define_expand "neon_vst3<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First EImode piece, at offset 0 of the destination.  */
  rtx first_piece = adjust_address (operands[0], EImode, 0);
  emit_insn (gen_neon_vst3qa<mode> (first_piece, operands[1]));

  /* Second EImode piece, immediately after the first.  */
  rtx second_piece
    = adjust_address (first_piece, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vst3qb<mode> (second_piece, operands[1]));
  DONE;
})
4850
;; First half of a quad-word vst3: stores the DImode registers at offsets
;; 0, 4 and 8 from the first hard register of the CImode source into an
;; EImode piece of memory.
(define_insn "neon_vst3qa<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx ops[4];

  ops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    ops[1 + i] = gen_rtx_REG (DImode, base + 4 * i);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
4869
;; Second half of a quad-word vst3: stores the DImode registers at offsets
;; 2, 6 and 10 from the first hard register of the CImode source into an
;; EImode piece of memory.
(define_insn "neon_vst3qb<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]) + 2;
  rtx ops[4];

  ops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    ops[1 + i] = gen_rtx_REG (DImode, base + 4 * i);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
4888
;; Store one lane of each vector in a three-vector EImode group.  See the
;; comment on neon_vld1_lane for why the lane number is converted on
;; big-endian targets.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
           [(match_operand:EI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  const int first = REGNO (operands[1]);
  rtx ops[5];

  ops[0] = operands[0];
  /* The three DImode registers sit two hard register numbers apart.  */
  for (int i = 0; i < 3; i++)
    ops[1 + i] = gen_rtx_REG (DImode, first + 2 * i);
  ops[4] = GEN_INT (arch_lane);
  /* The memory operand is printed with plain %0 here, not %A0.  */
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
4914
;; Store one lane of each vector in a three-vector CImode group of
;; quad-word vectors.  See the comment on neon_vld1_lane for why the lane
;; number is converted on big-endian targets.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
           [(match_operand:CI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  int first = REGNO (operands[1]);
  rtx ops[5];

  /* Upper-half lanes use the registers two hard register numbers
     further on, with the lane index rebased to that half.  */
  if (arch_lane >= half)
    {
      arch_lane -= half;
      first += 2;
    }

  ops[0] = operands[0];
  /* The three DImode registers sit four hard register numbers apart.  */
  for (int i = 0; i < 3; i++)
    ops[1 + i] = gen_rtx_REG (DImode, first + 4 * i);
  ops[4] = GEN_INT (arch_lane);
  /* The memory operand is printed with plain %0 here, not %A0.  */
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
4946
;; Four-vector structure load of double-word vectors into an OImode
;; register group.  The dummy UNSPEC_VSTRUCTDUMMY operand only carries the
;; element vector mode (VDX); the RTL has the same shape as the OImode
;; neon_vld4<mode> insn.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_VLD4))]
  "TARGET_NEON")
4953
;; Four-vector structure load into an OImode register group.  For 64-bit
;; elements a vld1.64 is emitted instead of vld4, and the "type" attribute
;; follows the same split.
(define_insn "neon_vld4<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4))]
  "TARGET_NEON"
{
  /* Pick the mnemonic from the element size.  */
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld4.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_4reg<q>")
                    (const_string "neon_load4_4reg<q>")))]
)
4971
;; Four-vector structure load of quad-word vectors into an XImode register
;; group.  Simply forwards to the neon_vld4 expander for the same mode.
(define_expand "vec_load_lanesxi<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld4<mode> (dest, src));
  DONE;
})
4981
;; Four-vector structure load of quad-word vectors.  The XImode memory
;; operand is split into two consecutive OImode pieces: the first is
;; loaded by neon_vld4qa, the second by neon_vld4qb, which also takes the
;; partially written destination as an input.
(define_expand "neon_vld4<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First OImode piece, at offset 0 of the source.  */
  rtx first_piece = adjust_address (operands[1], OImode, 0);
  emit_insn (gen_neon_vld4qa<mode> (operands[0], first_piece));

  /* Second OImode piece, immediately after the first.  */
  rtx second_piece
    = adjust_address (first_piece, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vld4qb<mode> (operands[0], second_piece, operands[0]));
  DONE;
})
4996
;; First half of a quad-word vld4: loads an OImode piece into the DImode
;; registers at offsets 0, 4, 8 and 12 from the first hard register of the
;; XImode destination.
(define_insn "neon_vld4qa<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx ops[5];

  for (int i = 0; i < 4; i++)
    ops[i] = gen_rtx_REG (DImode, base + 4 * i);
  ops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
5016
;; Second half of a quad-word vld4: loads an OImode piece into the DImode
;; registers at offsets 2, 6, 10 and 14 from the first hard register of
;; the XImode destination.  Operand 2 is the previous value of the
;; destination, tied to operand 0.
(define_insn "neon_vld4qb<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]) + 2;
  rtx ops[5];

  for (int i = 0; i < 4; i++)
    ops[i] = gen_rtx_REG (DImode, base + 4 * i);
  ops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
5037
;; Load one lane of each vector in a four-vector OImode group.  See the
;; comment on neon_vld1_lane for why the lane number is converted on
;; big-endian targets.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
  const int first = REGNO (operands[0]);
  rtx ops[6];

  /* The four DImode registers sit two hard register numbers apart.  */
  for (int i = 0; i < 4; i++)
    ops[i] = gen_rtx_REG (DImode, first + 2 * i);
  ops[4] = operands[1];
  ops[5] = GEN_INT (arch_lane);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
5064
;; Four-element single-lane load into an XI register group (VQ_HS modes).
;; Operand 3 is the lane number; operand 2 is the previous value of the
;; destination, tied to operand 0.
;; See the comment on neon_vld1_lane for the reason why the lane numbers are
;; reversed here on big-endian targets.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
	(unspec:XI
	  [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
	   (match_operand:XI 2 "s_register_operand" "0")
	   (match_operand:SI 3 "immediate_operand" "i")
	   (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  const HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT lane_no
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int first = REGNO (operands[0]);
  rtx ops[6];
  int i;

  /* A lane in the upper half of the vector is reached through the
     register two numbers further on, with the lane index made relative
     to that half.  */
  if (lane_no >= half)
    {
      lane_no -= half;
      first += 2;
    }

  /* Four DImode registers, four register numbers apart.  */
  for (i = 0; i < 4; i++)
    ops[i] = gen_rtx_REG (DImode, first + 4 * i);
  ops[4] = operands[1];
  ops[5] = GEN_INT (lane_no);

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
		   ops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
5097
;; Four-element load-and-duplicate into an OI register group.  Modes with a
;; single element are emitted as a vld1 instead of a vld4, and the "type"
;; attribute below follows the same distinction.
(define_insn "neon_vld4_dup<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
	(unspec:OI
	  [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
	   (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VLD4_DUP))]
  "TARGET_NEON"
{
  int first;
  rtx ops[5];
  int i;

  /* Single-element modes need no explicit register list.  */
  if (GET_MODE_NUNITS (<MODE>mode) <= 1)
    return "vld1.<V_sz_elem>\t%h0, %A1";

  /* Four DImode registers, two register numbers apart.  */
  first = REGNO (operands[0]);
  for (i = 0; i < 4; i++)
    ops[i] = gen_rtx_REG (DImode, first + 2 * i);
  ops[4] = operands[1];

  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
		   ops);
  return "";
}
  [(set (attr "type")
	(if_then_else
	  (gt (const_string "<V_mode_nunits>") (const_string "1"))
	  (const_string "neon_load4_all_lanes<q>")
	  (const_string "neon_load1_1reg<q>")))]
)
5126
;; Store-lanes expander for an OImode register group.  There is no
;; preparation code: the expansion is exactly the UNSPEC_VST4 set below.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
	(unspec:OI
	  [(match_operand:OI 1 "s_register_operand")
	   (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST4))]
  "TARGET_NEON")
5133
;; Four-element structure store of an OI register group.  When the element
;; size is 64 the store is emitted as vst1.64 rather than vst4, and the
;; "type" attribute is chosen accordingly.
(define_insn "neon_vst4<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
	(unspec:OI
	  [(match_operand:OI 1 "s_register_operand" "w")
	   (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST4))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
	  ? "vst1.64\t%h1, %A0"
	  : "vst4.<V_sz_elem>\t%h1, %A0");
}
  [(set (attr "type")
	(if_then_else
	  (eq (const_string "<V_sz_elem>") (const_string "64"))
	  (const_string "neon_store1_4reg<q>")
	  (const_string "neon_store4_4reg<q>")))]
)
5151
;; Store-lanes expander for an XImode register group; it simply forwards
;; both operands to the neon_vst4 expander for the same mode.
(define_expand "vec_store_lanesxi<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vst4<mode> (dest, src));
  DONE;
})
5161
;; Store an XImode register group as two OImode halves: neon_vst4qa writes
;; the block at offset 0 and neon_vst4qb the block that follows it.  Both
;; insns receive the whole XI source register.
(define_expand "neon_vst4<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx src = operands[1];
  rtx first_half;
  rtx second_half;

  first_half = adjust_address (operands[0], OImode, 0);
  emit_insn (gen_neon_vst4qa<mode> (first_half, src));

  /* The second block starts one OImode size after the first.  */
  second_half = adjust_address (first_half, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vst4qb<mode> (second_half, src));
  DONE;
})
5176
;; Four-element structure store, UNSPEC_VST4A part: writes an OImode memory
;; block from registers of the XI source group.
(define_insn "neon_vst4qa<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
	(unspec:OI
	  [(match_operand:XI 1 "s_register_operand" "w")
	   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST4A))]
  "TARGET_NEON"
{
  /* The register list starts at the base of the source group and
     advances in steps of four register numbers.  */
  const int first = REGNO (operands[1]);
  rtx ops[5];
  int i;

  ops[0] = operands[0];
  for (i = 0; i < 4; i++)
    ops[i + 1] = gen_rtx_REG (DImode, first + 4 * i);

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
5196
;; Four-element structure store, UNSPEC_VST4B part: writes an OImode memory
;; block from registers of the XI source group.
(define_insn "neon_vst4qb<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
	(unspec:OI
	  [(match_operand:XI 1 "s_register_operand" "w")
	   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST4B))]
  "TARGET_NEON"
{
  /* The register list starts two register numbers above the base of the
     source group and advances in steps of four.  */
  const int first = REGNO (operands[1]) + 2;
  rtx ops[5];
  int i;

  ops[0] = operands[0];
  for (i = 0; i < 4; i++)
    ops[i + 1] = gen_rtx_REG (DImode, first + 4 * i);

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
5216
;; Four-element single-lane store from an OI register group (VD_LANE modes).
;; Operand 2 is the lane number.
;; See the comment on neon_vld1_lane for the reason why the lane numbers are
;; reversed here on big-endian targets.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
	(unspec:<V_four_elem>
	  [(match_operand:OI 1 "s_register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")
	   (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
	  UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT lane_no
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  const int first = REGNO (operands[1]);
  rtx ops[6];
  int i;

  ops[0] = operands[0];
  /* Four DImode registers, two register numbers apart.  */
  for (i = 0; i < 4; i++)
    ops[i + 1] = gen_rtx_REG (DImode, first + 2 * i);
  ops[5] = GEN_INT (lane_no);

  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
		   ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
5243
;; Four-element single-lane store from an XI register group (VQ_HS modes).
;; Operand 2 is the lane number.
;; See the comment on neon_vld1_lane for the reason why the lane numbers are
;; reversed here on big-endian targets.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
           [(match_operand:XI 1 "s_register_operand" "w")
            (match_operand:SI 2 "immediate_operand" "i")
            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
           UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[6];
  /* A lane in the upper half of the vector is reached through the
     register two numbers further on, with the lane index made relative
     to that half.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = operands[0];
  /* Four DImode registers, four register numbers apart.  */
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = gen_rtx_REG (DImode, regno + 12);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  ;; This is a single-lane store, so it is classified like the D-register
  ;; neon_vst4_lane pattern (and like the neon_vld4_lane loads) rather than
  ;; as a full four-register store.
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
5276
;; Extend (sign or zero, per the SE iterator) the elements of operand 1
;; selected by the parallel in operand 2, which must satisfy
;; vect_par_constant_low.  Little-endian only.
(define_insn "neon_vec_unpack<US>_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
	(SE:<V_unpack>
	  (vec_select:<V_HALF>
	    (match_operand:VU 1 "register_operand" "w")
	    (match_operand:VU 2 "vect_par_constant_low" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %e1"
  [(set_attr "type" "neon_shift_imm_long")]
)
5286
;; Extend (sign or zero, per the SE iterator) the elements of operand 1
;; selected by the parallel in operand 2, which must satisfy
;; vect_par_constant_high.  Little-endian only.
(define_insn "neon_vec_unpack<US>_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
	(SE:<V_unpack>
	  (vec_select:<V_HALF>
	    (match_operand:VU 1 "register_operand" "w")
	    (match_operand:VU 2 "vect_par_constant_high" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %f1"
  [(set_attr "type" "neon_shift_imm_long")]
)
5296
;; Unpack the upper half of a VU-mode vector.  Builds a PARALLEL holding
;; the element indices nunits/2 .. nunits-1 and hands it to
;; neon_vec_unpack<US>_hi_<mode> as the selector.  Little-endian only.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    const int half = <V_mode_nunits> / 2;
    rtvec indices = rtvec_alloc (half);
    rtx selector;
    int elt;

    for (elt = 0; elt < half; elt++)
      RTVEC_ELT (indices, elt) = GEN_INT (half + elt);

    selector = gen_rtx_PARALLEL (<MODE>mode, indices);
    emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0], operands[1],
						   selector));
    DONE;
  }
)
5315
;; Unpack the lower half of a VU-mode vector.  Builds a PARALLEL holding
;; the element indices 0 .. nunits/2-1 and hands it to
;; neon_vec_unpack<US>_lo_<mode> as the selector.  Little-endian only.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    const int half = <V_mode_nunits> / 2;
    rtvec indices = rtvec_alloc (half);
    rtx selector;
    int elt;

    for (elt = 0; elt < half; elt++)
      RTVEC_ELT (indices, elt) = GEN_INT (elt);

    selector = gen_rtx_PARALLEL (<MODE>mode, indices);
    emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0], operands[1],
						   selector));
    DONE;
  }
)
5333
;; Widening multiply of the elements of operands 1 and 3 selected by the
;; parallel in operand 2 (vect_par_constant_low); the same selector is
;; applied to both inputs through match_dup.  Little-endian only.
(define_insn "neon_vec_<US>mult_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
	(mult:<V_unpack>
	  (SE:<V_unpack>
	    (vec_select:<V_HALF>
	      (match_operand:VU 1 "register_operand" "w")
	      (match_operand:VU 2 "vect_par_constant_low" "")))
	  (SE:<V_unpack>
	    (vec_select:<V_HALF>
	      (match_operand:VU 3 "register_operand" "w")
	      (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %e1, %e3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
5346
;; Widening multiply of the lower halves of two VU-mode vectors.  Builds a
;; PARALLEL with the element indices 0 .. nunits/2-1 and passes it as the
;; selector (operand 2) of neon_vec_<US>mult_lo_<mode>.  Little-endian only.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    const int half = <V_mode_nunits> / 2;
    rtvec indices = rtvec_alloc (half);
    rtx selector;
    int elt;

    for (elt = 0; elt < half; elt++)
      RTVEC_ELT (indices, elt) = GEN_INT (elt);
    selector = gen_rtx_PARALLEL (<MODE>mode, indices);

    /* The insn takes the selector between its two vector inputs.  */
    emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0], operands[1],
						 selector, operands[2]));
    DONE;
  }
)
5367
;; Widening multiply of the elements of operands 1 and 3 selected by the
;; parallel in operand 2 (vect_par_constant_high); the same selector is
;; applied to both inputs through match_dup.  Little-endian only.
(define_insn "neon_vec_<US>mult_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
	(mult:<V_unpack>
	  (SE:<V_unpack>
	    (vec_select:<V_HALF>
	      (match_operand:VU 1 "register_operand" "w")
	      (match_operand:VU 2 "vect_par_constant_high" "")))
	  (SE:<V_unpack>
	    (vec_select:<V_HALF>
	      (match_operand:VU 3 "register_operand" "w")
	      (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %f1, %f3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
5380
;; Widening multiply of the upper halves of two VU-mode vectors.  Builds a
;; PARALLEL with the element indices nunits/2 .. nunits-1 and passes it as
;; the selector (operand 2) of neon_vec_<US>mult_hi_<mode>.  Little-endian
;; only.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    const int half = <V_mode_nunits> / 2;
    rtvec indices = rtvec_alloc (half);
    rtx selector;
    int elt;

    for (elt = 0; elt < half; elt++)
      RTVEC_ELT (indices, elt) = GEN_INT (half + elt);
    selector = gen_rtx_PARALLEL (<MODE>mode, indices);

    /* The insn takes the selector between its two vector inputs.  */
    emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0], operands[1],
						 selector, operands[2]));
    DONE;
  }
)
5402
;; Widening shift left: operand 1 (a VW-mode vector) is shifted by the
;; constant in operand 2 and the result extended per the SE iterator.
(define_insn "neon_vec_<US>shiftl_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
	(SE:<V_widen>
	  (ashift:VW
	    (match_operand:VW 1 "register_operand" "w")
	    (match_operand:<V_innermode> 2
	      "const_neon_scalar_shift_amount_operand" ""))))]
  "TARGET_NEON"
  "vshll.<US><V_sz_elem> %q0, %P1, %2"
  [(set_attr "type" "neon_shift_imm_long")]
)
5413
;; Widening shift left of one half of a VU-mode vector: the half is taken
;; as the subreg of operand 1 at byte offset 0 and passed to the half-mode
;; neon_vec_<US>shiftl pattern.  Little-endian only.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    rtx half = simplify_gen_subreg (<V_HALF>mode, operands[1],
				    <MODE>mode, 0);

    emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], half,
						  operands[2]));
    DONE;
  }
)
5426
;; Widening shift left of one half of a VU-mode vector: the half is taken
;; as the subreg of operand 1 at a byte offset of one half-vector and
;; passed to the half-mode neon_vec_<US>shiftl pattern.  Little-endian only.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  {
    rtx half = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
				    GET_MODE_SIZE (<V_HALF>mode));

    emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], half,
						  operands[2]));
    DONE;
  }
)
5440
;; Vectorize for the non-neon-quad case: extend (sign or zero, per the SE
;; iterator) a whole VDI-mode vector into its widened mode.
(define_insn "neon_unpack<US>_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
	(SE:<V_widen>
	  (match_operand:VDI 1 "register_operand" "w")))]
  "TARGET_NEON"
  "vmovl.<US><V_sz_elem> %q0, %P1"
  [(set_attr "type" "neon_move")]
)
5449
;; Unpack for VDI modes: widen the whole input into a fresh register with
;; neon_unpack<US>_<mode>, then copy out one half with neon_vget_low.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx widened = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], widened));
  DONE;
}
)
5462
;; Unpack for VDI modes: widen the whole input into a fresh register with
;; neon_unpack<US>_<mode>, then copy out one half with neon_vget_high.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx widened = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], widened));
  DONE;
}
)
5475
;; Widening multiply of two whole VDI-mode vectors: both inputs are
;; extended per the SE iterator and multiplied in the widened mode.
(define_insn "neon_vec_<US>mult_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
	(mult:<V_widen>
	  (SE:<V_widen>
	    (match_operand:VDI 1 "register_operand" "w"))
	  (SE:<V_widen>
	    (match_operand:VDI 2 "register_operand" "w"))))]
  "TARGET_NEON"
  "vmull.<US><V_sz_elem> %q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
5486
;; Widening multiply for VDI modes: compute the full widened product into
;; a fresh register, then copy out one half with neon_vget_high.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
  "TARGET_NEON"
{
  rtx product = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
					    operands[2]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], product));
  DONE;
}
)
5501
;; Widening multiply for VDI modes: compute the full widened product into
;; a fresh register, then copy out one half with neon_vget_low.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
  "TARGET_NEON"
{
  rtx product = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
					    operands[2]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], product));
  DONE;
}
)
5516
;; Widening shift left for VDI modes: compute the full widened shift into
;; a fresh register, then copy out one half with neon_vget_high.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx shifted = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
					      operands[2]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], shifted));
  DONE;
}
)
5530
;; Widening shift left for VDI modes: compute the full widened shift into
;; a fresh register, then copy out one half with neon_vget_low.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_NEON"
{
  rtx shifted = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1],
					      operands[2]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], shifted));
  DONE;
}
)
5544
5545; FIXME: These instruction patterns can't be used safely in big-endian mode
5546; because the ordering of vector elements in Q registers is different from what
5547; the semantics of the instructions require.
5548
;; Truncate two VN-mode vectors and concatenate the narrowed results.  The
;; template is two vmovn instructions, hence length 8.  The destination is
;; earlyclobber ("=&w") because the first vmovn writes it before operand 2
;; has been read.  Little-endian only.
(define_insn "vec_pack_trunc_<mode>"
  [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
	(vec_concat:<V_narrow_pack>
	  (truncate:<V_narrow>
	    (match_operand:VN 1 "register_operand" "w"))
	  (truncate:<V_narrow>
	    (match_operand:VN 2 "register_operand" "w"))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)
5561
;; For the non-quad case: truncate a single VN-mode vector to its narrow
;; mode with one vmovn.  Little-endian only.
(define_insn "neon_vec_pack_trunc_<mode>"
  [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
	(truncate:<V_narrow>
	  (match_operand:VN 1 "register_operand" "w")))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovn.i<V_sz_elem>\t%P0, %q1"
  [(set_attr "type" "neon_move_narrow_q")]
)
5570
;; Pack-and-truncate for VSHFT modes: assemble the two inputs into a fresh
;; double-width register with move_lo_quad and move_hi_quad, then narrow
;; that register with neon_vec_pack_trunc.  Little-endian only.
(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<V_narrow_pack> 0 "register_operand" "")
   (match_operand:VSHFT 1 "register_operand" "")
   (match_operand:VSHFT 2 "register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx combined = gen_reg_rtx (<V_DOUBLE>mode);

  /* Operand 1 goes in through move_lo_quad, operand 2 through
     move_hi_quad.  */
  emit_insn (gen_move_lo_quad_<V_double> (combined, operands[1]));
  emit_insn (gen_move_hi_quad_<V_double> (combined, operands[2]));

  emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], combined));
  DONE;
})
5584
;; Absolute difference written as abs (minus (a, b)) on VCVTF modes.  Only
;; enabled under flag_unsafe_math_optimizations.
(define_insn "neon_vabd<mode>_2"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(abs:VCVTF
	  (minus:VCVTF
	    (match_operand:VCVTF 1 "s_register_operand" "w")
	    (match_operand:VCVTF 2 "s_register_operand" "w"))))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)
5593
;; Absolute difference where the subtraction appears as an UNSPEC_VSUB
;; rather than a minus.  Only enabled under flag_unsafe_math_optimizations.
(define_insn "neon_vabd<mode>_3"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
	(abs:VCVTF
	  (unspec:VCVTF
	    [(match_operand:VCVTF 1 "s_register_operand" "w")
	     (match_operand:VCVTF 2 "s_register_operand" "w")]
	    UNSPEC_VSUB)))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)
5603
;; Copy from core-to-neon regs, then extend, not vice-versa.

;; SImode -> DImode sign extension, after reload, when the destination is a
;; VFP register: duplicate the source into the destination register viewed
;; as V2SI, then arithmetic-shift the DImode value right by 32.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
	(sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2)
	(vec_duplicate:V2SI (match_dup 1)))
   (set (match_dup 0)
	(ashiftrt:DI (match_dup 0) (const_int 32)))]
  {
    /* Operand 2 is the destination hard register in V2SImode.  */
    const unsigned int dest_regno = REGNO (operands[0]);

    operands[2] = gen_rtx_REG (V2SImode, dest_regno);
  })
5615
;; HImode -> DImode sign extension, after reload, when the destination is a
;; VFP register: duplicate the source into the destination register viewed
;; as V4HI, then arithmetic-shift the DImode value right by 48.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
	(sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2)
	(vec_duplicate:V4HI (match_dup 1)))
   (set (match_dup 0)
	(ashiftrt:DI (match_dup 0) (const_int 48)))]
  {
    /* Operand 2 is the destination hard register in V4HImode.  */
    const unsigned int dest_regno = REGNO (operands[0]);

    operands[2] = gen_rtx_REG (V4HImode, dest_regno);
  })
5625
;; QImode -> DImode sign extension, after reload, when the destination is a
;; VFP register: duplicate the source into the destination register viewed
;; as V8QI, then arithmetic-shift the DImode value right by 56.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
	(sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2)
	(vec_duplicate:V8QI (match_dup 1)))
   (set (match_dup 0)
	(ashiftrt:DI (match_dup 0) (const_int 56)))]
  {
    /* Operand 2 is the destination hard register in V8QImode.  */
    const unsigned int dest_regno = REGNO (operands[0]);

    operands[2] = gen_rtx_REG (V8QImode, dest_regno);
  })
5635
;; SImode -> DImode zero extension, after reload, when the destination is a
;; VFP register: duplicate the source into the destination register viewed
;; as V2SI, then logical-shift the DImode value right by 32.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
	(zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2)
	(vec_duplicate:V2SI (match_dup 1)))
   (set (match_dup 0)
	(lshiftrt:DI (match_dup 0) (const_int 32)))]
  {
    /* Operand 2 is the destination hard register in V2SImode.  */
    const unsigned int dest_regno = REGNO (operands[0]);

    operands[2] = gen_rtx_REG (V2SImode, dest_regno);
  })
5645
;; HImode -> DImode zero extension, after reload, when the destination is a
;; VFP register: duplicate the source into the destination register viewed
;; as V4HI, then logical-shift the DImode value right by 48.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
	(zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2)
	(vec_duplicate:V4HI (match_dup 1)))
   (set (match_dup 0)
	(lshiftrt:DI (match_dup 0) (const_int 48)))]
  {
    /* Operand 2 is the destination hard register in V4HImode.  */
    const unsigned int dest_regno = REGNO (operands[0]);

    operands[2] = gen_rtx_REG (V4HImode, dest_regno);
  })
5655
;; QImode -> DImode zero extension, after reload, when the destination is a
;; VFP register: duplicate the source into the destination register viewed
;; as V8QI, then logical-shift the DImode value right by 56.
(define_split
  [(set (match_operand:DI 0 "s_register_operand" "")
	(zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
  "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
  [(set (match_dup 2)
	(vec_duplicate:V8QI (match_dup 1)))
   (set (match_dup 0)
	(lshiftrt:DI (match_dup 0) (const_int 56)))]
  {
    /* Operand 2 is the destination hard register in V8QImode.  */
    const unsigned int dest_regno = REGNO (operands[0]);

    operands[2] = gen_rtx_REG (V8QImode, dest_regno);
  })
5665