1// Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
2// Copyright (c) 2020, 2021, Arm Limited. All rights reserved.
3// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4//
5// This code is free software; you can redistribute it and/or modify it
6// under the terms of the GNU General Public License version 2 only, as
7// published by the Free Software Foundation.
8//
9// This code is distributed in the hope that it will be useful, but WITHOUT
10// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12// version 2 for more details (a copy is included in the LICENSE file that
13// accompanied this code).
14//
15// You should have received a copy of the GNU General Public License version
16// 2 along with this work; if not, write to the Free Software Foundation,
17// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18//
19// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20// or visit www.oracle.com if you need additional information or have any
21// questions.
22//
23//
24
25// This file is automatically generated by running "m4 aarch64_neon_ad.m4". Do not edit ----
26
27// AArch64 NEON Architecture Description File
28
29// ====================VECTOR INSTRUCTIONS==================================
30
31// ------------------------------ Load/store/reinterpret -----------------------
32
33// Load Vector (16 bits)
instruct loadV2(vecD dst, vmem2 mem) %{
  // Applies to LoadVector nodes whose memory_size() is 2 bytes; the
  // instruction bytes are produced by the aarch64_enc_ldrvH encoding.
  match(Set dst (LoadVector mem));
  predicate(n->as_LoadVector()->memory_size() == 2);
  ins_cost(4 * INSN_COST);
  format %{ "ldrh   $dst,$mem\t# vector (16 bits)" %}
  ins_encode(aarch64_enc_ldrvH(dst, mem));
  ins_pipe(vload_reg_mem64);
%}
43
44// Load Vector (32 bits)
instruct loadV4(vecD dst, vmem4 mem) %{
  // Applies to LoadVector nodes whose memory_size() is 4 bytes; the
  // instruction bytes are produced by the aarch64_enc_ldrvS encoding.
  match(Set dst (LoadVector mem));
  predicate(n->as_LoadVector()->memory_size() == 4);
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode(aarch64_enc_ldrvS(dst, mem));
  ins_pipe(vload_reg_mem64);
%}
54
55// Load Vector (64 bits)
instruct loadV8(vecD dst, vmem8 mem) %{
  // Applies to LoadVector nodes whose memory_size() is 8 bytes; the
  // instruction bytes are produced by the aarch64_enc_ldrvD encoding.
  match(Set dst (LoadVector mem));
  predicate(n->as_LoadVector()->memory_size() == 8);
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode(aarch64_enc_ldrvD(dst, mem));
  ins_pipe(vload_reg_mem64);
%}
65
66// Load Vector (128 bits)
instruct loadV16(vecX dst, vmem16 mem) %{
  // Applies to LoadVector nodes whose memory_size() is 16 bytes, and only
  // while UseSVE is 0; the instruction bytes are produced by the
  // aarch64_enc_ldrvQ encoding.
  match(Set dst (LoadVector mem));
  predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 16);
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode(aarch64_enc_ldrvQ(dst, mem));
  ins_pipe(vload_reg_mem128);
%}
76
77// Store Vector (16 bits)
instruct storeV2(vecD src, vmem2 mem) %{
  // Applies to StoreVector nodes whose memory_size() is 2 bytes; the
  // instruction bytes are produced by the aarch64_enc_strvH encoding.
  match(Set mem (StoreVector mem src));
  predicate(n->as_StoreVector()->memory_size() == 2);
  ins_cost(4 * INSN_COST);
  format %{ "strh   $mem,$src\t# vector (16 bits)" %}
  ins_encode(aarch64_enc_strvH(src, mem));
  ins_pipe(vstore_reg_mem64);
%}
87
88// Store Vector (32 bits)
instruct storeV4(vecD src, vmem4 mem) %{
  // Applies to StoreVector nodes whose memory_size() is 4 bytes; the
  // instruction bytes are produced by the aarch64_enc_strvS encoding.
  match(Set mem (StoreVector mem src));
  predicate(n->as_StoreVector()->memory_size() == 4);
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode(aarch64_enc_strvS(src, mem));
  ins_pipe(vstore_reg_mem64);
%}
98
99// Store Vector (64 bits)
instruct storeV8(vecD src, vmem8 mem) %{
  // Applies to StoreVector nodes whose memory_size() is 8 bytes; the
  // instruction bytes are produced by the aarch64_enc_strvD encoding.
  match(Set mem (StoreVector mem src));
  predicate(n->as_StoreVector()->memory_size() == 8);
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode(aarch64_enc_strvD(src, mem));
  ins_pipe(vstore_reg_mem64);
%}
109
110// Store Vector (128 bits)
instruct storeV16(vecX src, vmem16 mem) %{
  // Applies to StoreVector nodes whose memory_size() is 16 bytes; the
  // instruction bytes are produced by the aarch64_enc_strvQ encoding.
  // NOTE(review): unlike loadV16, this predicate does not test UseSVE --
  // confirm that the asymmetry is intended.
  match(Set mem (StoreVector mem src));
  predicate(n->as_StoreVector()->memory_size() == 16);
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode(aarch64_enc_strvQ(src, mem));
  ins_pipe(vstore_reg_mem128);
%}
120
// VectorReinterpret where both the input and the result are 8 bytes long.
// Input and result are the same operand, so no code is generated.
instruct reinterpretD(vecD dst) %{
  match(Set dst (VectorReinterpret dst));
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 &&
            n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8);
  ins_cost(0);
  format %{ " # reinterpret $dst" %}
  ins_encode %{
    // intentionally empty: nothing to emit
  %}
  ins_pipe(pipe_class_empty);
%}
133
// VectorReinterpret where both the input and the result are 16 bytes long.
// Input and result are the same operand, so no code is generated.
instruct reinterpretX(vecX dst) %{
  match(Set dst (VectorReinterpret dst));
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 &&
            n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16);
  ins_cost(0);
  format %{ " # reinterpret $dst" %}
  ins_encode %{
    // intentionally empty: nothing to emit
  %}
  ins_pipe(pipe_class_empty);
%}
146
// VectorReinterpret from an 8-byte input vector to a 16-byte result.
instruct reinterpretD2X(vecX dst, vecD src) %{
  match(Set dst (VectorReinterpret src));
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 &&
            n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8);
  ins_cost(INSN_COST);
  format %{ " # reinterpret $dst,$src\t# D2X" %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    // A register move is only needed when dst and src were assigned
    // different registers. The upper 64 bits of 'src' are expected to
    // have been initialized to zero already.
    if (dst_reg != src_reg) {
      __ orr(dst_reg, __ T8B, src_reg, src_reg);
    }
  %}
  ins_pipe(vlogical64);
%}
166
// VectorReinterpret from a 16-byte input vector to an 8-byte result.
instruct reinterpretX2D(vecD dst, vecX src) %{
  match(Set dst (VectorReinterpret src));
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 &&
            n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16);
  ins_cost(INSN_COST);
  format %{ " # reinterpret $dst,$src\t# X2D" %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    // Shrinks the vector from 128 to 64 bits. The move is always emitted
    // because the upper 64 bits of "dst" must be cleared to zero.
    __ orr(dst_reg, __ T8B, src_reg, src_reg);
  %}
  ins_pipe(vlogical64);
%}
183
184// ------------------------------ Vector cast -------------------------------
185
// VectorCastB2X yielding 4 T_SHORT lanes: a single sxtl (8B -> 8H).
instruct vcvt4Bto4S(vecD dst, vecD src) %{
  match(Set dst (VectorCastB2X src));
  predicate(n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  format %{ "sxtl  $dst, T8H, $src, T8B\t# convert 4B to 4S vector" %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ sxtl(dst_reg, __ T8H, src_reg, __ T8B);
  %}
  ins_pipe(pipe_class_default);
%}
196
// VectorCastB2X yielding 8 T_SHORT lanes: a single sxtl (8B -> 8H).
instruct vcvt8Bto8S(vecX dst, vecD src) %{
  match(Set dst (VectorCastB2X src));
  predicate(n->as_Vector()->length() == 8 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  format %{ "sxtl  $dst, T8H, $src, T8B\t# convert 8B to 8S vector" %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ sxtl(dst_reg, __ T8H, src_reg, __ T8B);
  %}
  ins_pipe(pipe_class_default);
%}
207
// VectorCastS2X yielding 4 T_BYTE lanes: a single xtn (8H -> 8B).
instruct vcvt4Sto4B(vecD dst, vecD src) %{
  match(Set dst (VectorCastS2X src));
  predicate(n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  format %{ "xtn  $dst, T8B, $src, T8H\t# convert 4S to 4B vector" %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ xtn(dst_reg, __ T8B, src_reg, __ T8H);
  %}
  ins_pipe(pipe_class_default);
%}
218
// VectorCastS2X yielding 8 T_BYTE lanes: a single xtn (8H -> 8B).
instruct vcvt8Sto8B(vecD dst, vecX src) %{
  match(Set dst (VectorCastS2X src));
  predicate(n->as_Vector()->length() == 8 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  format %{ "xtn  $dst, T8B, $src, T8H\t# convert 8S to 8B vector" %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ xtn(dst_reg, __ T8B, src_reg, __ T8H);
  %}
  ins_pipe(pipe_class_default);
%}
229
// VectorCastS2X yielding 4 T_INT lanes: a single sxtl (4H -> 4S).
instruct vcvt4Sto4I(vecX dst, vecD src) %{
  match(Set dst (VectorCastS2X src));
  predicate(n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  format %{ "sxtl  $dst, T4S, $src, T4H\t# convert 4S to 4I vector" %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ sxtl(dst_reg, __ T4S, src_reg, __ T4H);
  %}
  ins_pipe(pipe_class_default);
%}
240
// VectorCastI2X yielding 4 T_SHORT lanes: a single xtn (4S -> 4H).
instruct vcvt4Ito4S(vecD dst, vecX src) %{
  match(Set dst (VectorCastI2X src));
  predicate(n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  format %{ "xtn  $dst, T4H, $src, T4S\t# convert 4I to 4S vector" %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ xtn(dst_reg, __ T4H, src_reg, __ T4S);
  %}
  ins_pipe(pipe_class_default);
%}
251
// VectorCastI2X yielding 2 T_LONG lanes: a single sxtl (2S -> 2D).
instruct vcvt2Ito2L(vecX dst, vecD src) %{
  match(Set dst (VectorCastI2X src));
  predicate(n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  format %{ "sxtl  $dst, T2D, $src, T2S\t# convert 2I to 2L vector" %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ sxtl(dst_reg, __ T2D, src_reg, __ T2S);
  %}
  ins_pipe(pipe_class_default);
%}
262
// VectorCastL2X yielding 2 T_INT lanes: a single xtn (2D -> 2S).
instruct vcvt2Lto2I(vecD dst, vecX src) %{
  match(Set dst (VectorCastL2X src));
  predicate(n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  format %{ "xtn  $dst, T2S, $src, T2D\t# convert 2L to 2I vector" %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ xtn(dst_reg, __ T2S, src_reg, __ T2D);
  %}
  ins_pipe(pipe_class_default);
%}
273
// VectorCastB2X yielding 4 T_INT lanes, done as two sxtl steps
// (8B -> 8H, then 4H -> 4S) with dst holding the intermediate value.
instruct vcvt4Bto4I(vecX dst, vecD src) %{
  match(Set dst (VectorCastB2X src));
  predicate(n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  format %{ "sxtl  $dst, T8H, $src, T8B\n\t"
            "sxtl  $dst, T4S, $dst, T4H\t# convert 4B to 4I vector"
  %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ sxtl(dst_reg, __ T8H, src_reg, __ T8B);
    __ sxtl(dst_reg, __ T4S, dst_reg, __ T4H);
  %}
  ins_pipe(pipe_slow);
%}
287
// VectorCastI2X yielding 4 T_BYTE lanes, done as two xtn steps
// (4S -> 4H, then 8H -> 8B) with dst holding the intermediate value.
instruct vcvt4Ito4B(vecD dst, vecX src) %{
  match(Set dst (VectorCastI2X src));
  predicate(n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  format %{ "xtn  $dst, T4H, $src, T4S\n\t"
            "xtn  $dst, T8B, $dst, T8H\t# convert 4I to 4B vector"
  %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ xtn(dst_reg, __ T4H, src_reg, __ T4S);
    __ xtn(dst_reg, __ T8B, dst_reg, __ T8H);
  %}
  ins_pipe(pipe_slow);
%}
301
// VectorCastB2X yielding 4 T_FLOAT lanes: two sxtl steps (8B -> 8H,
// 4H -> 4S) followed by an in-place scvtfv on dst.
instruct vcvt4Bto4F(vecX dst, vecD src) %{
  match(Set dst (VectorCastB2X src));
  predicate(n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  format %{ "sxtl  $dst, T8H, $src, T8B\n\t"
            "sxtl  $dst, T4S, $dst, T4H\n\t"
            "scvtfv  T4S, $dst, $dst\t# convert 4B to 4F vector"
  %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ sxtl(dst_reg, __ T8H, src_reg, __ T8B);
    __ sxtl(dst_reg, __ T4S, dst_reg, __ T4H);
    __ scvtfv(__ T4S, dst_reg, dst_reg);
  %}
  ins_pipe(pipe_slow);
%}
317
// VectorCastS2X yielding 4 T_FLOAT lanes: sxtl (4H -> 4S) followed by an
// in-place scvtfv on dst.
instruct vcvt4Sto4F(vecX dst, vecD src) %{
  match(Set dst (VectorCastS2X src));
  predicate(n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  format %{ "sxtl    $dst, T4S, $src, T4H\n\t"
            "scvtfv  T4S, $dst, $dst\t# convert 4S to 4F vector"
  %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ sxtl(dst_reg, __ T4S, src_reg, __ T4H);
    __ scvtfv(__ T4S, dst_reg, dst_reg);
  %}
  ins_pipe(pipe_slow);
%}
331
// VectorCastI2X yielding 2 T_DOUBLE lanes: sxtl (2S -> 2D) followed by an
// in-place scvtfv on dst.
instruct vcvt2Ito2D(vecX dst, vecD src) %{
  match(Set dst (VectorCastI2X src));
  predicate(n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  format %{ "sxtl    $dst, T2D, $src, T2S\n\t"
            "scvtfv  T2D, $dst, $dst\t# convert 2I to 2D vector"
  %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ sxtl(dst_reg, __ T2D, src_reg, __ T2S);
    __ scvtfv(__ T2D, dst_reg, dst_reg);
  %}
  ins_pipe(pipe_slow);
%}
345
// VectorCastI2X yielding 2 T_FLOAT lanes: a single scvtfv with T2S.
instruct vcvt2Ito2F(vecD dst, vecD src) %{
  match(Set dst (VectorCastI2X src));
  predicate(n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  format %{ "scvtfv  T2S, $dst, $src\t# convert 2I to 2F vector" %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ scvtfv(__ T2S, dst_reg, src_reg);
  %}
  ins_pipe(pipe_class_default);
%}
356
// VectorCastI2X yielding 4 T_FLOAT lanes: a single scvtfv with T4S.
instruct vcvt4Ito4F(vecX dst, vecX src) %{
  match(Set dst (VectorCastI2X src));
  predicate(n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  format %{ "scvtfv  T4S, $dst, $src\t# convert 4I to 4F vector" %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ scvtfv(__ T4S, dst_reg, src_reg);
  %}
  ins_pipe(pipe_class_default);
%}
367
// VectorCastL2X yielding 2 T_DOUBLE lanes: a single scvtfv with T2D.
instruct vcvt2Lto2D(vecX dst, vecX src) %{
  match(Set dst (VectorCastL2X src));
  predicate(n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  format %{ "scvtfv  T2D, $dst, $src\t# convert 2L to 2D vector" %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ scvtfv(__ T2D, dst_reg, src_reg);
  %}
  ins_pipe(pipe_class_default);
%}
378
// VectorCastF2X yielding 2 T_DOUBLE lanes: a single fcvtl (2S -> 2D).
instruct vcvt2Fto2D(vecX dst, vecD src) %{
  match(Set dst (VectorCastF2X src));
  predicate(n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  format %{ "fcvtl  $dst, T2D, $src, T2S\t# convert 2F to 2D vector" %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ fcvtl(dst_reg, __ T2D, src_reg, __ T2S);
  %}
  ins_pipe(pipe_class_default);
%}
389
// VectorCastD2X yielding 2 T_FLOAT lanes: a single fcvtn (2D -> 2S).
instruct vcvt2Dto2F(vecD dst, vecX src) %{
  match(Set dst (VectorCastD2X src));
  predicate(n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  format %{ "fcvtn  $dst, T2S, $src, T2D\t# convert 2D to 2F vector" %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ fcvtn(dst_reg, __ T2S, src_reg, __ T2D);
  %}
  ins_pipe(pipe_class_default);
%}
400
// VectorCastL2X yielding 2 T_FLOAT lanes: scvtfv with T2D into dst, then
// fcvtn (2D -> 2S) applied to dst in place.
instruct vcvt2Lto2F(vecD dst, vecX src) %{
  match(Set dst (VectorCastL2X src));
  predicate(n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  format %{ "scvtfv  T2D, $dst, $src\n\t"
            "fcvtn   $dst, T2S, $dst, T2D\t# convert 2L to 2F vector"
  %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ scvtfv(__ T2D, dst_reg, src_reg);
    __ fcvtn(dst_reg, __ T2S, dst_reg, __ T2D);
  %}
  ins_pipe(pipe_slow);
%}
414
415// ------------------------------ Reduction -------------------------------
416
// AddReductionVI over T_BYTE elements held in a vecD: addv sums the lanes
// into tmp, lane 0 is moved to dst, isrc is added, and the result is
// sign-extended from a byte.
instruct reduce_add8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp) %{
  match(Set dst (AddReductionVI isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "addv  $tmp, T8B, $vsrc\n\t"
            "smov  $dst, $tmp, B, 0\n\t"
            "addw  $dst, $dst, $isrc\n\t"
            "sxtb  $dst, $dst\t# add reduction8B"
  %}
  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register isrc_reg = as_Register($isrc$$reg);
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister tmp_reg = $tmp$$FloatRegister;
    __ addv(tmp_reg, __ T8B, vsrc_reg);
    __ smov(dst_reg, tmp_reg, __ B, 0);
    __ addw(dst_reg, dst_reg, isrc_reg);
    __ sxtb(dst_reg, dst_reg);
  %}
  ins_pipe(pipe_slow);
%}
436
// AddReductionVI over T_BYTE elements held in a vecX: addv sums the lanes
// into tmp, lane 0 is moved to dst, isrc is added, and the result is
// sign-extended from a byte.
instruct reduce_add16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp) %{
  match(Set dst (AddReductionVI isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "addv  $tmp, T16B, $vsrc\n\t"
            "smov  $dst, $tmp, B, 0\n\t"
            "addw  $dst, $dst, $isrc\n\t"
            "sxtb  $dst, $dst\t# add reduction16B"
  %}
  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register isrc_reg = as_Register($isrc$$reg);
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister tmp_reg = $tmp$$FloatRegister;
    __ addv(tmp_reg, __ T16B, vsrc_reg);
    __ smov(dst_reg, tmp_reg, __ B, 0);
    __ addw(dst_reg, dst_reg, isrc_reg);
    __ sxtb(dst_reg, dst_reg);
  %}
  ins_pipe(pipe_slow);
%}
456
// AddReductionVI over T_SHORT elements held in a vecD: addv sums the lanes
// into tmp, lane 0 is moved to dst, isrc is added, and the result is
// sign-extended from a halfword.
instruct reduce_add4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp) %{
  match(Set dst (AddReductionVI isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "addv  $tmp, T4H, $vsrc\n\t"
            "smov  $dst, $tmp, H, 0\n\t"
            "addw  $dst, $dst, $isrc\n\t"
            "sxth  $dst, $dst\t# add reduction4S"
  %}
  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register isrc_reg = as_Register($isrc$$reg);
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister tmp_reg = $tmp$$FloatRegister;
    __ addv(tmp_reg, __ T4H, vsrc_reg);
    __ smov(dst_reg, tmp_reg, __ H, 0);
    __ addw(dst_reg, dst_reg, isrc_reg);
    __ sxth(dst_reg, dst_reg);
  %}
  ins_pipe(pipe_slow);
%}
476
// AddReductionVI over T_SHORT elements held in a vecX: addv sums the lanes
// into tmp, lane 0 is moved to dst, isrc is added, and the result is
// sign-extended from a halfword.
instruct reduce_add8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp) %{
  match(Set dst (AddReductionVI isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "addv  $tmp, T8H, $vsrc\n\t"
            "smov  $dst, $tmp, H, 0\n\t"
            "addw  $dst, $dst, $isrc\n\t"
            "sxth  $dst, $dst\t# add reduction8S"
  %}
  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register isrc_reg = as_Register($isrc$$reg);
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister tmp_reg = $tmp$$FloatRegister;
    __ addv(tmp_reg, __ T8H, vsrc_reg);
    __ smov(dst_reg, tmp_reg, __ H, 0);
    __ addw(dst_reg, dst_reg, isrc_reg);
    __ sxth(dst_reg, dst_reg);
  %}
  ins_pipe(pipe_slow);
%}
496
// AddReductionVL: addpd combines the two D lanes of vsrc into tmp, lane 0
// is moved to dst, and isrc is added to it.
instruct reduce_add2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, vecX tmp) %{
  match(Set dst (AddReductionVL isrc vsrc));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "addpd $tmp, $vsrc\n\t"
            "umov  $dst, $tmp, D, 0\n\t"
            "add   $dst, $isrc, $dst\t# add reduction2L"
  %}
  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register isrc_reg = as_Register($isrc$$reg);
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister tmp_reg = $tmp$$FloatRegister;
    __ addpd(tmp_reg, vsrc_reg);
    __ umov(dst_reg, tmp_reg, __ D, 0);
    __ add(dst_reg, isrc_reg, dst_reg);
  %}
  ins_pipe(pipe_slow);
%}
513
// MulReductionVI over T_BYTE elements held in a vecD. The vector is folded
// twice with ins + mulv (S lane 1 onto lane 0, then H lane 1 onto lane 0);
// bytes 0 and 1 of the folded vector are then multiplied into the scalar
// one at a time, with sxtb applied after each multiply.
instruct reduce_mul8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp1, vecD vtmp2, iRegINoSp itmp) %{
  match(Set dst (MulReductionVI isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
  ins_cost(INSN_COST);
  format %{ "ins   $vtmp1, S, $vsrc, 0, 1\n\t"
            "mulv  $vtmp1, T8B, $vtmp1, $vsrc\n\t"
            "ins   $vtmp2, H, $vtmp1, 0, 1\n\t"
            "mulv  $vtmp2, T8B, $vtmp2, $vtmp1\n\t"
            "umov  $itmp, $vtmp2, B, 0\n\t"
            "mulw  $dst, $itmp, $isrc\n\t"
            "sxtb  $dst, $dst\n\t"
            "umov  $itmp, $vtmp2, B, 1\n\t"
            "mulw  $dst, $itmp, $dst\n\t"
            "sxtb  $dst, $dst\t# mul reduction8B"
  %}
  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register isrc_reg = as_Register($isrc$$reg);
    Register itmp_reg = as_Register($itmp$$reg);
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister vtmp1_reg = $vtmp1$$FloatRegister;
    FloatRegister vtmp2_reg = $vtmp2$$FloatRegister;

    // Vector folding steps.
    __ ins(vtmp1_reg, __ S, vsrc_reg, 0, 1);
    __ mulv(vtmp1_reg, __ T8B, vtmp1_reg, vsrc_reg);
    __ ins(vtmp2_reg, __ H, vtmp1_reg, 0, 1);
    __ mulv(vtmp2_reg, __ T8B, vtmp2_reg, vtmp1_reg);

    // Scalar tail: bytes 0 and 1 of vtmp2, combined with isrc.
    __ umov(itmp_reg, vtmp2_reg, __ B, 0);
    __ mulw(dst_reg, itmp_reg, isrc_reg);
    __ sxtb(dst_reg, dst_reg);
    __ umov(itmp_reg, vtmp2_reg, __ B, 1);
    __ mulw(dst_reg, itmp_reg, dst_reg);
    __ sxtb(dst_reg, dst_reg);
  %}
  ins_pipe(pipe_slow);
%}
549
// MulReductionVI over T_BYTE elements held in a vecX. The vector is folded
// three times with ins + mulv (D lane 1, then S lane 1, then H lane 1 onto
// lane 0); bytes 0 and 1 of the folded vector are then multiplied into the
// scalar one at a time, with sxtb applied after each multiply.
instruct reduce_mul16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp) %{
  match(Set dst (MulReductionVI isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
  ins_cost(INSN_COST);
  format %{ "ins   $vtmp1, D, $vsrc, 0, 1\n\t"
            "mulv  $vtmp1, T8B, $vtmp1, $vsrc\n\t"
            "ins   $vtmp2, S, $vtmp1, 0, 1\n\t"
            "mulv  $vtmp1, T8B, $vtmp2, $vtmp1\n\t"
            "ins   $vtmp2, H, $vtmp1, 0, 1\n\t"
            "mulv  $vtmp2, T8B, $vtmp2, $vtmp1\n\t"
            "umov  $itmp, $vtmp2, B, 0\n\t"
            "mulw  $dst, $itmp, $isrc\n\t"
            "sxtb  $dst, $dst\n\t"
            "umov  $itmp, $vtmp2, B, 1\n\t"
            "mulw  $dst, $itmp, $dst\n\t"
            "sxtb  $dst, $dst\t# mul reduction16B"
  %}
  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register isrc_reg = as_Register($isrc$$reg);
    Register itmp_reg = as_Register($itmp$$reg);
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister vtmp1_reg = $vtmp1$$FloatRegister;
    FloatRegister vtmp2_reg = $vtmp2$$FloatRegister;

    // Vector folding steps.
    __ ins(vtmp1_reg, __ D, vsrc_reg, 0, 1);
    __ mulv(vtmp1_reg, __ T8B, vtmp1_reg, vsrc_reg);
    __ ins(vtmp2_reg, __ S, vtmp1_reg, 0, 1);
    __ mulv(vtmp1_reg, __ T8B, vtmp2_reg, vtmp1_reg);
    __ ins(vtmp2_reg, __ H, vtmp1_reg, 0, 1);
    __ mulv(vtmp2_reg, __ T8B, vtmp2_reg, vtmp1_reg);

    // Scalar tail: bytes 0 and 1 of vtmp2, combined with isrc.
    __ umov(itmp_reg, vtmp2_reg, __ B, 0);
    __ mulw(dst_reg, itmp_reg, isrc_reg);
    __ sxtb(dst_reg, dst_reg);
    __ umov(itmp_reg, vtmp2_reg, __ B, 1);
    __ mulw(dst_reg, itmp_reg, dst_reg);
    __ sxtb(dst_reg, dst_reg);
  %}
  ins_pipe(pipe_slow);
%}
591
// MulReductionVI over T_SHORT elements held in a vecD. The vector is folded
// once with ins + mulv (S lane 1 onto lane 0); halfwords 0 and 1 of the
// folded vector are then multiplied into the scalar one at a time, with
// sxth applied after each multiply.
instruct reduce_mul4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp, iRegINoSp itmp) %{
  match(Set dst (MulReductionVI isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  effect(TEMP_DEF dst, TEMP vtmp, TEMP itmp);
  ins_cost(INSN_COST);
  format %{ "ins   $vtmp, S, $vsrc, 0, 1\n\t"
            "mulv  $vtmp, T4H, $vtmp, $vsrc\n\t"
            "umov  $itmp, $vtmp, H, 0\n\t"
            "mulw  $dst, $itmp, $isrc\n\t"
            "sxth  $dst, $dst\n\t"
            "umov  $itmp, $vtmp, H, 1\n\t"
            "mulw  $dst, $itmp, $dst\n\t"
            "sxth  $dst, $dst\t# mul reduction4S"
  %}
  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register isrc_reg = as_Register($isrc$$reg);
    Register itmp_reg = as_Register($itmp$$reg);
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister vtmp_reg = $vtmp$$FloatRegister;

    // Vector folding step.
    __ ins(vtmp_reg, __ S, vsrc_reg, 0, 1);
    __ mulv(vtmp_reg, __ T4H, vtmp_reg, vsrc_reg);

    // Scalar tail: halfwords 0 and 1 of vtmp, combined with isrc.
    __ umov(itmp_reg, vtmp_reg, __ H, 0);
    __ mulw(dst_reg, itmp_reg, isrc_reg);
    __ sxth(dst_reg, dst_reg);
    __ umov(itmp_reg, vtmp_reg, __ H, 1);
    __ mulw(dst_reg, itmp_reg, dst_reg);
    __ sxth(dst_reg, dst_reg);
  %}
  ins_pipe(pipe_slow);
%}
621
// MulReductionVI over T_SHORT elements held in a vecX. The vector is folded
// twice with ins + mulv (D lane 1, then S lane 1 onto lane 0); halfwords 0
// and 1 of the folded vector are then multiplied into the scalar one at a
// time, with sxth applied after each multiply.
instruct reduce_mul8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp) %{
  match(Set dst (MulReductionVI isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
  ins_cost(INSN_COST);
  format %{ "ins   $vtmp1, D, $vsrc, 0, 1\n\t"
            "mulv  $vtmp1, T4H, $vtmp1, $vsrc\n\t"
            "ins   $vtmp2, S, $vtmp1, 0, 1\n\t"
            "mulv  $vtmp2, T4H, $vtmp2, $vtmp1\n\t"
            "umov  $itmp, $vtmp2, H, 0\n\t"
            "mulw  $dst, $itmp, $isrc\n\t"
            "sxth  $dst, $dst\n\t"
            "umov  $itmp, $vtmp2, H, 1\n\t"
            "mulw  $dst, $itmp, $dst\n\t"
            "sxth  $dst, $dst\t# mul reduction8S"
  %}
  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register isrc_reg = as_Register($isrc$$reg);
    Register itmp_reg = as_Register($itmp$$reg);
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister vtmp1_reg = $vtmp1$$FloatRegister;
    FloatRegister vtmp2_reg = $vtmp2$$FloatRegister;

    // Vector folding steps.
    __ ins(vtmp1_reg, __ D, vsrc_reg, 0, 1);
    __ mulv(vtmp1_reg, __ T4H, vtmp1_reg, vsrc_reg);
    __ ins(vtmp2_reg, __ S, vtmp1_reg, 0, 1);
    __ mulv(vtmp2_reg, __ T4H, vtmp2_reg, vtmp1_reg);

    // Scalar tail: halfwords 0 and 1 of vtmp2, combined with isrc.
    __ umov(itmp_reg, vtmp2_reg, __ H, 0);
    __ mulw(dst_reg, itmp_reg, isrc_reg);
    __ sxth(dst_reg, dst_reg);
    __ umov(itmp_reg, vtmp2_reg, __ H, 1);
    __ mulw(dst_reg, itmp_reg, dst_reg);
    __ sxth(dst_reg, dst_reg);
  %}
  ins_pipe(pipe_slow);
%}
657
// MulReductionVL: each D lane of vsrc is moved to the general-purpose tmp
// and multiplied into the scalar; no vector arithmetic is used.
instruct reduce_mul2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp) %{
  match(Set dst (MulReductionVL isrc vsrc));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "umov  $tmp, $vsrc, D, 0\n\t"
            "mul   $dst, $isrc, $tmp\n\t"
            "umov  $tmp, $vsrc, D, 1\n\t"
            "mul   $dst, $dst, $tmp\t# mul reduction2L"
  %}
  ins_encode %{
    Register dst_reg = as_Register($dst$$reg);
    Register isrc_reg = as_Register($isrc$$reg);
    Register tmp_reg = as_Register($tmp$$reg);
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    __ umov(tmp_reg, vsrc_reg, __ D, 0);
    __ mul(dst_reg, isrc_reg, tmp_reg);
    __ umov(tmp_reg, vsrc_reg, __ D, 1);
    __ mul(dst_reg, dst_reg, tmp_reg);
  %}
  ins_pipe(pipe_slow);
%}
676
// MaxReductionV over T_BYTE elements held in a vecD: smaxv reduces the
// lanes into tmp, lane 0 is moved to dst with smov, and cmpw/cselw (GT)
// picks between that value and isrc. The flags register is killed.
instruct reduce_max8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr) %{
  match(Set dst (MaxReductionV isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  ins_cost(INSN_COST);
  format %{ "smaxv $tmp, T8B, $vsrc\n\t"
            "smov  $dst, $tmp, B, 0\n\t"
            "cmpw  $dst, $isrc\n\t"
            "cselw $dst, $dst, $isrc GT\t# max reduction8B"
  %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register isrc_reg = $isrc$$Register;
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister tmp_reg = $tmp$$FloatRegister;
    __ smaxv(tmp_reg, __ T8B, vsrc_reg);
    __ smov(dst_reg, tmp_reg, __ B, 0);
    __ cmpw(dst_reg, isrc_reg);
    __ cselw(dst_reg, dst_reg, isrc_reg, Assembler::GT);
  %}
  ins_pipe(pipe_slow);
%}
696
// MaxReductionV over T_BYTE elements held in a vecX: smaxv reduces the
// lanes into tmp, lane 0 is moved to dst with smov, and cmpw/cselw (GT)
// picks between that value and isrc. The flags register is killed.
instruct reduce_max16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr) %{
  match(Set dst (MaxReductionV isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  ins_cost(INSN_COST);
  format %{ "smaxv $tmp, T16B, $vsrc\n\t"
            "smov  $dst, $tmp, B, 0\n\t"
            "cmpw  $dst, $isrc\n\t"
            "cselw $dst, $dst, $isrc GT\t# max reduction16B"
  %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register isrc_reg = $isrc$$Register;
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister tmp_reg = $tmp$$FloatRegister;
    __ smaxv(tmp_reg, __ T16B, vsrc_reg);
    __ smov(dst_reg, tmp_reg, __ B, 0);
    __ cmpw(dst_reg, isrc_reg);
    __ cselw(dst_reg, dst_reg, isrc_reg, Assembler::GT);
  %}
  ins_pipe(pipe_slow);
%}
716
// MaxReductionV over T_SHORT elements held in a vecD: smaxv reduces the
// lanes into tmp, lane 0 is moved to dst with smov, and cmpw/cselw (GT)
// picks between that value and isrc. The flags register is killed.
instruct reduce_max4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr) %{
  match(Set dst (MaxReductionV isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  ins_cost(INSN_COST);
  format %{ "smaxv $tmp, T4H, $vsrc\n\t"
            "smov  $dst, $tmp, H, 0\n\t"
            "cmpw  $dst, $isrc\n\t"
            "cselw $dst, $dst, $isrc GT\t# max reduction4S"
  %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register isrc_reg = $isrc$$Register;
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister tmp_reg = $tmp$$FloatRegister;
    __ smaxv(tmp_reg, __ T4H, vsrc_reg);
    __ smov(dst_reg, tmp_reg, __ H, 0);
    __ cmpw(dst_reg, isrc_reg);
    __ cselw(dst_reg, dst_reg, isrc_reg, Assembler::GT);
  %}
  ins_pipe(pipe_slow);
%}
736
// MaxReductionV over T_SHORT elements held in a vecX: smaxv reduces the
// lanes into tmp, lane 0 is moved to dst with smov, and cmpw/cselw (GT)
// picks between that value and isrc. The flags register is killed.
instruct reduce_max8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr) %{
  match(Set dst (MaxReductionV isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  ins_cost(INSN_COST);
  format %{ "smaxv $tmp, T8H, $vsrc\n\t"
            "smov  $dst, $tmp, H, 0\n\t"
            "cmpw  $dst, $isrc\n\t"
            "cselw $dst, $dst, $isrc GT\t# max reduction8S"
  %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register isrc_reg = $isrc$$Register;
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister tmp_reg = $tmp$$FloatRegister;
    __ smaxv(tmp_reg, __ T8H, vsrc_reg);
    __ smov(dst_reg, tmp_reg, __ H, 0);
    __ cmpw(dst_reg, isrc_reg);
    __ cselw(dst_reg, dst_reg, isrc_reg, Assembler::GT);
  %}
  ins_pipe(pipe_slow);
%}
756
// MaxReductionV over T_INT elements held in a vecX: smaxv reduces the
// lanes into tmp, lane 0 is moved to dst with umov, and cmpw/cselw (GT)
// picks between that value and isrc. The flags register is killed.
instruct reduce_max4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr) %{
  match(Set dst (MaxReductionV isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  ins_cost(INSN_COST);
  format %{ "smaxv $tmp, T4S, $vsrc\n\t"
            "umov  $dst, $tmp, S, 0\n\t"
            "cmpw  $dst, $isrc\n\t"
            "cselw $dst, $dst, $isrc GT\t# max reduction4I"
  %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register isrc_reg = $isrc$$Register;
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister tmp_reg = $tmp$$FloatRegister;
    __ smaxv(tmp_reg, __ T4S, vsrc_reg);
    __ umov(dst_reg, tmp_reg, __ S, 0);
    __ cmpw(dst_reg, isrc_reg);
    __ cselw(dst_reg, dst_reg, isrc_reg, Assembler::GT);
  %}
  ins_pipe(pipe_slow);
%}
776
// MinReductionV over T_BYTE elements held in a vecD: sminv reduces the
// lanes into tmp, lane 0 is moved to dst with smov, and cmpw/cselw (LT)
// picks between that value and isrc. The flags register is killed.
instruct reduce_min8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr) %{
  match(Set dst (MinReductionV isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  ins_cost(INSN_COST);
  format %{ "sminv $tmp, T8B, $vsrc\n\t"
            "smov  $dst, $tmp, B, 0\n\t"
            "cmpw  $dst, $isrc\n\t"
            "cselw $dst, $dst, $isrc LT\t# min reduction8B"
  %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register isrc_reg = $isrc$$Register;
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister tmp_reg = $tmp$$FloatRegister;
    __ sminv(tmp_reg, __ T8B, vsrc_reg);
    __ smov(dst_reg, tmp_reg, __ B, 0);
    __ cmpw(dst_reg, isrc_reg);
    __ cselw(dst_reg, dst_reg, isrc_reg, Assembler::LT);
  %}
  ins_pipe(pipe_slow);
%}
796
// Min reduction over a T16B (16 x byte) vector combined with a scalar input.
// Emits sminv into $tmp, moves lane 0 to $dst with smov, then keeps the
// lesser of $dst and $isrc via cmpw/cselw LT (hence KILL cr).
instruct reduce_min16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "sminv $tmp, T16B, $vsrc\n\t"
            "smov  $dst, $tmp, B, 0\n\t"
            "cmpw  $dst, $isrc\n\t"
            "cselw $dst, $dst, $isrc LT\t# min reduction16B"
  %}
  ins_encode %{
    Register      result = $dst$$Register;
    Register      scalar = $isrc$$Register;
    FloatRegister lanes  = $vsrc$$FloatRegister;
    FloatRegister folded = $tmp$$FloatRegister;

    // Across-vector min, then merge with the scalar operand.
    __ sminv(folded, __ T16B, lanes);
    __ smov(result, folded, __ B, 0);
    __ cmpw(result, scalar);
    __ cselw(result, result, scalar, Assembler::LT);
  %}
  ins_pipe(pipe_slow);
%}
816
// Min reduction over a T4H (4 x short) vector combined with a scalar input.
// Emits sminv into $tmp, moves halfword lane 0 to $dst with smov, then keeps
// the lesser of $dst and $isrc via cmpw/cselw LT (hence KILL cr).
instruct reduce_min4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "sminv $tmp, T4H, $vsrc\n\t"
            "smov  $dst, $tmp, H, 0\n\t"
            "cmpw  $dst, $isrc\n\t"
            "cselw $dst, $dst, $isrc LT\t# min reduction4S"
  %}
  ins_encode %{
    Register      result = $dst$$Register;
    Register      scalar = $isrc$$Register;
    FloatRegister lanes  = $vsrc$$FloatRegister;
    FloatRegister folded = $tmp$$FloatRegister;

    // Across-vector min, then merge with the scalar operand.
    __ sminv(folded, __ T4H, lanes);
    __ smov(result, folded, __ H, 0);
    __ cmpw(result, scalar);
    __ cselw(result, result, scalar, Assembler::LT);
  %}
  ins_pipe(pipe_slow);
%}
836
// Min reduction over a T8H (8 x short) vector combined with a scalar input.
// Emits sminv into $tmp, moves halfword lane 0 to $dst with smov, then keeps
// the lesser of $dst and $isrc via cmpw/cselw LT (hence KILL cr).
instruct reduce_min8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "sminv $tmp, T8H, $vsrc\n\t"
            "smov  $dst, $tmp, H, 0\n\t"
            "cmpw  $dst, $isrc\n\t"
            "cselw $dst, $dst, $isrc LT\t# min reduction8S"
  %}
  ins_encode %{
    Register      result = $dst$$Register;
    Register      scalar = $isrc$$Register;
    FloatRegister lanes  = $vsrc$$FloatRegister;
    FloatRegister folded = $tmp$$FloatRegister;

    // Across-vector min, then merge with the scalar operand.
    __ sminv(folded, __ T8H, lanes);
    __ smov(result, folded, __ H, 0);
    __ cmpw(result, scalar);
    __ cselw(result, result, scalar, Assembler::LT);
  %}
  ins_pipe(pipe_slow);
%}
856
// Min reduction over a T4S (4 x int) vector combined with a scalar input.
// Emits sminv into $tmp, moves word lane 0 to $dst with umov, then keeps the
// lesser of $dst and $isrc via cmpw/cselw LT (hence KILL cr).
instruct reduce_min4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "sminv $tmp, T4S, $vsrc\n\t"
            "umov  $dst, $tmp, S, 0\n\t"
            "cmpw  $dst, $isrc\n\t"
            "cselw $dst, $dst, $isrc LT\t# min reduction4I"
  %}
  ins_encode %{
    Register      result = $dst$$Register;
    Register      scalar = $isrc$$Register;
    FloatRegister lanes  = $vsrc$$FloatRegister;
    FloatRegister folded = $tmp$$FloatRegister;

    // Across-vector min, then merge with the scalar operand.
    __ sminv(folded, __ T4S, lanes);
    __ umov(result, folded, __ S, 0);
    __ cmpw(result, scalar);
    __ cselw(result, result, scalar, Assembler::LT);
  %}
  ins_pipe(pipe_slow);
%}
876
// Max reduction over a T2S (2 x int) vector combined with a scalar input.
// Uses the pairwise smaxp with $vsrc as both sources, moves lane 0 of the
// result to $dst, then keeps the greater of $dst and $isrc (cmpw/cselw GT).
instruct reduce_max2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MaxReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "smaxp $tmp, T2S, $vsrc, $vsrc\n\t"
            "umov  $dst, $tmp, S, 0\n\t"
            "cmpw  $dst, $isrc\n\t"
            "cselw $dst, $dst, $isrc GT\t# max reduction2I"
  %}
  ins_encode %{
    Register      result = $dst$$Register;
    Register      scalar = $isrc$$Register;
    FloatRegister lanes  = $vsrc$$FloatRegister;
    FloatRegister paired = $tmp$$FloatRegister;

    // Pairwise max of the vector with itself, then merge with the scalar.
    __ smaxp(paired, __ T2S, lanes, lanes);
    __ umov(result, paired, __ S, 0);
    __ cmpw(result, scalar);
    __ cselw(result, result, scalar, Assembler::GT);
  %}
  ins_pipe(pipe_slow);
%}
896
// Min reduction over a T2S (2 x int) vector combined with a scalar input.
// Uses the pairwise sminp with $vsrc as both sources, moves lane 0 of the
// result to $dst, then keeps the lesser of $dst and $isrc (cmpw/cselw LT).
instruct reduce_min2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "sminp $tmp, T2S, $vsrc, $vsrc\n\t"
            "umov  $dst, $tmp, S, 0\n\t"
            "cmpw  $dst, $isrc\n\t"
            "cselw $dst, $dst, $isrc LT\t# min reduction2I"
  %}
  ins_encode %{
    Register      result = $dst$$Register;
    Register      scalar = $isrc$$Register;
    FloatRegister lanes  = $vsrc$$FloatRegister;
    FloatRegister paired = $tmp$$FloatRegister;

    // Pairwise min of the vector with itself, then merge with the scalar.
    __ sminp(paired, __ T2S, lanes, lanes);
    __ umov(result, paired, __ S, 0);
    __ cmpw(result, scalar);
    __ cselw(result, result, scalar, Assembler::LT);
  %}
  ins_pipe(pipe_slow);
%}
916
// Max reduction over a 2 x long vector combined with a scalar input.
// Each D lane is moved to the general-purpose $tmp in turn and folded into
// $dst with a 64-bit cmp/csel GT pair; the compares are why cr is killed.
instruct reduce_max2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp, rFlagsReg cr) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MaxReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "umov  $tmp, $vsrc, D, 0\n\t"
            "cmp   $isrc,$tmp\n\t"
            "csel  $dst, $isrc, $tmp GT\n\t"
            "umov  $tmp, $vsrc, D, 1\n\t"
            "cmp   $dst, $tmp\n\t"
            "csel  $dst, $dst, $tmp GT\t# max reduction2L"
  %}
  ins_encode %{
    Register      result = $dst$$Register;
    Register      scalar = $isrc$$Register;
    Register      lane   = $tmp$$Register;
    FloatRegister lanes  = $vsrc$$FloatRegister;

    // Lane 0 against the scalar input.
    __ umov(lane, lanes, __ D, 0);
    __ cmp(scalar, lane);
    __ csel(result, scalar, lane, Assembler::GT);

    // Lane 1 against the running result.
    __ umov(lane, lanes, __ D, 1);
    __ cmp(result, lane);
    __ csel(result, result, lane, Assembler::GT);
  %}
  ins_pipe(pipe_slow);
%}
940
// Min reduction over a 2 x long vector combined with a scalar input.
// Each D lane is moved to the general-purpose $tmp in turn and folded into
// $dst with a 64-bit cmp/csel LT pair; the compares are why cr is killed.
instruct reduce_min2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp, rFlagsReg cr) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "umov  $tmp, $vsrc, D, 0\n\t"
            "cmp   $isrc,$tmp\n\t"
            "csel  $dst, $isrc, $tmp LT\n\t"
            "umov  $tmp, $vsrc, D, 1\n\t"
            "cmp   $dst, $tmp\n\t"
            "csel  $dst, $dst, $tmp LT\t# min reduction2L"
  %}
  ins_encode %{
    Register      result = $dst$$Register;
    Register      scalar = $isrc$$Register;
    Register      lane   = $tmp$$Register;
    FloatRegister lanes  = $vsrc$$FloatRegister;

    // Lane 0 against the scalar input.
    __ umov(lane, lanes, __ D, 0);
    __ cmp(scalar, lane);
    __ csel(result, scalar, lane, Assembler::LT);

    // Lane 1 against the running result.
    __ umov(lane, lanes, __ D, 1);
    __ cmp(result, lane);
    __ csel(result, result, lane, Assembler::LT);
  %}
  ins_pipe(pipe_slow);
%}
964
// Max reduction over a 2 x float vector combined with a scalar float input.
// fmaxp (S) writes the pairwise result of $vsrc into $dst, which fmaxs then
// combines with $fsrc. $dst is a TEMP_DEF because it is written before
// $fsrc is read.
instruct reduce_max2F(vRegF dst, vRegF fsrc, vecD vsrc)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MaxReductionV fsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "fmaxp $dst, $vsrc, S\n\t"
            "fmaxs $dst, $dst, $fsrc\t# max reduction2F" %}
  ins_encode %{
    FloatRegister result = $dst$$FloatRegister;
    FloatRegister scalar = $fsrc$$FloatRegister;
    FloatRegister lanes  = $vsrc$$FloatRegister;

    __ fmaxp(result, lanes, __ S);
    __ fmaxs(result, result, scalar);
  %}
  ins_pipe(pipe_class_default);
%}
978
// Max reduction over a T4S (4 x float) vector combined with a scalar float.
// fmaxv writes the across-vector result into $dst, which fmaxs then combines
// with $fsrc. $dst is a TEMP_DEF because it is written before $fsrc is read.
instruct reduce_max4F(vRegF dst, vRegF fsrc, vecX vsrc)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MaxReductionV fsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "fmaxv $dst,  T4S, $vsrc\n\t"
            "fmaxs $dst, $dst, $fsrc\t# max reduction4F" %}
  ins_encode %{
    FloatRegister result = $dst$$FloatRegister;
    FloatRegister scalar = $fsrc$$FloatRegister;
    FloatRegister lanes  = $vsrc$$FloatRegister;

    __ fmaxv(result, __ T4S, lanes);
    __ fmaxs(result, result, scalar);
  %}
  ins_pipe(pipe_class_default);
%}
992
// Max reduction over a 2 x double vector combined with a scalar double.
// fmaxp (D) writes the pairwise result of $vsrc into $dst, which fmaxd then
// combines with $dsrc. $dst is a TEMP_DEF because it is written before
// $dsrc is read.
instruct reduce_max2D(vRegD dst, vRegD dsrc, vecX vsrc)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MaxReductionV dsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "fmaxp $dst, $vsrc, D\n\t"
            "fmaxd $dst, $dst, $dsrc\t# max reduction2D" %}
  ins_encode %{
    FloatRegister result = $dst$$FloatRegister;
    FloatRegister scalar = $dsrc$$FloatRegister;
    FloatRegister lanes  = $vsrc$$FloatRegister;

    __ fmaxp(result, lanes, __ D);
    __ fmaxd(result, result, scalar);
  %}
  ins_pipe(pipe_class_default);
%}
1006
// Min reduction over a 2 x float vector combined with a scalar float input.
// fminp (S) writes the pairwise result of $vsrc into $dst, which fmins then
// combines with $fsrc. $dst is a TEMP_DEF because it is written before
// $fsrc is read.
instruct reduce_min2F(vRegF dst, vRegF fsrc, vecD vsrc)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinReductionV fsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "fminp $dst, $vsrc, S\n\t"
            "fmins $dst, $dst, $fsrc\t# min reduction2F" %}
  ins_encode %{
    FloatRegister result = $dst$$FloatRegister;
    FloatRegister scalar = $fsrc$$FloatRegister;
    FloatRegister lanes  = $vsrc$$FloatRegister;

    __ fminp(result, lanes, __ S);
    __ fmins(result, result, scalar);
  %}
  ins_pipe(pipe_class_default);
%}
1020
// Min reduction over a T4S (4 x float) vector combined with a scalar float.
// fminv writes the across-vector result into $dst, which fmins then combines
// with $fsrc. $dst is a TEMP_DEF because it is written before $fsrc is read.
instruct reduce_min4F(vRegF dst, vRegF fsrc, vecX vsrc)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinReductionV fsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "fminv $dst,  T4S, $vsrc\n\t"
            "fmins $dst, $dst, $fsrc\t# min reduction4F" %}
  ins_encode %{
    FloatRegister result = $dst$$FloatRegister;
    FloatRegister scalar = $fsrc$$FloatRegister;
    FloatRegister lanes  = $vsrc$$FloatRegister;

    __ fminv(result, __ T4S, lanes);
    __ fmins(result, result, scalar);
  %}
  ins_pipe(pipe_class_default);
%}
1034
// Min reduction over a 2 x double vector combined with a scalar double.
// fminp (D) writes the pairwise result of $vsrc into $dst, which fmind then
// combines with $dsrc. $dst is a TEMP_DEF because it is written before
// $dsrc is read.
instruct reduce_min2D(vRegD dst, vRegD dsrc, vecX vsrc)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MinReductionV dsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "fminp $dst, $vsrc, D\n\t"
            "fmind $dst, $dst, $dsrc\t# min reduction2D" %}
  ins_encode %{
    FloatRegister result = $dst$$FloatRegister;
    FloatRegister scalar = $dsrc$$FloatRegister;
    FloatRegister lanes  = $vsrc$$FloatRegister;

    __ fminp(result, lanes, __ D);
    __ fmind(result, result, scalar);
  %}
  ins_pipe(pipe_class_default);
%}
1048
// AND reduction over an 8 x byte vector combined with a scalar input.
// The two S lanes are moved to general registers and ANDed, the word is then
// folded onto itself with LSR #16 and LSR #8, ANDed with $isrc, and finally
// the low byte is sign-extended with sxtb.
instruct reduce_and8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AndReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, S, 0\n\t"
            "umov   $dst, $vsrc, S, 1\n\t"
            "andw   $dst, $dst, $tmp\n\t"
            "andw   $dst, $dst, $dst, LSR #16\n\t"
            "andw   $dst, $dst, $dst, LSR #8\n\t"
            "andw   $dst, $isrc, $dst\n\t"
            "sxtb   $dst, $dst\t# and reduction8B"
  %}
  ins_encode %{
    Register      acc     = $dst$$Register;
    Register      scalar  = $isrc$$Register;
    Register      scratch = $tmp$$Register;
    FloatRegister lanes   = $vsrc$$FloatRegister;

    // Combine the two 32-bit halves of the vector.
    __ umov(scratch, lanes, __ S, 0);
    __ umov(acc, lanes, __ S, 1);
    __ andw(acc, acc, scratch);
    // Fold the word down so the low byte covers every lane.
    __ andw(acc, acc, acc, Assembler::LSR, 16);
    __ andw(acc, acc, acc, Assembler::LSR, 8);
    // Merge with the scalar operand and sign-extend the byte result.
    __ andw(acc, scalar, acc);
    __ sxtb(acc, acc);
  %}
  ins_pipe(pipe_slow);
%}
1074
// OR reduction over an 8 x byte vector combined with a scalar input.
// The two S lanes are moved to general registers and ORed, the word is then
// folded onto itself with LSR #16 and LSR #8, ORed with $isrc, and finally
// the low byte is sign-extended with sxtb.
instruct reduce_orr8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (OrReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, S, 0\n\t"
            "umov   $dst, $vsrc, S, 1\n\t"
            "orrw   $dst, $dst, $tmp\n\t"
            "orrw   $dst, $dst, $dst, LSR #16\n\t"
            "orrw   $dst, $dst, $dst, LSR #8\n\t"
            "orrw   $dst, $isrc, $dst\n\t"
            "sxtb   $dst, $dst\t# orr reduction8B"
  %}
  ins_encode %{
    Register      acc     = $dst$$Register;
    Register      scalar  = $isrc$$Register;
    Register      scratch = $tmp$$Register;
    FloatRegister lanes   = $vsrc$$FloatRegister;

    // Combine the two 32-bit halves of the vector.
    __ umov(scratch, lanes, __ S, 0);
    __ umov(acc, lanes, __ S, 1);
    __ orrw(acc, acc, scratch);
    // Fold the word down so the low byte covers every lane.
    __ orrw(acc, acc, acc, Assembler::LSR, 16);
    __ orrw(acc, acc, acc, Assembler::LSR, 8);
    // Merge with the scalar operand and sign-extend the byte result.
    __ orrw(acc, scalar, acc);
    __ sxtb(acc, acc);
  %}
  ins_pipe(pipe_slow);
%}
1100
// XOR reduction over an 8 x byte vector combined with a scalar input.
// The two S lanes are moved to general registers and XORed, the word is then
// folded onto itself with LSR #16 and LSR #8, XORed with $isrc, and finally
// the low byte is sign-extended with sxtb.
instruct reduce_eor8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (XorReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, S, 0\n\t"
            "umov   $dst, $vsrc, S, 1\n\t"
            "eorw   $dst, $dst, $tmp\n\t"
            "eorw   $dst, $dst, $dst, LSR #16\n\t"
            "eorw   $dst, $dst, $dst, LSR #8\n\t"
            "eorw   $dst, $isrc, $dst\n\t"
            "sxtb   $dst, $dst\t# eor reduction8B"
  %}
  ins_encode %{
    Register      acc     = $dst$$Register;
    Register      scalar  = $isrc$$Register;
    Register      scratch = $tmp$$Register;
    FloatRegister lanes   = $vsrc$$FloatRegister;

    // Combine the two 32-bit halves of the vector.
    __ umov(scratch, lanes, __ S, 0);
    __ umov(acc, lanes, __ S, 1);
    __ eorw(acc, acc, scratch);
    // Fold the word down so the low byte covers every lane.
    __ eorw(acc, acc, acc, Assembler::LSR, 16);
    __ eorw(acc, acc, acc, Assembler::LSR, 8);
    // Merge with the scalar operand and sign-extend the byte result.
    __ eorw(acc, scalar, acc);
    __ sxtb(acc, acc);
  %}
  ins_pipe(pipe_slow);
%}
1126
// AND reduction over a 16 x byte vector combined with a scalar input.
// The two D lanes are moved to general registers and ANDed (64-bit), folded
// with LSR #32 (64-bit) and then LSR #16 / LSR #8 (32-bit), ANDed with
// $isrc, and the low byte is sign-extended with sxtb.
instruct reduce_and16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AndReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, D, 0\n\t"
            "umov   $dst, $vsrc, D, 1\n\t"
            "andr   $dst, $dst, $tmp\n\t"
            "andr   $dst, $dst, $dst, LSR #32\n\t"
            "andw   $dst, $dst, $dst, LSR #16\n\t"
            "andw   $dst, $dst, $dst, LSR #8\n\t"
            "andw   $dst, $isrc, $dst\n\t"
            "sxtb   $dst, $dst\t# and reduction16B"
  %}
  ins_encode %{
    Register      acc     = $dst$$Register;
    Register      scalar  = $isrc$$Register;
    Register      scratch = $tmp$$Register;
    FloatRegister lanes   = $vsrc$$FloatRegister;

    // Combine the two 64-bit halves of the vector.
    __ umov(scratch, lanes, __ D, 0);
    __ umov(acc, lanes, __ D, 1);
    __ andr(acc, acc, scratch);
    // Fold 64 -> 32 -> 16 -> 8 bits.
    __ andr(acc, acc, acc, Assembler::LSR, 32);
    __ andw(acc, acc, acc, Assembler::LSR, 16);
    __ andw(acc, acc, acc, Assembler::LSR, 8);
    // Merge with the scalar operand and sign-extend the byte result.
    __ andw(acc, scalar, acc);
    __ sxtb(acc, acc);
  %}
  ins_pipe(pipe_slow);
%}
1154
// OR reduction over a 16 x byte vector combined with a scalar input.
// The two D lanes are moved to general registers and ORed (64-bit), folded
// with LSR #32 (64-bit) and then LSR #16 / LSR #8 (32-bit), ORed with
// $isrc, and the low byte is sign-extended with sxtb.
instruct reduce_orr16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (OrReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, D, 0\n\t"
            "umov   $dst, $vsrc, D, 1\n\t"
            "orr    $dst, $dst, $tmp\n\t"
            "orr    $dst, $dst, $dst, LSR #32\n\t"
            "orrw   $dst, $dst, $dst, LSR #16\n\t"
            "orrw   $dst, $dst, $dst, LSR #8\n\t"
            "orrw   $dst, $isrc, $dst\n\t"
            "sxtb   $dst, $dst\t# orr reduction16B"
  %}
  ins_encode %{
    Register      acc     = $dst$$Register;
    Register      scalar  = $isrc$$Register;
    Register      scratch = $tmp$$Register;
    FloatRegister lanes   = $vsrc$$FloatRegister;

    // Combine the two 64-bit halves of the vector.
    __ umov(scratch, lanes, __ D, 0);
    __ umov(acc, lanes, __ D, 1);
    __ orr(acc, acc, scratch);
    // Fold 64 -> 32 -> 16 -> 8 bits.
    __ orr(acc, acc, acc, Assembler::LSR, 32);
    __ orrw(acc, acc, acc, Assembler::LSR, 16);
    __ orrw(acc, acc, acc, Assembler::LSR, 8);
    // Merge with the scalar operand and sign-extend the byte result.
    __ orrw(acc, scalar, acc);
    __ sxtb(acc, acc);
  %}
  ins_pipe(pipe_slow);
%}
1182
// XOR reduction over a 16 x byte vector combined with a scalar input.
// The two D lanes are moved to general registers and XORed (64-bit), folded
// with LSR #32 (64-bit) and then LSR #16 / LSR #8 (32-bit), XORed with
// $isrc, and the low byte is sign-extended with sxtb.
instruct reduce_eor16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (XorReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, D, 0\n\t"
            "umov   $dst, $vsrc, D, 1\n\t"
            "eor    $dst, $dst, $tmp\n\t"
            "eor    $dst, $dst, $dst, LSR #32\n\t"
            "eorw   $dst, $dst, $dst, LSR #16\n\t"
            "eorw   $dst, $dst, $dst, LSR #8\n\t"
            "eorw   $dst, $isrc, $dst\n\t"
            "sxtb   $dst, $dst\t# eor reduction16B"
  %}
  ins_encode %{
    Register      acc     = $dst$$Register;
    Register      scalar  = $isrc$$Register;
    Register      scratch = $tmp$$Register;
    FloatRegister lanes   = $vsrc$$FloatRegister;

    // Combine the two 64-bit halves of the vector.
    __ umov(scratch, lanes, __ D, 0);
    __ umov(acc, lanes, __ D, 1);
    __ eor(acc, acc, scratch);
    // Fold 64 -> 32 -> 16 -> 8 bits.
    __ eor(acc, acc, acc, Assembler::LSR, 32);
    __ eorw(acc, acc, acc, Assembler::LSR, 16);
    __ eorw(acc, acc, acc, Assembler::LSR, 8);
    // Merge with the scalar operand and sign-extend the byte result.
    __ eorw(acc, scalar, acc);
    __ sxtb(acc, acc);
  %}
  ins_pipe(pipe_slow);
%}
1210
// AND reduction over a 4 x short vector combined with a scalar input.
// The two S lanes are moved to general registers and ANDed, the word is
// folded onto itself with LSR #16, ANDed with $isrc, and the low halfword is
// sign-extended with sxth.
instruct reduce_and4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AndReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, S, 0\n\t"
            "umov   $dst, $vsrc, S, 1\n\t"
            "andw   $dst, $dst, $tmp\n\t"
            "andw   $dst, $dst, $dst, LSR #16\n\t"
            "andw   $dst, $isrc, $dst\n\t"
            "sxth   $dst, $dst\t# and reduction4S"
  %}
  ins_encode %{
    Register      acc     = $dst$$Register;
    Register      scalar  = $isrc$$Register;
    Register      scratch = $tmp$$Register;
    FloatRegister lanes   = $vsrc$$FloatRegister;

    // Combine the two 32-bit halves of the vector.
    __ umov(scratch, lanes, __ S, 0);
    __ umov(acc, lanes, __ S, 1);
    __ andw(acc, acc, scratch);
    // Fold the upper halfword onto the lower one.
    __ andw(acc, acc, acc, Assembler::LSR, 16);
    // Merge with the scalar operand and sign-extend the short result.
    __ andw(acc, scalar, acc);
    __ sxth(acc, acc);
  %}
  ins_pipe(pipe_slow);
%}
1234
// OR reduction over a 4 x short vector combined with a scalar input.
// The two S lanes are moved to general registers and ORed, the word is
// folded onto itself with LSR #16, ORed with $isrc, and the low halfword is
// sign-extended with sxth.
instruct reduce_orr4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (OrReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, S, 0\n\t"
            "umov   $dst, $vsrc, S, 1\n\t"
            "orrw   $dst, $dst, $tmp\n\t"
            "orrw   $dst, $dst, $dst, LSR #16\n\t"
            "orrw   $dst, $isrc, $dst\n\t"
            "sxth   $dst, $dst\t# orr reduction4S"
  %}
  ins_encode %{
    Register      acc     = $dst$$Register;
    Register      scalar  = $isrc$$Register;
    Register      scratch = $tmp$$Register;
    FloatRegister lanes   = $vsrc$$FloatRegister;

    // Combine the two 32-bit halves of the vector.
    __ umov(scratch, lanes, __ S, 0);
    __ umov(acc, lanes, __ S, 1);
    __ orrw(acc, acc, scratch);
    // Fold the upper halfword onto the lower one.
    __ orrw(acc, acc, acc, Assembler::LSR, 16);
    // Merge with the scalar operand and sign-extend the short result.
    __ orrw(acc, scalar, acc);
    __ sxth(acc, acc);
  %}
  ins_pipe(pipe_slow);
%}
1258
// XOR reduction over a 4 x short vector combined with a scalar input.
// The two S lanes are moved to general registers and XORed, the word is
// folded onto itself with LSR #16, XORed with $isrc, and the low halfword is
// sign-extended with sxth.
instruct reduce_eor4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (XorReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, S, 0\n\t"
            "umov   $dst, $vsrc, S, 1\n\t"
            "eorw   $dst, $dst, $tmp\n\t"
            "eorw   $dst, $dst, $dst, LSR #16\n\t"
            "eorw   $dst, $isrc, $dst\n\t"
            "sxth   $dst, $dst\t# eor reduction4S"
  %}
  ins_encode %{
    Register      acc     = $dst$$Register;
    Register      scalar  = $isrc$$Register;
    Register      scratch = $tmp$$Register;
    FloatRegister lanes   = $vsrc$$FloatRegister;

    // Combine the two 32-bit halves of the vector.
    __ umov(scratch, lanes, __ S, 0);
    __ umov(acc, lanes, __ S, 1);
    __ eorw(acc, acc, scratch);
    // Fold the upper halfword onto the lower one.
    __ eorw(acc, acc, acc, Assembler::LSR, 16);
    // Merge with the scalar operand and sign-extend the short result.
    __ eorw(acc, scalar, acc);
    __ sxth(acc, acc);
  %}
  ins_pipe(pipe_slow);
%}
1282
// AND reduction over an 8 x short vector combined with a scalar input.
// The two D lanes are moved to general registers and ANDed (64-bit), folded
// with LSR #32 (64-bit) and LSR #16 (32-bit), ANDed with $isrc, and the low
// halfword is sign-extended with sxth.
instruct reduce_and8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AndReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, D, 0\n\t"
            "umov   $dst, $vsrc, D, 1\n\t"
            "andr   $dst, $dst, $tmp\n\t"
            "andr   $dst, $dst, $dst, LSR #32\n\t"
            "andw   $dst, $dst, $dst, LSR #16\n\t"
            "andw   $dst, $isrc, $dst\n\t"
            "sxth   $dst, $dst\t# and reduction8S"
  %}
  ins_encode %{
    Register      acc     = $dst$$Register;
    Register      scalar  = $isrc$$Register;
    Register      scratch = $tmp$$Register;
    FloatRegister lanes   = $vsrc$$FloatRegister;

    // Combine the two 64-bit halves of the vector.
    __ umov(scratch, lanes, __ D, 0);
    __ umov(acc, lanes, __ D, 1);
    __ andr(acc, acc, scratch);
    // Fold 64 -> 32 -> 16 bits.
    __ andr(acc, acc, acc, Assembler::LSR, 32);
    __ andw(acc, acc, acc, Assembler::LSR, 16);
    // Merge with the scalar operand and sign-extend the short result.
    __ andw(acc, scalar, acc);
    __ sxth(acc, acc);
  %}
  ins_pipe(pipe_slow);
%}
1308
// OR reduction over an 8 x short vector combined with a scalar input.
// The two D lanes are moved to general registers and ORed (64-bit), folded
// with LSR #32 (64-bit) and LSR #16 (32-bit), ORed with $isrc, and the low
// halfword is sign-extended with sxth.
instruct reduce_orr8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (OrReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, D, 0\n\t"
            "umov   $dst, $vsrc, D, 1\n\t"
            "orr    $dst, $dst, $tmp\n\t"
            "orr    $dst, $dst, $dst, LSR #32\n\t"
            "orrw   $dst, $dst, $dst, LSR #16\n\t"
            "orrw   $dst, $isrc, $dst\n\t"
            "sxth   $dst, $dst\t# orr reduction8S"
  %}
  ins_encode %{
    Register      acc     = $dst$$Register;
    Register      scalar  = $isrc$$Register;
    Register      scratch = $tmp$$Register;
    FloatRegister lanes   = $vsrc$$FloatRegister;

    // Combine the two 64-bit halves of the vector.
    __ umov(scratch, lanes, __ D, 0);
    __ umov(acc, lanes, __ D, 1);
    __ orr(acc, acc, scratch);
    // Fold 64 -> 32 -> 16 bits.
    __ orr(acc, acc, acc, Assembler::LSR, 32);
    __ orrw(acc, acc, acc, Assembler::LSR, 16);
    // Merge with the scalar operand and sign-extend the short result.
    __ orrw(acc, scalar, acc);
    __ sxth(acc, acc);
  %}
  ins_pipe(pipe_slow);
%}
1334
// XOR reduction over an 8 x short vector combined with a scalar input.
// The two D lanes are moved to general registers and XORed (64-bit), folded
// with LSR #32 (64-bit) and LSR #16 (32-bit), XORed with $isrc, and the low
// halfword is sign-extended with sxth.
instruct reduce_eor8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (XorReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, D, 0\n\t"
            "umov   $dst, $vsrc, D, 1\n\t"
            "eor    $dst, $dst, $tmp\n\t"
            "eor    $dst, $dst, $dst, LSR #32\n\t"
            "eorw   $dst, $dst, $dst, LSR #16\n\t"
            "eorw   $dst, $isrc, $dst\n\t"
            "sxth   $dst, $dst\t# eor reduction8S"
  %}
  ins_encode %{
    Register      acc     = $dst$$Register;
    Register      scalar  = $isrc$$Register;
    Register      scratch = $tmp$$Register;
    FloatRegister lanes   = $vsrc$$FloatRegister;

    // Combine the two 64-bit halves of the vector.
    __ umov(scratch, lanes, __ D, 0);
    __ umov(acc, lanes, __ D, 1);
    __ eor(acc, acc, scratch);
    // Fold 64 -> 32 -> 16 bits.
    __ eor(acc, acc, acc, Assembler::LSR, 32);
    __ eorw(acc, acc, acc, Assembler::LSR, 16);
    // Merge with the scalar operand and sign-extend the short result.
    __ eorw(acc, scalar, acc);
    __ sxth(acc, acc);
  %}
  ins_pipe(pipe_slow);
%}
1360
// AND reduction over a 2 x int vector combined with a scalar input.
// Each S lane is moved to $tmp in turn: lane 0 is ANDed with $isrc into
// $dst, then lane 1 is ANDed into $dst.
instruct reduce_and2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AndReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov  $tmp, $vsrc, S, 0\n\t"
            "andw  $dst, $tmp, $isrc\n\t"
            "umov  $tmp, $vsrc, S, 1\n\t"
            "andw  $dst, $tmp, $dst\t# and reduction2I"
  %}
  ins_encode %{
    Register      acc    = $dst$$Register;
    Register      scalar = $isrc$$Register;
    Register      lane   = $tmp$$Register;
    FloatRegister lanes  = $vsrc$$FloatRegister;

    // Lane 0 with the scalar input.
    __ umov(lane, lanes, __ S, 0);
    __ andw(acc, lane, scalar);
    // Lane 1 with the running result.
    __ umov(lane, lanes, __ S, 1);
    __ andw(acc, lane, acc);
  %}
  ins_pipe(pipe_slow);
%}
1380
// OR reduction over a 2 x int vector combined with a scalar input.
// Each S lane is moved to $tmp in turn: lane 0 is ORed with $isrc into
// $dst, then lane 1 is ORed into $dst.
instruct reduce_orr2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (OrReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov  $tmp, $vsrc, S, 0\n\t"
            "orrw  $dst, $tmp, $isrc\n\t"
            "umov  $tmp, $vsrc, S, 1\n\t"
            "orrw  $dst, $tmp, $dst\t# orr reduction2I"
  %}
  ins_encode %{
    Register      acc    = $dst$$Register;
    Register      scalar = $isrc$$Register;
    Register      lane   = $tmp$$Register;
    FloatRegister lanes  = $vsrc$$FloatRegister;

    // Lane 0 with the scalar input.
    __ umov(lane, lanes, __ S, 0);
    __ orrw(acc, lane, scalar);
    // Lane 1 with the running result.
    __ umov(lane, lanes, __ S, 1);
    __ orrw(acc, lane, acc);
  %}
  ins_pipe(pipe_slow);
%}
1400
// XOR reduction over a 2 x int vector combined with a scalar input.
// Each S lane is moved to $tmp in turn: lane 0 is XORed with $isrc into
// $dst, then lane 1 is XORed into $dst.
instruct reduce_eor2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (XorReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov  $tmp, $vsrc, S, 0\n\t"
            "eorw  $dst, $tmp, $isrc\n\t"
            "umov  $tmp, $vsrc, S, 1\n\t"
            "eorw  $dst, $tmp, $dst\t# eor reduction2I"
  %}
  ins_encode %{
    Register      acc    = $dst$$Register;
    Register      scalar = $isrc$$Register;
    Register      lane   = $tmp$$Register;
    FloatRegister lanes  = $vsrc$$FloatRegister;

    // Lane 0 with the scalar input.
    __ umov(lane, lanes, __ S, 0);
    __ eorw(acc, lane, scalar);
    // Lane 1 with the running result.
    __ umov(lane, lanes, __ S, 1);
    __ eorw(acc, lane, acc);
  %}
  ins_pipe(pipe_slow);
%}
1420
// AND reduction over a 4 x int vector combined with a scalar input.
// The two D lanes are moved to general registers and ANDed (64-bit), the
// result is folded onto itself with LSR #32, and the low word is ANDed with
// $isrc.
instruct reduce_and4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AndReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, D, 0\n\t"
            "umov   $dst, $vsrc, D, 1\n\t"
            "andr   $dst, $dst, $tmp\n\t"
            "andr   $dst, $dst, $dst, LSR #32\n\t"
            "andw   $dst, $isrc, $dst\t# and reduction4I"
  %}
  ins_encode %{
    Register      acc     = $dst$$Register;
    Register      scalar  = $isrc$$Register;
    Register      scratch = $tmp$$Register;
    FloatRegister lanes   = $vsrc$$FloatRegister;

    // Combine the two 64-bit halves of the vector.
    __ umov(scratch, lanes, __ D, 0);
    __ umov(acc, lanes, __ D, 1);
    __ andr(acc, acc, scratch);
    // Fold the upper word onto the lower one, then merge with the scalar.
    __ andr(acc, acc, acc, Assembler::LSR, 32);
    __ andw(acc, scalar, acc);
  %}
  ins_pipe(pipe_slow);
%}
1442
// OR reduction over a 4 x int vector combined with a scalar input.
// The two D lanes are moved to general registers and ORed (64-bit), the
// result is folded onto itself with LSR #32, and the low word is ORed with
// $isrc.
instruct reduce_orr4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (OrReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, D, 0\n\t"
            "umov   $dst, $vsrc, D, 1\n\t"
            "orr    $dst, $dst, $tmp\n\t"
            "orr    $dst, $dst, $dst, LSR #32\n\t"
            "orrw   $dst, $isrc, $dst\t# orr reduction4I"
  %}
  ins_encode %{
    Register      acc     = $dst$$Register;
    Register      scalar  = $isrc$$Register;
    Register      scratch = $tmp$$Register;
    FloatRegister lanes   = $vsrc$$FloatRegister;

    // Combine the two 64-bit halves of the vector.
    __ umov(scratch, lanes, __ D, 0);
    __ umov(acc, lanes, __ D, 1);
    __ orr(acc, acc, scratch);
    // Fold the upper word onto the lower one, then merge with the scalar.
    __ orr(acc, acc, acc, Assembler::LSR, 32);
    __ orrw(acc, scalar, acc);
  %}
  ins_pipe(pipe_slow);
%}
1464
// XOR reduction over a 4 x int vector combined with a scalar input.
// The two D lanes are moved to general registers and XORed (64-bit), the
// result is folded onto itself with LSR #32, and the low word is XORed with
// $isrc.
instruct reduce_eor4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (XorReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, D, 0\n\t"
            "umov   $dst, $vsrc, D, 1\n\t"
            "eor    $dst, $dst, $tmp\n\t"
            "eor    $dst, $dst, $dst, LSR #32\n\t"
            "eorw   $dst, $isrc, $dst\t# eor reduction4I"
  %}
  ins_encode %{
    Register      acc     = $dst$$Register;
    Register      scalar  = $isrc$$Register;
    Register      scratch = $tmp$$Register;
    FloatRegister lanes   = $vsrc$$FloatRegister;

    // Combine the two 64-bit halves of the vector.
    __ umov(scratch, lanes, __ D, 0);
    __ umov(acc, lanes, __ D, 1);
    __ eor(acc, acc, scratch);
    // Fold the upper word onto the lower one, then merge with the scalar.
    __ eor(acc, acc, acc, Assembler::LSR, 32);
    __ eorw(acc, scalar, acc);
  %}
  ins_pipe(pipe_slow);
%}
1486
// AND reduction over a 2 x long vector combined with a scalar input.
// Each D lane is moved to $tmp in turn: lane 0 is ANDed with $isrc into
// $dst, then lane 1 is ANDed into $dst (64-bit operations throughout).
instruct reduce_and2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (AndReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov  $tmp, $vsrc, D, 0\n\t"
            "andr  $dst, $isrc, $tmp\n\t"
            "umov  $tmp, $vsrc, D, 1\n\t"
            "andr  $dst, $dst, $tmp\t# and reduction2L"
  %}
  ins_encode %{
    Register      acc    = $dst$$Register;
    Register      scalar = $isrc$$Register;
    Register      lane   = $tmp$$Register;
    FloatRegister lanes  = $vsrc$$FloatRegister;

    // Lane 0 with the scalar input.
    __ umov(lane, lanes, __ D, 0);
    __ andr(acc, scalar, lane);
    // Lane 1 with the running result.
    __ umov(lane, lanes, __ D, 1);
    __ andr(acc, acc, lane);
  %}
  ins_pipe(pipe_slow);
%}
1506
// OR reduction of a two-lane long vector with the scalar isrc: each
// 64-bit lane is moved through tmp and ORed into dst, starting from isrc.
instruct reduce_orr2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (OrReductionV isrc vsrc));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "umov  $tmp, $vsrc, D, 0\n\t"
            "orr   $dst, $isrc, $tmp\n\t"
            "umov  $tmp, $vsrc, D, 1\n\t"
            "orr   $dst, $dst, $tmp\t# orr reduction2L"
  %}
  ins_encode %{
    FloatRegister vec = as_FloatRegister($vsrc$$reg);
    Register acc = $dst$$Register;
    Register lane = $tmp$$Register;
    __ umov(lane, vec, __ D, 0);
    __ orr (acc, $isrc$$Register, lane);
    __ umov(lane, vec, __ D, 1);
    __ orr (acc, acc, lane);
  %}
  ins_pipe(pipe_slow);
%}
1526
// XOR reduction of a two-lane long vector with the scalar isrc: each
// 64-bit lane is moved through tmp and XORed into dst, starting from isrc.
instruct reduce_eor2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (XorReductionV isrc vsrc));
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "umov  $tmp, $vsrc, D, 0\n\t"
            "eor   $dst, $isrc, $tmp\n\t"
            "umov  $tmp, $vsrc, D, 1\n\t"
            "eor   $dst, $dst, $tmp\t# eor reduction2L"
  %}
  ins_encode %{
    FloatRegister vec = as_FloatRegister($vsrc$$reg);
    Register acc = $dst$$Register;
    Register lane = $tmp$$Register;
    __ umov(lane, vec, __ D, 0);
    __ eor (acc, $isrc$$Register, lane);
    __ umov(lane, vec, __ D, 1);
    __ eor (acc, acc, lane);
  %}
  ins_pipe(pipe_slow);
%}
1546
1547// ------------------------------ Vector insert ---------------------------------
1548
// Insert the scalar val into lane idx of a 64-bit byte vector.
// src is first copied to dst (skipped when both share a register),
// then the selected lane of dst is overwritten.
instruct insert8B(vecD dst, vecD src, iRegIorL2I val, immI idx)
%{
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorInsert (Binary src val) idx));
  ins_cost(INSN_COST);
  format %{ "orr    $dst, T8B, $src, $src\n\t"
            "mov    $dst, T8B, $idx, $val\t# insert into vector(8B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    if (vd != vn) {
      __ orr(vd, __ T8B, vn, vn);
    }
    __ mov(vd, __ T8B, $idx$$constant, $val$$Register);
  %}
  ins_pipe(pipe_slow);
%}
1565
// Insert the scalar val into lane idx of a 128-bit byte vector.
// src is first copied to dst (skipped when both share a register),
// then the selected lane of dst is overwritten.
instruct insert16B(vecX dst, vecX src, iRegIorL2I val, immI idx)
%{
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorInsert (Binary src val) idx));
  ins_cost(INSN_COST);
  format %{ "orr    $dst, T16B, $src, $src\n\t"
            "mov    $dst, T16B, $idx, $val\t# insert into vector(16B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    if (vd != vn) {
      __ orr(vd, __ T16B, vn, vn);
    }
    __ mov(vd, __ T16B, $idx$$constant, $val$$Register);
  %}
  ins_pipe(pipe_slow);
%}
1582
// Insert the scalar val into lane idx of a 64-bit short vector.
// src is first copied to dst (skipped when both share a register),
// then the selected halfword lane of dst is overwritten.
instruct insert4S(vecD dst, vecD src, iRegIorL2I val, immI idx)
%{
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorInsert (Binary src val) idx));
  ins_cost(INSN_COST);
  format %{ "orr    $dst, T8B, $src, $src\n\t"
            "mov    $dst, T4H, $idx, $val\t# insert into vector(4S)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    if (vd != vn) {
      __ orr(vd, __ T8B, vn, vn);
    }
    __ mov(vd, __ T4H, $idx$$constant, $val$$Register);
  %}
  ins_pipe(pipe_slow);
%}
1599
// Insert the scalar val into lane idx of a 128-bit short vector.
// src is first copied to dst (skipped when both share a register),
// then the selected halfword lane of dst is overwritten.
instruct insert8S(vecX dst, vecX src, iRegIorL2I val, immI idx)
%{
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorInsert (Binary src val) idx));
  ins_cost(INSN_COST);
  format %{ "orr    $dst, T16B, $src, $src\n\t"
            "mov    $dst, T8H, $idx, $val\t# insert into vector(8S)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    if (vd != vn) {
      __ orr(vd, __ T16B, vn, vn);
    }
    __ mov(vd, __ T8H, $idx$$constant, $val$$Register);
  %}
  ins_pipe(pipe_slow);
%}
1616
// Insert the scalar val into lane idx of a 64-bit int vector.
// src is first copied to dst (skipped when both share a register),
// then the selected word lane of dst is overwritten.
instruct insert2I(vecD dst, vecD src, iRegIorL2I val, immI idx)
%{
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorInsert (Binary src val) idx));
  ins_cost(INSN_COST);
  format %{ "orr    $dst, T8B, $src, $src\n\t"
            "mov    $dst, T2S, $idx, $val\t# insert into vector(2I)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    if (vd != vn) {
      __ orr(vd, __ T8B, vn, vn);
    }
    __ mov(vd, __ T2S, $idx$$constant, $val$$Register);
  %}
  ins_pipe(pipe_slow);
%}
1633
// Insert the scalar val into lane idx of a 128-bit int vector.
// src is first copied to dst (skipped when both share a register),
// then the selected word lane of dst is overwritten.
instruct insert4I(vecX dst, vecX src, iRegIorL2I val, immI idx)
%{
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorInsert (Binary src val) idx));
  ins_cost(INSN_COST);
  format %{ "orr    $dst, T16B, $src, $src\n\t"
            "mov    $dst, T4S, $idx, $val\t# insert into vector(4I)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    if (vd != vn) {
      __ orr(vd, __ T16B, vn, vn);
    }
    __ mov(vd, __ T4S, $idx$$constant, $val$$Register);
  %}
  ins_pipe(pipe_slow);
%}
1650
// Insert the long scalar val into lane idx of a 128-bit long vector.
// src is first copied to dst (skipped when both share a register),
// then the selected doubleword lane of dst is overwritten.
instruct insert2L(vecX dst, vecX src, iRegL val, immI idx)
%{
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorInsert (Binary src val) idx));
  ins_cost(INSN_COST);
  format %{ "orr    $dst, T16B, $src, $src\n\t"
            "mov    $dst, T2D, $idx, $val\t# insert into vector(2L)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    if (vd != vn) {
      __ orr(vd, __ T16B, vn, vn);
    }
    __ mov(vd, __ T2D, $idx$$constant, $val$$Register);
  %}
  ins_pipe(pipe_slow);
%}
1667
// Insert the float held in val into lane idx of a 64-bit float vector.
// src is copied to dst unconditionally, then element 0 of val is
// inserted into the selected lane. dst is declared TEMP_DEF.
instruct insert2F(vecD dst, vecD src, vRegF val, immI idx)
%{
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP_DEF dst);
  ins_cost(INSN_COST);
  format %{ "orr    $dst, T8B, $src, $src\n\t"
            "ins    $dst, S, $val, $idx, 0\t# insert into vector(2F)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    FloatRegister elem = as_FloatRegister($val$$reg);
    __ orr(vd, __ T8B, vn, vn);
    __ ins(vd, __ S, elem, $idx$$constant, 0);
  %}
  ins_pipe(pipe_slow);
%}
1684
// Insert the float held in val into lane idx of a 128-bit float vector.
// src is copied to dst unconditionally, then element 0 of val is
// inserted into the selected lane. dst is declared TEMP_DEF.
instruct insert4F(vecX dst, vecX src, vRegF val, immI idx)
%{
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP_DEF dst);
  ins_cost(INSN_COST);
  format %{ "orr    $dst, T16B, $src, $src\n\t"
            "ins    $dst, S, $val, $idx, 0\t# insert into vector(4F)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    FloatRegister elem = as_FloatRegister($val$$reg);
    __ orr(vd, __ T16B, vn, vn);
    __ ins(vd, __ S, elem, $idx$$constant, 0);
  %}
  ins_pipe(pipe_slow);
%}
1701
// Insert the double held in val into lane idx of a 128-bit double vector.
// src is copied to dst unconditionally, then element 0 of val is
// inserted into the selected lane. dst is declared TEMP_DEF.
instruct insert2D(vecX dst, vecX src, vRegD val, immI idx)
%{
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP_DEF dst);
  ins_cost(INSN_COST);
  format %{ "orr    $dst, T16B, $src, $src\n\t"
            "ins    $dst, D, $val, $idx, 0\t# insert into vector(2D)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    FloatRegister elem = as_FloatRegister($val$$reg);
    __ orr(vd, __ T16B, vn, vn);
    __ ins(vd, __ D, elem, $idx$$constant, 0);
  %}
  ins_pipe(pipe_slow);
%}
1718
1719// ------------------------------ Vector extract ---------------------------------
1720
// Extract byte lane idx of an 8-element vector into a general register
// using smov.
instruct extract8B(iRegINoSp dst, vecD src, immI idx)
%{
  predicate(n->in(1)->bottom_type()->is_vect()->length() == 8);
  match(Set dst (ExtractB src idx));
  ins_cost(INSN_COST);
  format %{ "smov    $dst, $src, B, $idx\t# extract from vector(8B)" %}
  ins_encode %{
    FloatRegister vec = as_FloatRegister($src$$reg);
    __ smov($dst$$Register, vec, __ B, $idx$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
1732
// Extract byte lane idx of a 16-element vector into a general register
// using smov.
instruct extract16B(iRegINoSp dst, vecX src, immI idx)
%{
  predicate(n->in(1)->bottom_type()->is_vect()->length() == 16);
  match(Set dst (ExtractB src idx));
  ins_cost(INSN_COST);
  format %{ "smov    $dst, $src, B, $idx\t# extract from vector(16B)" %}
  ins_encode %{
    FloatRegister vec = as_FloatRegister($src$$reg);
    __ smov($dst$$Register, vec, __ B, $idx$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
1744
// Extract halfword lane idx of a 4-element vector into a general
// register using smov.
instruct extract4S(iRegINoSp dst, vecD src, immI idx)
%{
  predicate(n->in(1)->bottom_type()->is_vect()->length() == 4);
  match(Set dst (ExtractS src idx));
  ins_cost(INSN_COST);
  format %{ "smov    $dst, $src, H, $idx\t# extract from vector(4S)" %}
  ins_encode %{
    FloatRegister vec = as_FloatRegister($src$$reg);
    __ smov($dst$$Register, vec, __ H, $idx$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
1756
// Extract halfword lane idx of an 8-element vector into a general
// register using smov.
instruct extract8S(iRegINoSp dst, vecX src, immI idx)
%{
  predicate(n->in(1)->bottom_type()->is_vect()->length() == 8);
  match(Set dst (ExtractS src idx));
  ins_cost(INSN_COST);
  format %{ "smov    $dst, $src, H, $idx\t# extract from vector(8S)" %}
  ins_encode %{
    FloatRegister vec = as_FloatRegister($src$$reg);
    __ smov($dst$$Register, vec, __ H, $idx$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
1768
// Extract word lane idx of a 2-element vector into a general register
// using umov.
instruct extract2I(iRegINoSp dst, vecD src, immI idx)
%{
  predicate(n->in(1)->bottom_type()->is_vect()->length() == 2);
  match(Set dst (ExtractI src idx));
  ins_cost(INSN_COST);
  format %{ "umov    $dst, $src, S, $idx\t# extract from vector(2I)" %}
  ins_encode %{
    FloatRegister vec = as_FloatRegister($src$$reg);
    __ umov($dst$$Register, vec, __ S, $idx$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
1780
// Extract word lane idx of a 4-element vector into a general register
// using umov.
instruct extract4I(iRegINoSp dst, vecX src, immI idx)
%{
  predicate(n->in(1)->bottom_type()->is_vect()->length() == 4);
  match(Set dst (ExtractI src idx));
  ins_cost(INSN_COST);
  format %{ "umov    $dst, $src, S, $idx\t# extract from vector(4I)" %}
  ins_encode %{
    FloatRegister vec = as_FloatRegister($src$$reg);
    __ umov($dst$$Register, vec, __ S, $idx$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
1792
// Extract doubleword lane idx of a 2-element vector into a long
// general register using umov.
instruct extract2L(iRegLNoSp dst, vecX src, immI idx)
%{
  predicate(n->in(1)->bottom_type()->is_vect()->length() == 2);
  match(Set dst (ExtractL src idx));
  ins_cost(INSN_COST);
  format %{ "umov    $dst, $src, D, $idx\t# extract from vector(2L)" %}
  ins_encode %{
    FloatRegister vec = as_FloatRegister($src$$reg);
    __ umov($dst$$Register, vec, __ D, $idx$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
1804
// Extract float lane idx of a 2-element vector: the lane is moved to
// element 0 of the scalar FP register dst with ins.
instruct extract2F(vRegF dst, vecD src, immI idx)
%{
  predicate(n->in(1)->bottom_type()->is_vect()->length() == 2);
  match(Set dst (ExtractF src idx));
  ins_cost(INSN_COST);
  format %{ "ins   $dst, S, $src, 0, $idx\t# extract from vector(2F)" %}
  ins_encode %{
    FloatRegister out = as_FloatRegister($dst$$reg);
    FloatRegister vec = as_FloatRegister($src$$reg);
    __ ins(out, __ S, vec, 0, $idx$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
1817
// Extract float lane idx of a 4-element vector: the lane is moved to
// element 0 of the scalar FP register dst with ins.
instruct extract4F(vRegF dst, vecX src, immI idx)
%{
  predicate(n->in(1)->bottom_type()->is_vect()->length() == 4);
  match(Set dst (ExtractF src idx));
  ins_cost(INSN_COST);
  format %{ "ins   $dst, S, $src, 0, $idx\t# extract from vector(4F)" %}
  ins_encode %{
    FloatRegister out = as_FloatRegister($dst$$reg);
    FloatRegister vec = as_FloatRegister($src$$reg);
    __ ins(out, __ S, vec, 0, $idx$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
1830
// Extract double lane idx of a 2-element vector: the lane is moved to
// element 0 of the scalar FP register dst with ins.
instruct extract2D(vRegD dst, vecX src, immI idx)
%{
  predicate(n->in(1)->bottom_type()->is_vect()->length() == 2);
  match(Set dst (ExtractD src idx));
  ins_cost(INSN_COST);
  format %{ "ins   $dst, D, $src, 0, $idx\t# extract from vector(2D)" %}
  ins_encode %{
    FloatRegister out = as_FloatRegister($dst$$reg);
    FloatRegister vec = as_FloatRegister($src$$reg);
    __ ins(out, __ D, vec, 0, $idx$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
1843
1844// ------------------------------ Vector comparison ---------------------------------
1845
// Lane-wise comparison of two 8-byte vectors. The element type is taken
// from the node and the condition from the immediate cond; the actual
// instruction selection is delegated to neon_compare. 8-byte elements
// are rejected by the assert.
instruct vcmpD(vecD dst, vecD src1, vecD src2, immI cond)
%{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  ins_cost(INSN_COST);
  format %{ "vcmpD  $dst, $src1, $src2\t# vector compare " %}
  ins_encode %{
    BasicType elem_bt = vector_element_basic_type(this);
    assert(type2aelembytes(elem_bt) != 8, "not supported");
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ neon_compare(vd, elem_bt, vn, vm, (int)$cond$$constant, /*isQ*/ false);
  %}
  ins_pipe(vdop64);
%}
1860
// Lane-wise comparison of two 16-byte vectors. The element type is taken
// from the node and the condition from the immediate cond; the actual
// instruction selection is delegated to neon_compare.
instruct vcmpX(vecX dst, vecX src1, vecX src2, immI cond)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  ins_cost(INSN_COST);
  format %{ "vcmpX  $dst, $src1, $src2\t# vector compare " %}
  ins_encode %{
    BasicType elem_bt = vector_element_basic_type(this);
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ neon_compare(vd, elem_bt, vn, vm, (int)$cond$$constant, /*isQ*/ true);
  %}
  ins_pipe(vdop128);
%}
1874
1875// ------------------------------ Vector mul -----------------------------------
1876
// Lane-wise multiply of two 2 x long vectors. Each 64-bit lane of both
// sources is moved to a general register, multiplied there, and the
// product is inserted into the matching lane of dst.
instruct vmul2L(vecX dst, vecX src1, vecX src2, iRegLNoSp tmp1, iRegLNoSp tmp2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVL src1 src2));
  ins_cost(INSN_COST);
  // Lane 0 of dst is written while lane 1 of both sources is still
  // unread, so dst is declared TEMP_DEF to keep it out of the input
  // registers, like the other multi-instruction sequences in this file.
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "umov   $tmp1, $src1, D, 0\n\t"
            "umov   $tmp2, $src2, D, 0\n\t"
            "mul    $tmp2, $tmp2, $tmp1\n\t"
            "mov    $dst,  T2D,   0, $tmp2\t# insert into vector(2L)\n\t"
            "umov   $tmp1, $src1, D, 1\n\t"
            "umov   $tmp2, $src2, D, 1\n\t"
            "mul    $tmp2, $tmp2, $tmp1\n\t"
            "mov    $dst,  T2D,   1, $tmp2\t# insert into vector(2L)\n\t"
  %}
  ins_encode %{
    // Lane 0: multiply in general registers, then insert.
    __ umov($tmp1$$Register, as_FloatRegister($src1$$reg), __ D, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ D, 0);
    __ mul(as_Register($tmp2$$reg), as_Register($tmp2$$reg), as_Register($tmp1$$reg));
    __ mov(as_FloatRegister($dst$$reg), __ T2D, 0, $tmp2$$Register);
    // Lane 1: same sequence for the upper doubleword.
    __ umov($tmp1$$Register, as_FloatRegister($src1$$reg), __ D, 1);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ D, 1);
    __ mul(as_Register($tmp2$$reg), as_Register($tmp2$$reg), as_Register($tmp1$$reg));
    __ mov(as_FloatRegister($dst$$reg), __ T2D, 1, $tmp2$$Register);
  %}
  ins_pipe(pipe_slow);
%}
1904
1905// --------------------------------- Vector not --------------------------------
1906
// Bitwise NOT of an 8-byte vector, recognised as an XOR with a
// replicated -1 (byte, short or int element).
instruct vnot2I(vecD dst, vecD src, immI_M1 m1)
%{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (ReplicateB m1)));
  match(Set dst (XorV src (ReplicateS m1)));
  match(Set dst (XorV src (ReplicateI m1)));
  ins_cost(INSN_COST);
  format %{ "not  $dst, T8B, $src\t# vector (8B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    __ notr(vd, __ T8B, vn);
  %}
  ins_pipe(pipe_class_default);
%}
1921
// Bitwise NOT of a 16-byte vector, recognised as an XOR with a
// replicated -1 (byte, short or int element).
instruct vnot4I(vecX dst, vecX src, immI_M1 m1)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (ReplicateB m1)));
  match(Set dst (XorV src (ReplicateS m1)));
  match(Set dst (XorV src (ReplicateI m1)));
  ins_cost(INSN_COST);
  format %{ "not  $dst, T16B, $src\t# vector (16B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    __ notr(vd, __ T16B, vn);
  %}
  ins_pipe(pipe_class_default);
%}
1936
// Bitwise NOT of a 16-byte vector, recognised as an XOR with a
// replicated long -1.
instruct vnot2L(vecX dst, vecX src, immL_M1 m1)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (ReplicateL m1)));
  ins_cost(INSN_COST);
  format %{ "not  $dst, T16B, $src\t# vector (16B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    __ notr(vd, __ T16B, vn);
  %}
  ins_pipe(pipe_class_default);
%}
1949
1950// ------------------------------ Vector and_not -------------------------------
1951
// AND of src1 with the complement of src2 for 8-byte vectors. The
// complement is recognised as an XOR with a replicated -1 (byte, short
// or int element) and the whole pattern is emitted as one bic.
instruct vand_not2I(vecD dst, vecD src1, vecD src2, immI_M1 m1)
%{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 (XorV src2 (ReplicateB m1))));
  match(Set dst (AndV src1 (XorV src2 (ReplicateS m1))));
  match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, T8B, $src1, $src2\t# vector (8B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ bic(vd, __ T8B, vn, vm);
  %}
  ins_pipe(pipe_class_default);
%}
1966
// AND of src1 with the complement of src2 for 16-byte vectors. The
// complement is recognised as an XOR with a replicated -1 (byte, short
// or int element) and the whole pattern is emitted as one bic.
instruct vand_not4I(vecX dst, vecX src1, vecX src2, immI_M1 m1)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 (XorV src2 (ReplicateB m1))));
  match(Set dst (AndV src1 (XorV src2 (ReplicateS m1))));
  match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, T16B, $src1, $src2\t# vector (16B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ bic(vd, __ T16B, vn, vm);
  %}
  ins_pipe(pipe_class_default);
%}
1981
// AND of src1 with the complement of src2 for 16-byte vectors. The
// complement is recognised as an XOR with a replicated long -1 and the
// whole pattern is emitted as one bic.
instruct vand_not2L(vecX dst, vecX src1, vecX src2, immL_M1 m1)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 (XorV src2 (ReplicateL m1))));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, T16B, $src1, $src2\t# vector (16B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ bic(vd, __ T16B, vn, vm);
  %}
  ins_pipe(pipe_class_default);
%}
1994
1995// ------------------------------ Vector max/min -------------------------------
1996
// Lane-wise maximum of byte vectors with 4 or 8 elements, emitted with
// the T8B arrangement.
instruct vmax8B(vecD dst, vecD src1, vecD src2)
%{
  predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&
             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MaxV src1 src2));
  ins_cost(INSN_COST);
  format %{ "maxv  $dst, T8B, $src1, $src2\t# vector (8B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ maxv(vd, __ T8B, vn, vm);
  %}
  ins_pipe(vdop64);
%}
2011
// Lane-wise maximum of two 16-element byte vectors.
instruct vmax16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MaxV src1 src2));
  ins_cost(INSN_COST);
  format %{ "maxv  $dst, T16B, $src1, $src2\t# vector (16B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ maxv(vd, __ T16B, vn, vm);
  %}
  ins_pipe(vdop128);
%}
2025
// Lane-wise maximum of two 4-element short vectors.
instruct vmax4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MaxV src1 src2));
  ins_cost(INSN_COST);
  format %{ "maxv  $dst, T4H, $src1, $src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ maxv(vd, __ T4H, vn, vm);
  %}
  ins_pipe(vdop64);
%}
2039
// Lane-wise maximum of two 8-element short vectors.
instruct vmax8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MaxV src1 src2));
  ins_cost(INSN_COST);
  format %{ "maxv  $dst, T8H, $src1, $src2\t# vector (8S)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ maxv(vd, __ T8H, vn, vm);
  %}
  ins_pipe(vdop128);
%}
2053
// Lane-wise maximum of two 2-element int vectors.
instruct vmax2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MaxV src1 src2));
  ins_cost(INSN_COST);
  format %{ "maxv  $dst, T2S, $src1, $src2\t# vector (2I)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ maxv(vd, __ T2S, vn, vm);
  %}
  ins_pipe(vdop64);
%}
2067
// Lane-wise maximum of two 4-element int vectors.
instruct vmax4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MaxV src1 src2));
  ins_cost(INSN_COST);
  format %{ "maxv  $dst, T4S, $src1, $src2\t# vector (4I)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ maxv(vd, __ T4S, vn, vm);
  %}
  ins_pipe(vdop128);
%}
2081
// Lane-wise minimum of byte vectors with 4 or 8 elements, emitted with
// the T8B arrangement.
instruct vmin8B(vecD dst, vecD src1, vecD src2)
%{
  predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&
             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinV src1 src2));
  ins_cost(INSN_COST);
  format %{ "minv  $dst, T8B, $src1, $src2\t# vector (8B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ minv(vd, __ T8B, vn, vm);
  %}
  ins_pipe(vdop64);
%}
2096
// Lane-wise minimum of two 16-element byte vectors.
instruct vmin16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinV src1 src2));
  ins_cost(INSN_COST);
  format %{ "minv  $dst, T16B, $src1, $src2\t# vector (16B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ minv(vd, __ T16B, vn, vm);
  %}
  ins_pipe(vdop128);
%}
2110
// Lane-wise minimum of two 4-element short vectors.
instruct vmin4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinV src1 src2));
  ins_cost(INSN_COST);
  format %{ "minv  $dst, T4H, $src1, $src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ minv(vd, __ T4H, vn, vm);
  %}
  ins_pipe(vdop64);
%}
2124
// Lane-wise minimum of two 8-element short vectors.
instruct vmin8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinV src1 src2));
  ins_cost(INSN_COST);
  format %{ "minv  $dst, T8H, $src1, $src2\t# vector (8S)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ minv(vd, __ T8H, vn, vm);
  %}
  ins_pipe(vdop128);
%}
2138
// Lane-wise minimum of two 2-element int vectors.
instruct vmin2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinV src1 src2));
  ins_cost(INSN_COST);
  format %{ "minv  $dst, T2S, $src1, $src2\t# vector (2I)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ minv(vd, __ T2S, vn, vm);
  %}
  ins_pipe(vdop64);
%}
2152
// Lane-wise minimum of two 4-element int vectors.
instruct vmin4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinV src1 src2));
  ins_cost(INSN_COST);
  format %{ "minv  $dst, T4S, $src1, $src2\t# vector (4I)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src1$$reg);
    FloatRegister vm = as_FloatRegister($src2$$reg);
    __ minv(vd, __ T4S, vn, vm);
  %}
  ins_pipe(vdop128);
%}
2166
2167
// Lane-wise maximum of two 2-element long vectors, built from a
// compare and a bit select: dst first receives the cmgt mask for
// src1 > src2, then bsl uses that mask to pick src1 where it is set
// and src2 elsewhere. dst is a TEMP because it holds the mask.
instruct vmax2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MaxV src1 src2));
  effect(TEMP dst);
  ins_cost(INSN_COST);
  format %{ "cmgt  $dst, T2D, $src1, $src2\t# vector (2L)\n\t"
            "bsl   $dst, T16B, $src1, $src2\t# vector (16B)" %}
  ins_encode %{
    FloatRegister mask = as_FloatRegister($dst$$reg);
    FloatRegister lhs = as_FloatRegister($src1$$reg);
    FloatRegister rhs = as_FloatRegister($src2$$reg);
    __ cmgt(mask, __ T2D, lhs, rhs);
    __ bsl(mask, __ T16B, lhs, rhs);
  %}
  ins_pipe(vdop128);
%}
2184
// Lane-wise minimum of two 2-element long vectors, built from a
// compare and a bit select: dst first receives the cmgt mask for
// src1 > src2, then bsl uses that mask to pick src2 where it is set
// and src1 elsewhere. dst is a TEMP because it holds the mask.
instruct vmin2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinV src1 src2));
  effect(TEMP dst);
  ins_cost(INSN_COST);
  format %{ "cmgt  $dst, T2D, $src1, $src2\t# vector (2L)\n\t"
            "bsl   $dst, T16B, $src2, $src1\t# vector (16B)" %}
  ins_encode %{
    FloatRegister mask = as_FloatRegister($dst$$reg);
    FloatRegister lhs = as_FloatRegister($src1$$reg);
    FloatRegister rhs = as_FloatRegister($src2$$reg);
    __ cmgt(mask, __ T2D, lhs, rhs);
    __ bsl(mask, __ T16B, rhs, lhs);
  %}
  ins_pipe(vdop128);
%}
2201
2202// --------------------------------- blend (bsl) ----------------------------
2203
// Blend of two 8-byte vectors. dst is both the third input of
// VectorBlend and the result; bsl is issued with src2 as its first
// source and src1 as its second.
instruct vbsl8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (VectorBlend (Binary src1 src2) dst));
  ins_cost(INSN_COST);
  format %{ "bsl  $dst, T8B, $src2, $src1\t# vector (8B)" %}
  ins_encode %{
    FloatRegister sel = as_FloatRegister($dst$$reg);
    FloatRegister first = as_FloatRegister($src2$$reg);
    FloatRegister second = as_FloatRegister($src1$$reg);
    __ bsl(sel, __ T8B, first, second);
  %}
  ins_pipe(vlogical64);
%}
2216
// Blend of two 16-byte vectors. dst is both the third input of
// VectorBlend and the result; bsl is issued with src2 as its first
// source and src1 as its second.
instruct vbsl16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (VectorBlend (Binary src1 src2) dst));
  ins_cost(INSN_COST);
  format %{ "bsl  $dst, T16B, $src2, $src1\t# vector (16B)" %}
  ins_encode %{
    FloatRegister sel = as_FloatRegister($dst$$reg);
    FloatRegister first = as_FloatRegister($src2$$reg);
    FloatRegister second = as_FloatRegister($src1$$reg);
    __ bsl(sel, __ T16B, first, second);
  %}
  ins_pipe(vlogical128);
%}
2229
2230// --------------------------------- Load/store Mask ----------------------------
2231
// VectorLoadMask for 8 byte elements: every byte lane of src is negated
// into dst.
instruct loadmask8B(vecD dst, vecD src  )
%{
  predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src ));
  ins_cost(INSN_COST);
  format %{ "negr  $dst, T8B, $src\t# load mask (8B to 8B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    __ negr(vd, __ T8B, vn);
  %}
  ins_pipe(pipe_class_default);
%}
2243
// VectorLoadMask for 16 byte elements: every byte lane of src is negated
// into dst.
instruct loadmask16B(vecX dst, vecX src  )
%{
  predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src ));
  ins_cost(INSN_COST);
  format %{ "negr  $dst, T16B, $src\t# load mask (16B to 16B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    __ negr(vd, __ T16B, vn);
  %}
  ins_pipe(pipe_class_default);
%}
2255
// VectorStoreMask for 8 lanes with a size operand of type immI_1: every
// byte lane of src is negated into dst.
instruct storemask8B(vecD dst, vecD src , immI_1 size)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (VectorStoreMask src size));
  ins_cost(INSN_COST);
  format %{ "negr  $dst, T8B, $src\t# store mask (8B to 8B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    __ negr(vd, __ T8B, vn);
  %}
  ins_pipe(pipe_class_default);
%}
2267
// VectorStoreMask for 16 lanes with a size operand of type immI_1: every
// byte lane of src is negated into dst.
instruct storemask16B(vecX dst, vecX src , immI_1 size)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (VectorStoreMask src size));
  ins_cost(INSN_COST);
  format %{ "negr  $dst, T16B, $src\t# store mask (16B to 16B)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    __ negr(vd, __ T16B, vn);
  %}
  ins_pipe(pipe_class_default);
%}
2279
// VectorLoadMask for 4 short elements: the byte lanes of src are widened
// to halfwords with uxtl and the widened lanes are then negated in place.
instruct loadmask4S(vecD dst, vecD src  )
%{
  predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src ));
  ins_cost(INSN_COST);
  format %{ "uxtl  $dst, T8H, $src, T8B\n\t"
            "negr  $dst, T8H, $dst\t# load mask (4B to 4H)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    __ uxtl(vd, __ T8H, vn, __ T8B);
    __ negr(vd, __ T8H, vd);
  %}
  ins_pipe(pipe_slow);
%}
2293
// VectorLoadMask for 8 short elements: the byte lanes of the 64-bit src
// are widened to halfwords in the 128-bit dst with uxtl, then negated
// in place.
instruct loadmask8S(vecX dst, vecD src  )
%{
  predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src ));
  ins_cost(INSN_COST);
  format %{ "uxtl  $dst, T8H, $src, T8B\n\t"
            "negr  $dst, T8H, $dst\t# load mask (8B to 8H)" %}
  ins_encode %{
    FloatRegister vd = as_FloatRegister($dst$$reg);
    FloatRegister vn = as_FloatRegister($src$$reg);
    __ uxtl(vd, __ T8H, vn, __ T8B);
    __ negr(vd, __ T8H, vd);
  %}
  ins_pipe(pipe_slow);
%}
2307
2308instruct storemask4S(vecD dst, vecD src , immI_2 size)
2309%{
2310  predicate(n->as_Vector()->length() == 4);
2311  match(Set dst (VectorStoreMask src size));
2312  ins_cost(INSN_COST);
2313  format %{ "xtn  $dst, T8B, $src, T8H\n\t"
2314            "negr  $dst, T8B, $dst\t# store mask (4H to 4B)" %}
2315  ins_encode %{
2316    __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg), __ T8H);
2317    __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
2318  %}
2319  ins_pipe(pipe_slow);
2320%}
2321
2322instruct storemask8S(vecD dst, vecX src , immI_2 size)
2323%{
2324  predicate(n->as_Vector()->length() == 8);
2325  match(Set dst (VectorStoreMask src size));
2326  ins_cost(INSN_COST);
2327  format %{ "xtn  $dst, T8B, $src, T8H\n\t"
2328            "negr  $dst, T8B, $dst\t# store mask (8H to 8B)" %}
2329  ins_encode %{
2330    __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg), __ T8H);
2331    __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
2332  %}
2333  ins_pipe(pipe_slow);
2334%}
2335
2336instruct loadmask2I(vecD dst, vecD src  )
2337%{
2338  predicate(n->as_Vector()->length() == 2 &&
2339            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
2340             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
2341  match(Set dst (VectorLoadMask src ));
2342  ins_cost(INSN_COST);
2343  format %{ "uxtl  $dst, T8H, $src, T8B\t# 2B to 2H\n\t"
2344            "uxtl  $dst, T4S, $dst, T4H\t# 2H to 2S\n\t"
2345            "negr   $dst, T4S, $dst\t# load mask (2B to 2S)" %}
2346  ins_encode %{
2347    __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
2348    __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
2349    __ negr(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg));
2350  %}
2351  ins_pipe(pipe_slow);
2352%}
2353
2354instruct loadmask4I(vecX dst, vecD src  )
2355%{
2356  predicate(n->as_Vector()->length() == 4 &&
2357            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
2358             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
2359  match(Set dst (VectorLoadMask src ));
2360  ins_cost(INSN_COST);
2361  format %{ "uxtl  $dst, T8H, $src, T8B\t# 4B to 4H\n\t"
2362            "uxtl  $dst, T4S, $dst, T4H\t# 4H to 4S\n\t"
2363            "negr   $dst, T4S, $dst\t# load mask (4B to 4S)" %}
2364  ins_encode %{
2365    __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
2366    __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
2367    __ negr(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg));
2368  %}
2369  ins_pipe(pipe_slow);
2370%}
2371
2372instruct storemask2I(vecD dst, vecD src , immI_4 size)
2373%{
2374  predicate(n->as_Vector()->length() == 2);
2375  match(Set dst (VectorStoreMask src size));
2376  ins_cost(INSN_COST);
2377  format %{ "xtn  $dst, T4H, $src, T4S\t# 2S to 2H\n\t"
2378            "xtn  $dst, T8B, $dst, T8H\t# 2H to 2B\n\t"
2379            "negr   $dst, T8B, $dst\t# store mask (2S to 2B)" %}
2380  ins_encode %{
2381    __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg), __ T4S);
2382    __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H);
2383    __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
2384  %}
2385  ins_pipe(pipe_slow);
2386%}
2387
2388instruct storemask4I(vecD dst, vecX src , immI_4 size)
2389%{
2390  predicate(n->as_Vector()->length() == 4);
2391  match(Set dst (VectorStoreMask src size));
2392  ins_cost(INSN_COST);
2393  format %{ "xtn  $dst, T4H, $src, T4S\t# 4S to 4H\n\t"
2394            "xtn  $dst, T8B, $dst, T8H\t# 4H to 4B\n\t"
2395            "negr   $dst, T8B, $dst\t# store mask (4S to 4B)" %}
2396  ins_encode %{
2397    __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg), __ T4S);
2398    __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H);
2399    __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
2400  %}
2401  ins_pipe(pipe_slow);
2402%}
2403
2404instruct loadmask2L(vecX dst, vecD src)
2405%{
2406  predicate(n->as_Vector()->length() == 2 &&
2407            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
2408             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
2409  match(Set dst (VectorLoadMask src));
2410  ins_cost(INSN_COST);
2411  format %{ "uxtl  $dst, T8H, $src, T8B\t# 2B to 2S\n\t"
2412            "uxtl  $dst, T4S, $dst, T4H\t# 2S to 2I\n\t"
2413            "uxtl  $dst, T2D, $dst, T2S\t# 2I to 2L\n\t"
2414            "neg   $dst, T2D, $dst\t# load mask (2B to 2L)" %}
2415  ins_encode %{
2416    __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
2417    __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
2418    __ uxtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg), __ T2S);
2419    __ negr(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg));
2420  %}
2421  ins_pipe(pipe_slow);
2422%}
2423
2424instruct storemask2L(vecD dst, vecX src, immI_8 size)
2425%{
2426  predicate(n->as_Vector()->length() == 2);
2427  match(Set dst (VectorStoreMask src size));
2428  ins_cost(INSN_COST);
2429  format %{ "xtn  $dst, T2S, $src, T2D\t# 2L to 2I\n\t"
2430            "xtn  $dst, T4H, $dst, T4S\t# 2I to 2S\n\t"
2431            "xtn  $dst, T8B, $dst, T8H\t# 2S to 2B\n\t"
2432            "neg  $dst, T8B, $dst\t# store mask (2L to 2B)" %}
2433  ins_encode %{
2434    __ xtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg), __ T2D);
2435    __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($dst$$reg), __ T4S);
2436    __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H);
2437    __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
2438  %}
2439  ins_pipe(pipe_slow);
2440%}
2441
2442// vector mask cast
2443
2444instruct vmaskcastD(vecD dst)
2445%{
2446  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 &&
2447            n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8 &&
2448            n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length());
2449  match(Set dst (VectorMaskCast dst));
2450  ins_cost(0);
2451  format %{ "vmaskcast $dst\t# empty" %}
2452  ins_encode %{
2453    // empty
2454  %}
2455  ins_pipe(pipe_class_empty);
2456%}
2457
2458instruct vmaskcastX(vecX dst)
2459%{
2460  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 &&
2461            n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16 &&
2462            n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length());
2463  match(Set dst (VectorMaskCast dst));
2464  ins_cost(0);
2465  format %{ "vmaskcast $dst\t# empty" %}
2466  ins_encode %{
2467    // empty
2468  %}
2469  ins_pipe(pipe_class_empty);
2470%}
2471
2472//-------------------------------- LOAD_IOTA_INDICES----------------------------------
2473
2474instruct loadcon8B(vecD dst, immI0 src)
2475%{
2476  predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4 ||
2477             n->as_Vector()->length() == 8) &&
2478             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2479  match(Set dst (VectorLoadConst src));
2480  ins_cost(INSN_COST);
2481  format %{ "ldr $dst, CONSTANT_MEMORY\t# load iota indices" %}
2482  ins_encode %{
2483    __ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices()));
2484    __ ldrd(as_FloatRegister($dst$$reg), rscratch1);
2485  %}
2486  ins_pipe(pipe_class_memory);
2487%}
2488
2489instruct loadcon16B(vecX dst, immI0 src)
2490%{
2491  predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2492  match(Set dst (VectorLoadConst src));
2493  ins_cost(INSN_COST);
2494  format %{ "ldr $dst, CONSTANT_MEMORY\t# load iota indices" %}
2495  ins_encode %{
2496    __ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices()));
2497    __ ldrq(as_FloatRegister($dst$$reg), rscratch1);
2498  %}
2499  ins_pipe(pipe_class_memory);
2500%}
2501
2502//-------------------------------- LOAD_SHUFFLE ----------------------------------
2503
2504instruct loadshuffle8B(vecD dst, vecD src)
2505%{
2506  predicate(n->as_Vector()->length() == 8 &&
2507            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2508  match(Set dst (VectorLoadShuffle src));
2509  ins_cost(INSN_COST);
2510  format %{ "mov  $dst, T8B, $src\t# get 8B shuffle" %}
2511  ins_encode %{
2512    __ orr(as_FloatRegister($dst$$reg), __ T8B,
2513           as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
2514  %}
2515  ins_pipe(pipe_class_default);
2516%}
2517
2518instruct loadshuffle16B(vecX dst, vecX src)
2519%{
2520  predicate(n->as_Vector()->length() == 16 &&
2521            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2522  match(Set dst (VectorLoadShuffle src));
2523  ins_cost(INSN_COST);
2524  format %{ "mov  $dst, T16B, $src\t# get 16B shuffle" %}
2525  ins_encode %{
2526    __ orr(as_FloatRegister($dst$$reg), __ T16B,
2527           as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
2528  %}
2529  ins_pipe(pipe_class_default);
2530%}
2531
2532instruct loadshuffle4S(vecD dst, vecD src)
2533%{
2534  predicate(n->as_Vector()->length() == 4 &&
2535            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
2536  match(Set dst (VectorLoadShuffle src));
2537  ins_cost(INSN_COST);
2538  format %{ "uxtl  $dst, T8H, $src, T8B\t# 4B to 4H" %}
2539  ins_encode %{
2540    __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
2541  %}
2542  ins_pipe(pipe_class_default);
2543%}
2544
2545instruct loadshuffle8S(vecX dst, vecD src)
2546%{
2547  predicate(n->as_Vector()->length() == 8 &&
2548            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
2549  match(Set dst (VectorLoadShuffle src));
2550  ins_cost(INSN_COST);
2551  format %{ "uxtl  $dst, T8H, $src, T8B\t# 8B to 8H" %}
2552  ins_encode %{
2553    __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
2554  %}
2555  ins_pipe(pipe_class_default);
2556%}
2557
2558instruct loadshuffle4I(vecX dst, vecD src)
2559%{
2560  predicate(n->as_Vector()->length() == 4 &&
2561           (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
2562            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
2563  match(Set dst (VectorLoadShuffle src));
2564  ins_cost(INSN_COST);
2565  format %{ "uxtl  $dst, T8H, $src, T8B\t# 4B to 4H \n\t"
2566            "uxtl  $dst, T4S, $dst, T4H\t# 4H to 4S" %}
2567  ins_encode %{
2568    __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
2569    __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
2570  %}
2571  ins_pipe(pipe_slow);
2572%}
2573
2574//-------------------------------- Rearrange -------------------------------------
2575// Here is an example that rearranges a NEON vector with 4 ints:
2576// Rearrange V1 int[a0, a1, a2, a3] to V2 int[a2, a3, a0, a1]
2577//   1. Get the indices of V1 and store them as Vi byte[0, 1, 2, 3].
2578//   2. Convert Vi byte[0, 1, 2, 3] to the indices of V2 and also store them as Vi byte[2, 3, 0, 1].
2579//   3. Unsigned extend Long Vi from byte[2, 3, 0, 1] to int[2, 3, 0, 1].
2580//   4. Multiply Vi int[2, 3, 0, 1] with constant int[0x04040404, 0x04040404, 0x04040404, 0x04040404]
2581//      and get tbl base Vm int[0x08080808, 0x0c0c0c0c, 0x00000000, 0x04040404].
2582//   5. Add Vm with constant int[0x03020100, 0x03020100, 0x03020100, 0x03020100]
2583//      and get tbl index Vm int[0x0b0a0908, 0x0f0e0d0c, 0x03020100, 0x07060504]
2584//   6. Use Vm as index register, and use V1 as table register.
2585//      Then get V2 as the result by tbl NEON instructions.
2586// Notes:
2587//   Step 1 matches VectorLoadConst.
2588//   Step 3 matches VectorLoadShuffle.
2589//   Step 4, 5, 6 match VectorRearrange.
2590//   For VectorRearrange short/int, the reason why such complex calculation is
2591//   required is because NEON tbl supports bytes table only, so for short/int, we
2592//   need to lookup 2/4 bytes as a group. For VectorRearrange long, we use bsl
2593//   to implement rearrange.
2594
2595instruct rearrange8B(vecD dst, vecD src, vecD shuffle)
2596%{
2597  predicate(n->as_Vector()->length() == 8 &&
2598            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2599  match(Set dst (VectorRearrange src shuffle));
2600  ins_cost(INSN_COST);
2601  effect(TEMP_DEF dst);
2602  format %{ "tbl $dst, T8B, {$dst}, $shuffle\t# rearrange 8B" %}
2603  ins_encode %{
2604    __ tbl(as_FloatRegister($dst$$reg), __ T8B,
2605           as_FloatRegister($src$$reg), 1, as_FloatRegister($shuffle$$reg));
2606  %}
2607  ins_pipe(pipe_slow);
2608%}
2609
2610instruct rearrange16B(vecX dst, vecX src, vecX shuffle)
2611%{
2612  predicate(n->as_Vector()->length() == 16 &&
2613            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2614  match(Set dst (VectorRearrange src shuffle));
2615  ins_cost(INSN_COST);
2616  effect(TEMP_DEF dst);
2617  format %{ "tbl $dst, T16B, {$dst}, $shuffle\t# rearrange 16B" %}
2618  ins_encode %{
2619    __ tbl(as_FloatRegister($dst$$reg), __ T16B,
2620           as_FloatRegister($src$$reg), 1, as_FloatRegister($shuffle$$reg));
2621  %}
2622  ins_pipe(pipe_slow);
2623%}
2624
2625instruct rearrange4S(vecD dst, vecD src, vecD shuffle, vecD tmp0, vecD tmp1)
2626%{
2627  predicate(n->as_Vector()->length() == 4 &&
2628            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
2629  match(Set dst (VectorRearrange src shuffle));
2630  ins_cost(INSN_COST);
2631  effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
2632  format %{ "mov   $tmp0, T8B, CONSTANT\t# constant 0x0202020202020202\n\t"
2633            "mov   $tmp1, T4H, CONSTANT\t# constant 0x0100010001000100\n\t"
2634            "mulv  $dst, T4H, T4H, $shuffle, $tmp0\n\t"
2635            "addv  $dst, T8B, T8B, $dst, $tmp1\n\t"
2636            "tbl   $dst, T8B, {$src}, 1, $dst\t# rearrange 4S" %}
2637  ins_encode %{
2638    __ mov(as_FloatRegister($tmp0$$reg), __ T8B, 0x02);
2639    __ mov(as_FloatRegister($tmp1$$reg), __ T4H, 0x0100);
2640    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
2641            as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
2642    __ addv(as_FloatRegister($dst$$reg), __ T8B,
2643            as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
2644    __ tbl(as_FloatRegister($dst$$reg), __ T8B,
2645           as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));
2646  %}
2647  ins_pipe(pipe_slow);
2648%}
2649
2650instruct rearrange8S(vecX dst, vecX src, vecX shuffle, vecX tmp0, vecX tmp1)
2651%{
2652  predicate(n->as_Vector()->length() == 8 &&
2653            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
2654  match(Set dst (VectorRearrange src shuffle));
2655  ins_cost(INSN_COST);
2656  effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
2657  format %{ "mov   $tmp0, T16B, CONSTANT\t# constant 0x0202020202020202\n\t"
2658            "mov   $tmp1, T8H, CONSTANT\t# constant 0x0100010001000100\n\t"
2659            "mulv  $dst, T8H, T8H, $shuffle, $tmp0\n\t"
2660            "addv  $dst, T16B, T16B, $dst, $tmp1\n\t"
2661            "tbl   $dst, T16B, {$src}, 1, $dst\t# rearrange 8S" %}
2662  ins_encode %{
2663    __ mov(as_FloatRegister($tmp0$$reg), __ T16B, 0x02);
2664    __ mov(as_FloatRegister($tmp1$$reg), __ T8H, 0x0100);
2665    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
2666            as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
2667    __ addv(as_FloatRegister($dst$$reg), __ T16B,
2668            as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
2669    __ tbl(as_FloatRegister($dst$$reg), __ T16B,
2670           as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));
2671  %}
2672  ins_pipe(pipe_slow);
2673%}
2674
2675instruct rearrange4I(vecX dst, vecX src, vecX shuffle, vecX tmp0, vecX tmp1)
2676%{
2677  predicate(n->as_Vector()->length() == 4 &&
2678           (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
2679            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
2680  match(Set dst (VectorRearrange src shuffle));
2681  ins_cost(INSN_COST);
2682  effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
2683  format %{ "mov   $tmp0, T16B, CONSTANT\t# constant 0x0404040404040404\n\t"
2684            "mov   $tmp1, T4S, CONSTANT\t# constant 0x0302010003020100\n\t"
2685            "mulv  $dst, T4S, $shuffle, $tmp0\n\t"
2686            "addv  $dst, T16B, $dst, $tmp1\n\t"
2687            "tbl   $dst, T16B, {$src}, 1, $dst\t# rearrange 4I" %}
2688  ins_encode %{
2689    __ mov(as_FloatRegister($tmp0$$reg), __ T16B, 0x04);
2690    __ mov(as_FloatRegister($tmp1$$reg), __ T4S, 0x03020100);
2691    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
2692            as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
2693    __ addv(as_FloatRegister($dst$$reg), __ T16B,
2694            as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
2695    __ tbl(as_FloatRegister($dst$$reg), __ T16B,
2696           as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));
2697  %}
2698  ins_pipe(pipe_slow);
2699%}
2700
2701//-------------------------------- Anytrue/alltrue -----------------------------
2702
2703instruct anytrue_in_mask8B(iRegINoSp dst, vecD src1, vecD src2, vecD tmp, rFlagsReg cr)
2704%{
2705  predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
2706  match(Set dst (VectorTest src1 src2 ));
2707  ins_cost(INSN_COST);
2708  effect(TEMP tmp, KILL cr);
2709  format %{ "addv  $tmp, T8B, $src1\n\t"
2710            "umov  $dst, $tmp, B, 0\n\t"
2711            "cmp   $dst, 0\n\t"
2712            "cset  $dst\t# anytrue 8B" %}
2713  ins_encode %{
2714    // No need to use src2.
2715    __ addv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($src1$$reg));
2716    __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
2717    __ cmpw($dst$$Register, zr);
2718    __ csetw($dst$$Register, Assembler::NE);
2719  %}
2720  ins_pipe(pipe_slow);
2721%}
2722
2723instruct anytrue_in_mask16B(iRegINoSp dst, vecX src1, vecX src2, vecX tmp, rFlagsReg cr)
2724%{
2725  predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
2726  match(Set dst (VectorTest src1 src2 ));
2727  ins_cost(INSN_COST);
2728  effect(TEMP tmp, KILL cr);
2729  format %{ "addv  $tmp, T16B, $src1\n\t"
2730            "umov  $dst, $tmp, B, 0\n\t"
2731            "cmp   $dst, 0\n\t"
2732            "cset  $dst\t# anytrue 16B" %}
2733  ins_encode %{
2734    // No need to use src2.
2735    __ addv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($src1$$reg));
2736    __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
2737    __ cmpw($dst$$Register, zr);
2738    __ csetw($dst$$Register, Assembler::NE);
2739  %}
2740  ins_pipe(pipe_slow);
2741%}
2742
2743instruct alltrue_in_mask8B(iRegINoSp dst, vecD src1, vecD src2, vecD tmp, rFlagsReg cr)
2744%{
2745  predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
2746  match(Set dst (VectorTest src1 src2 ));
2747  ins_cost(INSN_COST);
2748  effect(TEMP tmp, KILL cr);
2749  format %{ "uminv $tmp, T8B, $src1\n\t"
2750            "umov  $dst, $tmp, B, 0\n\t"
2751            "cmp   $dst, 0xff\n\t"
2752            "cset  $dst\t# alltrue 8B" %}
2753  ins_encode %{
2754    // No need to use src2.
2755    __ uminv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($src1$$reg));
2756    __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
2757    __ cmpw($dst$$Register, 0xff);
2758    __ csetw($dst$$Register, Assembler::EQ);
2759  %}
2760  ins_pipe(pipe_slow);
2761%}
2762
2763instruct alltrue_in_mask16B(iRegINoSp dst, vecX src1, vecX src2, vecX tmp, rFlagsReg cr)
2764%{
2765  predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
2766  match(Set dst (VectorTest src1 src2 ));
2767  ins_cost(INSN_COST);
2768  effect(TEMP tmp, KILL cr);
2769  format %{ "uminv $tmp, T16B, $src1\n\t"
2770            "umov  $dst, $tmp, B, 0\n\t"
2771            "cmp   $dst, 0xff\n\t"
2772            "cset  $dst\t# alltrue 16B" %}
2773  ins_encode %{
2774    // No need to use src2.
2775    __ uminv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($src1$$reg));
2776    __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
2777    __ cmpw($dst$$Register, 0xff);
2778    __ csetw($dst$$Register, Assembler::EQ);
2779  %}
2780  ins_pipe(pipe_slow);
2781%}
2782
2783// --------------------------------- ABS --------------------------------------
2784
2785instruct vabs8B(vecD dst, vecD src)
2786%{
2787  predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);
2788  match(Set dst (AbsVB src));
2789  ins_cost(INSN_COST);
2790  format %{ "abs  $dst, T8B, $src\t# vector (8B)" %}
2791  ins_encode %{
2792    __ absr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg));
2793  %}
2794  ins_pipe(vlogical64);
2795%}
2796
2797instruct vabs16B(vecX dst, vecX src)
2798%{
2799  predicate(n->as_Vector()->length() == 16);
2800  match(Set dst (AbsVB src));
2801  ins_cost(INSN_COST);
2802  format %{ "abs  $dst, T16B, $src\t# vector (16B)" %}
2803  ins_encode %{
2804    __ absr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($src$$reg));
2805  %}
2806  ins_pipe(vlogical128);
2807%}
2808
2809instruct vabs4S(vecD dst, vecD src)
2810%{
2811  predicate(n->as_Vector()->length() == 4);
2812  match(Set dst (AbsVS src));
2813  ins_cost(INSN_COST);
2814  format %{ "abs  $dst, T4H, $src\t# vector (4H)" %}
2815  ins_encode %{
2816    __ absr(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg));
2817  %}
2818  ins_pipe(vlogical64);
2819%}
2820
2821instruct vabs8S(vecX dst, vecX src)
2822%{
2823  predicate(n->as_Vector()->length() == 8);
2824  match(Set dst (AbsVS src));
2825  ins_cost(INSN_COST);
2826  format %{ "abs  $dst, T8H, $src\t# vector (8H)" %}
2827  ins_encode %{
2828    __ absr(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg));
2829  %}
2830  ins_pipe(vlogical128);
2831%}
2832
2833instruct vabs2I(vecD dst, vecD src)
2834%{
2835  predicate(n->as_Vector()->length() == 2);
2836  match(Set dst (AbsVI src));
2837  ins_cost(INSN_COST);
2838  format %{ "abs  $dst, T2S, $src\t# vector (2S)" %}
2839  ins_encode %{
2840    __ absr(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg));
2841  %}
2842  ins_pipe(vlogical64);
2843%}
2844
2845instruct vabs4I(vecX dst, vecX src)
2846%{
2847  predicate(n->as_Vector()->length() == 4);
2848  match(Set dst (AbsVI src));
2849  ins_cost(INSN_COST);
2850  format %{ "abs  $dst, T4S, $src\t# vector (4S)" %}
2851  ins_encode %{
2852    __ absr(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
2853  %}
2854  ins_pipe(vlogical128);
2855%}
2856
2857instruct vabs2L(vecX dst, vecX src)
2858%{
2859  predicate(n->as_Vector()->length() == 2);
2860  match(Set dst (AbsVL src));
2861  ins_cost(INSN_COST);
2862  format %{ "abs  $dst, T2D, $src\t# vector (2D)" %}
2863  ins_encode %{
2864    __ absr(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
2865  %}
2866  ins_pipe(vlogical128);
2867%}
2868
2869instruct vabs2F(vecD dst, vecD src)
2870%{
2871  predicate(n->as_Vector()->length() == 2);
2872  match(Set dst (AbsVF src));
2873  ins_cost(INSN_COST * 3);
2874  format %{ "fabs  $dst, T2S, $src\t# vector (2S)" %}
2875  ins_encode %{
2876    __ fabs(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg));
2877  %}
2878  ins_pipe(vunop_fp64);
2879%}
2880
2881instruct vabs4F(vecX dst, vecX src)
2882%{
2883  predicate(n->as_Vector()->length() == 4);
2884  match(Set dst (AbsVF src));
2885  ins_cost(INSN_COST * 3);
2886  format %{ "fabs  $dst, T4S, $src\t# vector (4S)" %}
2887  ins_encode %{
2888    __ fabs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
2889  %}
2890  ins_pipe(vunop_fp128);
2891%}
2892
2893instruct vabs2D(vecX dst, vecX src)
2894%{
2895  predicate(n->as_Vector()->length() == 2);
2896  match(Set dst (AbsVD src));
2897  ins_cost(INSN_COST * 3);
2898  format %{ "fabs  $dst, T2D, $src\t# vector (2D)" %}
2899  ins_encode %{
2900    __ fabs(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
2901  %}
2902  ins_pipe(vunop_fp128);
2903%}
2904
2905// --------------------------------- FABS DIFF --------------------------------
2906
2907instruct vabd2F(vecD dst, vecD src1, vecD src2)
2908%{
2909  predicate(n->as_Vector()->length() == 2);
2910  match(Set dst (AbsVF (SubVF src1 src2)));
2911  ins_cost(INSN_COST * 3);
2912  format %{ "fabd  $dst, T2S, $src1, $src2\t# vector (2S)" %}
2913  ins_encode %{
2914    __ fabd(as_FloatRegister($dst$$reg), __ T2S,
2915            as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
2916  %}
2917  ins_pipe(vunop_fp64);
2918%}
2919
2920instruct vabd4F(vecX dst, vecX src1, vecX src2)
2921%{
2922  predicate(n->as_Vector()->length() == 4);
2923  match(Set dst (AbsVF (SubVF src1 src2)));
2924  ins_cost(INSN_COST * 3);
2925  format %{ "fabd  $dst, T4S, $src1, $src2\t# vector (4S)" %}
2926  ins_encode %{
2927    __ fabd(as_FloatRegister($dst$$reg), __ T4S,
2928            as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
2929  %}
2930  ins_pipe(vunop_fp128);
2931%}
2932
2933instruct vabd2D(vecX dst, vecX src1, vecX src2)
2934%{
2935  predicate(n->as_Vector()->length() == 2);
2936  match(Set dst (AbsVD (SubVD src1 src2)));
2937  ins_cost(INSN_COST * 3);
2938  format %{ "fabd  $dst, T2D, $src1, $src2\t# vector (2D)" %}
2939  ins_encode %{
2940    __ fabd(as_FloatRegister($dst$$reg), __ T2D,
2941            as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
2942  %}
2943  ins_pipe(vunop_fp128);
2944%}
2945
2946instruct replicate8B(vecD dst, iRegIorL2I src)
2947%{
2948  predicate(n->as_Vector()->length() == 4 ||
2949            n->as_Vector()->length() == 8);
2950  match(Set dst (ReplicateB src));
2951  ins_cost(INSN_COST);
2952  format %{ "dup  $dst, $src\t# vector (8B)" %}
2953  ins_encode %{
2954    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
2955  %}
2956  ins_pipe(vdup_reg_reg64);
2957%}
2958
2959instruct replicate16B(vecX dst, iRegIorL2I src)
2960%{
2961  predicate(UseSVE == 0 && n->as_Vector()->length() == 16);
2962  match(Set dst (ReplicateB src));
2963  ins_cost(INSN_COST);
2964  format %{ "dup  $dst, $src\t# vector (16B)" %}
2965  ins_encode %{
2966    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
2967  %}
2968  ins_pipe(vdup_reg_reg128);
2969%}
2970
2971instruct replicate8B_imm(vecD dst, immI con)
2972%{
2973  predicate(n->as_Vector()->length() == 4 ||
2974            n->as_Vector()->length() == 8);
2975  match(Set dst (ReplicateB con));
2976  ins_cost(INSN_COST);
2977  format %{ "movi  $dst, $con\t# vector (8B)" %}
2978  ins_encode %{
2979    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
2980  %}
2981  ins_pipe(vmovi_reg_imm64);
2982%}
2983
2984instruct replicate16B_imm(vecX dst, immI con)
2985%{
2986  predicate(UseSVE == 0 && n->as_Vector()->length() == 16);
2987  match(Set dst (ReplicateB con));
2988  ins_cost(INSN_COST);
2989  format %{ "movi  $dst, $con\t# vector (16B)" %}
2990  ins_encode %{
2991    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
2992  %}
2993  ins_pipe(vmovi_reg_imm128);
2994%}
2995
2996instruct replicate4S(vecD dst, iRegIorL2I src)
2997%{
2998  predicate(n->as_Vector()->length() == 2 ||
2999            n->as_Vector()->length() == 4);
3000  match(Set dst (ReplicateS src));
3001  ins_cost(INSN_COST);
3002  format %{ "dup  $dst, $src\t# vector (4S)" %}
3003  ins_encode %{
3004    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
3005  %}
3006  ins_pipe(vdup_reg_reg64);
3007%}
3008
3009instruct replicate8S(vecX dst, iRegIorL2I src)
3010%{
3011  predicate(UseSVE == 0 && n->as_Vector()->length() == 8);
3012  match(Set dst (ReplicateS src));
3013  ins_cost(INSN_COST);
3014  format %{ "dup  $dst, $src\t# vector (8S)" %}
3015  ins_encode %{
3016    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
3017  %}
3018  ins_pipe(vdup_reg_reg128);
3019%}
3020
3021instruct replicate4S_imm(vecD dst, immI con)
3022%{
3023  predicate(n->as_Vector()->length() == 2 ||
3024            n->as_Vector()->length() == 4);
3025  match(Set dst (ReplicateS con));
3026  ins_cost(INSN_COST);
3027  format %{ "movi  $dst, $con\t# vector (4H)" %}
3028  ins_encode %{
3029    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
3030  %}
3031  ins_pipe(vmovi_reg_imm64);
3032%}
3033
3034instruct replicate8S_imm(vecX dst, immI con)
3035%{
3036  predicate(UseSVE == 0 && n->as_Vector()->length() == 8);
3037  match(Set dst (ReplicateS con));
3038  ins_cost(INSN_COST);
3039  format %{ "movi  $dst, $con\t# vector (8H)" %}
3040  ins_encode %{
3041    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
3042  %}
3043  ins_pipe(vmovi_reg_imm128);
3044%}
3045
3046instruct replicate2I(vecD dst, iRegIorL2I src)
3047%{
3048  predicate(n->as_Vector()->length() == 2);
3049  match(Set dst (ReplicateI src));
3050  ins_cost(INSN_COST);
3051  format %{ "dup  $dst, $src\t# vector (2I)" %}
3052  ins_encode %{
3053    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
3054  %}
3055  ins_pipe(vdup_reg_reg64);
3056%}
3057
3058instruct replicate4I(vecX dst, iRegIorL2I src)
3059%{
3060  predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
3061  match(Set dst (ReplicateI src));
3062  ins_cost(INSN_COST);
3063  format %{ "dup  $dst, $src\t# vector (4I)" %}
3064  ins_encode %{
3065    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
3066  %}
3067  ins_pipe(vdup_reg_reg128);
3068%}
3069
3070instruct replicate2I_imm(vecD dst, immI con)
3071%{
3072  predicate(n->as_Vector()->length() == 2);
3073  match(Set dst (ReplicateI con));
3074  ins_cost(INSN_COST);
3075  format %{ "movi  $dst, $con\t# vector (2I)" %}
3076  ins_encode %{
3077    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
3078  %}
3079  ins_pipe(vmovi_reg_imm64);
3080%}
3081
3082instruct replicate4I_imm(vecX dst, immI con)
3083%{
3084  predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
3085  match(Set dst (ReplicateI con));
3086  ins_cost(INSN_COST);
3087  format %{ "movi  $dst, $con\t# vector (4I)" %}
3088  ins_encode %{
3089    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
3090  %}
3091  ins_pipe(vmovi_reg_imm128);
3092%}
3093
3094instruct replicate2L(vecX dst, iRegL src)
3095%{
3096  predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
3097  match(Set dst (ReplicateL src));
3098  ins_cost(INSN_COST);
3099  format %{ "dup  $dst, $src\t# vector (2L)" %}
3100  ins_encode %{
3101    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
3102  %}
3103  ins_pipe(vdup_reg_reg128);
3104%}
3105
// Zero a vecX register by XOR-ing dst with itself (eor, T16B arrangement).
// Only selected when UseSVE == 0 and the vector length is 2.
// NOTE(review): the rule name says 2L, yet it matches ReplicateI (not
// ReplicateL) with an immI0 operand, and the format string prints "(4I)".
// This looks inconsistent -- confirm against the m4 source before relying
// on it.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector (4I)" %}
  ins_encode %{
    // The zero operand is not referenced; dst ^ dst clears every byte.
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}
3119
// Replicate a float held in an FP register across a 2-element float
// vector (ReplicateF, length 2). Encoded as a register-to-register dup
// with the T2S arrangement.
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ dup(dst_reg, __ T2S, src_reg);
  %}
  ins_pipe(vdup_reg_freg64);
%}
3132
// Replicate a float held in an FP register across a 4-element float
// vector (ReplicateF, length 4). Only selected when UseSVE == 0.
// Encoded as a register-to-register dup with the T4S arrangement.
instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ dup(dst_reg, __ T4S, src_reg);
  %}
  ins_pipe(vdup_reg_freg128);
%}
3145
// Replicate a double held in an FP register across a 2-element double
// vector (ReplicateD, length 2). Only selected when UseSVE == 0.
// Encoded as a register-to-register dup with the T2D arrangement.
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg = $dst$$FloatRegister;
    FloatRegister src_reg = $src$$FloatRegister;
    __ dup(dst_reg, __ T2D, src_reg);
  %}
  ins_pipe(vdup_reg_dreg128);
%}
3158
3159// ====================REDUCTION ARITHMETIC====================================
3160
// Int add reduction over a vecD vector (AddReductionVI, T_INT elements).
// addpv pairwise-adds vsrc with itself into vtmp, lane 0 of vtmp is moved
// to itmp, and the scalar input isrc is then added with a 32-bit addw.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp, iRegINoSp itmp)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AddReductionVI isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP vtmp, TEMP itmp);
  format %{ "addpv  $vtmp, T2S, $vsrc, $vsrc\n\t"
            "umov  $itmp, $vtmp, S, 0\n\t"
            "addw  $dst, $itmp, $isrc\t# add reduction2I"
  %}
  ins_encode %{
    Register      dst_reg  = as_Register($dst$$reg);
    Register      isrc_reg = as_Register($isrc$$reg);
    Register      itmp_reg = as_Register($itmp$$reg);
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister vtmp_reg = $vtmp$$FloatRegister;

    __ addpv(vtmp_reg, __ T2S, vsrc_reg, vsrc_reg);
    __ umov(itmp_reg, vtmp_reg, __ S, 0);
    __ addw(dst_reg, itmp_reg, isrc_reg);
  %}
  ins_pipe(pipe_class_default);
%}
3179
// Int add reduction over a vecX vector (AddReductionVI, T_INT elements).
// The single-source addv (T4S) reduces vsrc into vtmp, lane 0 of vtmp is
// moved to itmp, and the scalar input isrc is added with a 32-bit addw.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AddReductionVI isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP vtmp, TEMP itmp);
  format %{ "addv  $vtmp, T4S, $vsrc\n\t"
            "umov  $itmp, $vtmp, S, 0\n\t"
            "addw  $dst, $itmp, $isrc\t# add reduction4I"
  %}
  ins_encode %{
    Register      dst_reg  = as_Register($dst$$reg);
    Register      isrc_reg = as_Register($isrc$$reg);
    Register      itmp_reg = as_Register($itmp$$reg);
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister vtmp_reg = $vtmp$$FloatRegister;

    __ addv(vtmp_reg, __ T4S, vsrc_reg);
    __ umov(itmp_reg, vtmp_reg, __ S, 0);
    __ addw(dst_reg, itmp_reg, isrc_reg);
  %}
  ins_pipe(pipe_class_default);
%}
3198
// Int multiply reduction over a vecD vector (MulReductionVI, T_INT
// elements). Each S lane of vsrc is moved to tmp in turn and multiplied
// into dst in general registers, starting from the scalar input isrc.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MulReductionVI isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "umov  $tmp, $vsrc, S, 0\n\t"
            "mul   $dst, $tmp, $isrc\n\t"
            "umov  $tmp, $vsrc, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t# mul reduction2I"
  %}
  ins_encode %{
    Register      dst_reg  = as_Register($dst$$reg);
    Register      isrc_reg = as_Register($isrc$$reg);
    Register      tmp_reg  = as_Register($tmp$$reg);
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;

    // lane 0 times the scalar input
    __ umov(tmp_reg, vsrc_reg, __ S, 0);
    __ mul(dst_reg, tmp_reg, isrc_reg);
    // lane 1 times the running product
    __ umov(tmp_reg, vsrc_reg, __ S, 1);
    __ mul(dst_reg, tmp_reg, dst_reg);
  %}
  ins_pipe(pipe_class_default);
%}
3218
// Int multiply reduction over a vecX vector (MulReductionVI, T_INT
// elements). D element 1 of vsrc is inserted into D element 0 of vtmp and
// multiplied lane-wise (T2S) with vsrc; the two resulting S lanes are then
// moved to itmp one at a time and multiplied into dst together with isrc.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MulReductionVI isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP vtmp, TEMP itmp, TEMP dst);
  format %{ "ins   $vtmp, D, $vsrc, 0, 1\n\t"
            "mulv  $vtmp, T2S, $vtmp, $vsrc\n\t"
            "umov  $itmp, $vtmp, S, 0\n\t"
            "mul   $dst, $itmp, $isrc\n\t"
            "umov  $itmp, $vtmp, S, 1\n\t"
            "mul   $dst, $itmp, $dst\t# mul reduction4I"
  %}
  ins_encode %{
    Register      dst_reg  = as_Register($dst$$reg);
    Register      isrc_reg = as_Register($isrc$$reg);
    Register      itmp_reg = as_Register($itmp$$reg);
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister vtmp_reg = $vtmp$$FloatRegister;

    // vector step: combine the two D halves of vsrc
    __ ins(vtmp_reg, __ D, vsrc_reg, 0, 1);
    __ mulv(vtmp_reg, __ T2S, vtmp_reg, vsrc_reg);
    // scalar step: fold the two remaining S lanes and isrc
    __ umov(itmp_reg, vtmp_reg, __ S, 0);
    __ mul(dst_reg, itmp_reg, isrc_reg);
    __ umov(itmp_reg, vtmp_reg, __ S, 1);
    __ mul(dst_reg, itmp_reg, dst_reg);
  %}
  ins_pipe(pipe_class_default);
%}
3244
// Float add reduction over a vecD vector (AddReductionVF). The scalar
// fadds first adds fsrc and vsrc; S element 1 of vsrc is then inserted
// into element 0 of tmp and added to dst, so the lanes are accumulated
// one after another in order.
instruct reduce_add2F(vRegF dst, vRegF fsrc, vecD vsrc, vecD tmp)
%{
  match(Set dst (AddReductionVF fsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $fsrc, $vsrc\n\t"
            "ins   $tmp, S, $vsrc, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t# add reduction2F"
  %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister fsrc_reg = $fsrc$$FloatRegister;
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister tmp_reg  = $tmp$$FloatRegister;

    __ fadds(dst_reg, fsrc_reg, vsrc_reg);
    __ ins(tmp_reg, __ S, vsrc_reg, 0, 1);
    __ fadds(dst_reg, dst_reg, tmp_reg);
  %}
  ins_pipe(pipe_class_default);
%}
3264
// Float add reduction over a vecX vector (AddReductionVF). The scalar
// fadds first adds fsrc and vsrc; S elements 1, 2 and 3 of vsrc are then
// inserted into element 0 of tmp one at a time and added to dst, so the
// lanes are accumulated one after another in order.
instruct reduce_add4F(vRegF dst, vRegF fsrc, vecX vsrc, vecX tmp)
%{
  match(Set dst (AddReductionVF fsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $fsrc, $vsrc\n\t"
            "ins   $tmp, S, $vsrc, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $vsrc, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $vsrc, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t# add reduction4F"
  %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister fsrc_reg = $fsrc$$FloatRegister;
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister tmp_reg  = $tmp$$FloatRegister;

    __ fadds(dst_reg, fsrc_reg, vsrc_reg);
    __ ins(tmp_reg, __ S, vsrc_reg, 0, 1);
    __ fadds(dst_reg, dst_reg, tmp_reg);
    __ ins(tmp_reg, __ S, vsrc_reg, 0, 2);
    __ fadds(dst_reg, dst_reg, tmp_reg);
    __ ins(tmp_reg, __ S, vsrc_reg, 0, 3);
    __ fadds(dst_reg, dst_reg, tmp_reg);
  %}
  ins_pipe(pipe_class_default);
%}
3296
// Float multiply reduction over a vecD vector (MulReductionVF). The scalar
// fmuls first multiplies fsrc and vsrc; S element 1 of vsrc is then
// inserted into element 0 of tmp and multiplied into dst.
instruct reduce_mul2F(vRegF dst, vRegF fsrc, vecD vsrc, vecD tmp)
%{
  match(Set dst (MulReductionVF fsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $fsrc, $vsrc\n\t"
            "ins   $tmp, S, $vsrc, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t# mul reduction2F"
  %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister fsrc_reg = $fsrc$$FloatRegister;
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister tmp_reg  = $tmp$$FloatRegister;

    __ fmuls(dst_reg, fsrc_reg, vsrc_reg);
    __ ins(tmp_reg, __ S, vsrc_reg, 0, 1);
    __ fmuls(dst_reg, dst_reg, tmp_reg);
  %}
  ins_pipe(pipe_class_default);
%}
3316
// Float multiply reduction over a vecX vector (MulReductionVF). The scalar
// fmuls first multiplies fsrc and vsrc; S elements 1, 2 and 3 of vsrc are
// then inserted into element 0 of tmp one at a time and multiplied into
// dst.
instruct reduce_mul4F(vRegF dst, vRegF fsrc, vecX vsrc, vecX tmp)
%{
  match(Set dst (MulReductionVF fsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuls $dst, $fsrc, $vsrc\n\t"
            "ins   $tmp, S, $vsrc, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $vsrc, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $vsrc, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t# mul reduction4F"
  %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister fsrc_reg = $fsrc$$FloatRegister;
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister tmp_reg  = $tmp$$FloatRegister;

    __ fmuls(dst_reg, fsrc_reg, vsrc_reg);
    __ ins(tmp_reg, __ S, vsrc_reg, 0, 1);
    __ fmuls(dst_reg, dst_reg, tmp_reg);
    __ ins(tmp_reg, __ S, vsrc_reg, 0, 2);
    __ fmuls(dst_reg, dst_reg, tmp_reg);
    __ ins(tmp_reg, __ S, vsrc_reg, 0, 3);
    __ fmuls(dst_reg, dst_reg, tmp_reg);
  %}
  ins_pipe(pipe_class_default);
%}
3348
// Double add reduction over a vecX vector (AddReductionVD). The scalar
// faddd first adds dsrc and vsrc; D element 1 of vsrc is then inserted
// into element 0 of tmp and added to dst.
instruct reduce_add2D(vRegD dst, vRegD dsrc, vecX vsrc, vecX tmp)
%{
  match(Set dst (AddReductionVD dsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $dsrc, $vsrc\n\t"
            "ins   $tmp, D, $vsrc, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t# add reduction2D"
  %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister dsrc_reg = $dsrc$$FloatRegister;
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister tmp_reg  = $tmp$$FloatRegister;

    __ faddd(dst_reg, dsrc_reg, vsrc_reg);
    __ ins(tmp_reg, __ D, vsrc_reg, 0, 1);
    __ faddd(dst_reg, dst_reg, tmp_reg);
  %}
  ins_pipe(pipe_class_default);
%}
3368
// Double multiply reduction over a vecX vector (MulReductionVD). The
// scalar fmuld first multiplies dsrc and vsrc; D element 1 of vsrc is then
// inserted into element 0 of tmp and multiplied into dst.
instruct reduce_mul2D(vRegD dst, vRegD dsrc, vecX vsrc, vecX tmp)
%{
  match(Set dst (MulReductionVD dsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fmuld $dst, $dsrc, $vsrc\n\t"
            "ins   $tmp, D, $vsrc, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t# mul reduction2D"
  %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister dsrc_reg = $dsrc$$FloatRegister;
    FloatRegister vsrc_reg = $vsrc$$FloatRegister;
    FloatRegister tmp_reg  = $tmp$$FloatRegister;

    __ fmuld(dst_reg, dsrc_reg, vsrc_reg);
    __ ins(tmp_reg, __ D, vsrc_reg, 0, 1);
    __ fmuld(dst_reg, dst_reg, tmp_reg);
  %}
  ins_pipe(pipe_class_default);
%}
3388
3389// ====================VECTOR ARITHMETIC=======================================
3390
3391// --------------------------------- ADD --------------------------------------
3392
// Byte vector add (AddVB) on vecD operands. The predicate accepts 4- and
// 8-element vectors; both are encoded with the T8B arrangement.
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ addv(dst_reg, __ T8B, src1_reg, src2_reg);
  %}
  ins_pipe(vdop64);
%}
3407
// Byte vector add (AddVB) on vecX operands, 16 elements, T16B arrangement.
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ addv(dst_reg, __ T16B, src1_reg, src2_reg);
  %}
  ins_pipe(vdop128);
%}
3421
// Short vector add (AddVS) on vecD operands. The predicate accepts 2- and
// 4-element vectors; both are encoded with the T4H arrangement.
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ addv(dst_reg, __ T4H, src1_reg, src2_reg);
  %}
  ins_pipe(vdop64);
%}
3436
// Short vector add (AddVS) on vecX operands, 8 elements, T8H arrangement.
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ addv(dst_reg, __ T8H, src1_reg, src2_reg);
  %}
  ins_pipe(vdop128);
%}
3450
// Int vector add (AddVI) on vecD operands, 2 elements, T2S arrangement.
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ addv(dst_reg, __ T2S, src1_reg, src2_reg);
  %}
  ins_pipe(vdop64);
%}
3464
// Int vector add (AddVI) on vecX operands, 4 elements, T4S arrangement.
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ addv(dst_reg, __ T4S, src1_reg, src2_reg);
  %}
  ins_pipe(vdop128);
%}
3478
// Long vector add (AddVL) on vecX operands, 2 elements, T2D arrangement.
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ addv(dst_reg, __ T2D, src1_reg, src2_reg);
  %}
  ins_pipe(vdop128);
%}
3492
// Float vector add (AddVF) on vecD operands, 2 elements, fadd with the
// T2S arrangement.
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ fadd(dst_reg, __ T2S, src1_reg, src2_reg);
  %}
  ins_pipe(vdop_fp64);
%}
3506
// Float vector add (AddVF) on vecX operands, 4 elements, fadd with the
// T4S arrangement.
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ fadd(dst_reg, __ T4S, src1_reg, src2_reg);
  %}
  ins_pipe(vdop_fp128);
%}
3520
// Double vector add (AddVD) on vecX operands, fadd with the T2D
// arrangement.
// NOTE(review): unlike vsub2D and vmul2D this rule carries no length
// predicate -- presumably harmless for vecX doubles, but confirm against
// the m4 source.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ fadd(dst_reg, __ T2D, src1_reg, src2_reg);
  %}
  ins_pipe(vdop_fp128);
%}
3533
3534// --------------------------------- SUB --------------------------------------
3535
// Byte vector subtract (SubVB) on vecD operands. The predicate accepts
// 4- and 8-element vectors; both are encoded with the T8B arrangement.
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ subv(dst_reg, __ T8B, src1_reg, src2_reg);
  %}
  ins_pipe(vdop64);
%}
3550
// Byte vector subtract (SubVB) on vecX operands, 16 elements, T16B
// arrangement.
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ subv(dst_reg, __ T16B, src1_reg, src2_reg);
  %}
  ins_pipe(vdop128);
%}
3564
// Short vector subtract (SubVS) on vecD operands. The predicate accepts
// 2- and 4-element vectors; both are encoded with the T4H arrangement.
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ subv(dst_reg, __ T4H, src1_reg, src2_reg);
  %}
  ins_pipe(vdop64);
%}
3579
// Short vector subtract (SubVS) on vecX operands, 8 elements, T8H
// arrangement.
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ subv(dst_reg, __ T8H, src1_reg, src2_reg);
  %}
  ins_pipe(vdop128);
%}
3593
// Int vector subtract (SubVI) on vecD operands, 2 elements, T2S
// arrangement.
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ subv(dst_reg, __ T2S, src1_reg, src2_reg);
  %}
  ins_pipe(vdop64);
%}
3607
// Int vector subtract (SubVI) on vecX operands, 4 elements, T4S
// arrangement.
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ subv(dst_reg, __ T4S, src1_reg, src2_reg);
  %}
  ins_pipe(vdop128);
%}
3621
// Long vector subtract (SubVL) on vecX operands, 2 elements, T2D
// arrangement.
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ subv(dst_reg, __ T2D, src1_reg, src2_reg);
  %}
  ins_pipe(vdop128);
%}
3635
// Float vector subtract (SubVF) on vecD operands, 2 elements, fsub with
// the T2S arrangement.
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ fsub(dst_reg, __ T2S, src1_reg, src2_reg);
  %}
  ins_pipe(vdop_fp64);
%}
3649
// Float vector subtract (SubVF) on vecX operands, 4 elements, fsub with
// the T4S arrangement.
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ fsub(dst_reg, __ T4S, src1_reg, src2_reg);
  %}
  ins_pipe(vdop_fp128);
%}
3663
// Double vector subtract (SubVD) on vecX operands, 2 elements, fsub with
// the T2D arrangement.
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ fsub(dst_reg, __ T2D, src1_reg, src2_reg);
  %}
  ins_pipe(vdop_fp128);
%}
3677
3678// --------------------------------- MUL --------------------------------------
3679
// Byte vector multiply (MulVB) on vecD operands. The predicate accepts
// 4- and 8-element vectors; both are encoded with the T8B arrangement.
instruct vmul8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);
  match(Set dst (MulVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ mulv(dst_reg, __ T8B, src1_reg, src2_reg);
  %}
  ins_pipe(vmul64);
%}
3694
// Byte vector multiply (MulVB) on vecX operands, 16 elements, T16B
// arrangement.
instruct vmul16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (MulVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ mulv(dst_reg, __ T16B, src1_reg, src2_reg);
  %}
  ins_pipe(vmul128);
%}
3708
// Short vector multiply (MulVS) on vecD operands. The predicate accepts
// 2- and 4-element vectors; both are encoded with the T4H arrangement.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ mulv(dst_reg, __ T4H, src1_reg, src2_reg);
  %}
  ins_pipe(vmul64);
%}
3723
// Short vector multiply (MulVS) on vecX operands, 8 elements, T8H
// arrangement.
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ mulv(dst_reg, __ T8H, src1_reg, src2_reg);
  %}
  ins_pipe(vmul128);
%}
3737
// Int vector multiply (MulVI) on vecD operands, 2 elements, T2S
// arrangement.
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ mulv(dst_reg, __ T2S, src1_reg, src2_reg);
  %}
  ins_pipe(vmul64);
%}
3751
// Int vector multiply (MulVI) on vecX operands, 4 elements, T4S
// arrangement.
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ mulv(dst_reg, __ T4S, src1_reg, src2_reg);
  %}
  ins_pipe(vmul128);
%}
3765
// Float vector multiply (MulVF) on vecD operands, 2 elements, fmul with
// the T2S arrangement.
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ fmul(dst_reg, __ T2S, src1_reg, src2_reg);
  %}
  ins_pipe(vmuldiv_fp64);
%}
3779
// Float vector multiply (MulVF) on vecX operands, 4 elements, fmul with
// the T4S arrangement.
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ fmul(dst_reg, __ T4S, src1_reg, src2_reg);
  %}
  ins_pipe(vmuldiv_fp128);
%}
3793
// Double vector multiply (MulVD) on vecX operands, 2 elements, fmul with
// the T2D arrangement.
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ fmul(dst_reg, __ T2D, src1_reg, src2_reg);
  %}
  ins_pipe(vmuldiv_fp128);
%}
3807
3808// --------------------------------- MLA --------------------------------------
3809
// Short multiply-accumulate on vecD operands: matches
// AddVS dst (MulVS src1 src2), so dst is both an input and the result.
// The predicate accepts 2- and 4-element vectors; both use T4H.
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    FloatRegister acc_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ mlav(acc_reg, __ T4H, src1_reg, src2_reg);
  %}
  ins_pipe(vmla64);
%}
3824
// Short multiply-accumulate on vecX operands: matches
// AddVS dst (MulVS src1 src2), so dst is both an input and the result.
// 8 elements, T8H arrangement.
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    FloatRegister acc_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ mlav(acc_reg, __ T8H, src1_reg, src2_reg);
  %}
  ins_pipe(vmla128);
%}
3838
// Int multiply-accumulate on vecD operands: matches
// AddVI dst (MulVI src1 src2), so dst is both an input and the result.
// 2 elements, T2S arrangement.
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister acc_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ mlav(acc_reg, __ T2S, src1_reg, src2_reg);
  %}
  ins_pipe(vmla64);
%}
3852
// Int multiply-accumulate on vecX operands: matches
// AddVI dst (MulVI src1 src2), so dst is both an input and the result.
// 4 elements, T4S arrangement.
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister acc_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ mlav(acc_reg, __ T4S, src1_reg, src2_reg);
  %}
  ins_pipe(vmla128);
%}
3866
// dst + src1 * src2
// Float fused multiply-add on vecD operands (FmaVF), 2 elements, fmla with
// the T2S arrangement. Only selected when UseFMA is set.
instruct vmla2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF dst (Binary src1 src2)));
  ins_cost(INSN_COST);
  format %{ "fmla  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister acc_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ fmla(acc_reg, __ T2S, src1_reg, src2_reg);
  %}
  ins_pipe(vmuldiv_fp64);
%}
3881
// dst + src1 * src2
// Float fused multiply-add on vecX operands (FmaVF), 4 elements, fmla with
// the T4S arrangement. Only selected when UseFMA is set.
instruct vmla4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF dst (Binary src1 src2)));
  ins_cost(INSN_COST);
  format %{ "fmla  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister acc_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ fmla(acc_reg, __ T4S, src1_reg, src2_reg);
  %}
  ins_pipe(vmuldiv_fp128);
%}
3896
// dst + src1 * src2
// Double fused multiply-add on vecX operands (FmaVD), 2 elements, fmla
// with the T2D arrangement. Only selected when UseFMA is set.
instruct vmla2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD dst (Binary src1 src2)));
  ins_cost(INSN_COST);
  format %{ "fmla  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    FloatRegister acc_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ fmla(acc_reg, __ T2D, src1_reg, src2_reg);
  %}
  ins_pipe(vmuldiv_fp128);
%}
3911
3912// --------------------------------- MLS --------------------------------------
3913
// Short multiply-subtract on vecD operands: matches
// SubVS dst (MulVS src1 src2), so dst is both an input and the result.
// The predicate accepts 2- and 4-element vectors; both use T4H.
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    FloatRegister acc_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ mlsv(acc_reg, __ T4H, src1_reg, src2_reg);
  %}
  ins_pipe(vmla64);
%}
3927
// Short multiply-subtract on vecX operands: matches
// SubVS dst (MulVS src1 src2), so dst is both an input and the result.
// 8 elements, T8H arrangement.
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    FloatRegister acc_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ mlsv(acc_reg, __ T8H, src1_reg, src2_reg);
  %}
  ins_pipe(vmla128);
%}
3940
// Int multiply-subtract on vecD operands: matches
// SubVI dst (MulVI src1 src2), so dst is both an input and the result.
// 2 elements, T2S arrangement.
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister acc_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ mlsv(acc_reg, __ T2S, src1_reg, src2_reg);
  %}
  ins_pipe(vmla64);
%}
3953
// Int multiply-subtract on vecX operands: matches
// SubVI dst (MulVI src1 src2), so dst is both an input and the result.
// 4 elements, T4S arrangement.
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister acc_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ mlsv(acc_reg, __ T4S, src1_reg, src2_reg);
  %}
  ins_pipe(vmla128);
%}
3966
// dst - src1 * src2
// Float fused multiply-subtract on vecD operands, 2 elements, fmls with
// the T2S arrangement. Matches FmaVF with either multiplicand wrapped in
// NegVF. Only selected when UseFMA is set.
instruct vmls2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF dst (Binary src1 (NegVF src2))));
  ins_cost(INSN_COST);
  format %{ "fmls  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister acc_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ fmls(acc_reg, __ T2S, src1_reg, src2_reg);
  %}
  ins_pipe(vmuldiv_fp64);
%}
3981
// dst - src1 * src2
// Float fused multiply-subtract on vecX operands, 4 elements, fmls with
// the T4S arrangement. Matches FmaVF with either multiplicand wrapped in
// NegVF. Only selected when UseFMA is set.
instruct vmls4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF dst (Binary src1 (NegVF src2))));
  ins_cost(INSN_COST);
  format %{ "fmls  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister acc_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ fmls(acc_reg, __ T4S, src1_reg, src2_reg);
  %}
  ins_pipe(vmuldiv_fp128);
%}
3996
// dst - src1 * src2
// Double fused multiply-subtract on vecX operands, 2 elements, fmls with
// the T2D arrangement. Matches FmaVD with either multiplicand wrapped in
// NegVD. Only selected when UseFMA is set.
instruct vmls2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD dst (Binary (NegVD src1) src2)));
  match(Set dst (FmaVD dst (Binary src1 (NegVD src2))));
  ins_cost(INSN_COST);
  format %{ "fmls  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    FloatRegister acc_reg  = $dst$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;
    __ fmls(acc_reg, __ T2D, src1_reg, src2_reg);
  %}
  ins_pipe(vmuldiv_fp128);
%}
4011
4012// --------------- Vector Multiply-Add Shorts into Integer --------------------
4013
// Multiply-add of short vectors into an int vector (MulAddVS2VI); only
// selected when the first input has T_SHORT elements. Two smullv products
// are formed (T4H into tmp, T8H into dst) and then combined with a
// pairwise add (addpv, T4S) into dst.
instruct vmuladdS2I(vecX dst, vecX src1, vecX src2, vecX tmp)
%{
  predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MulAddVS2VI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "smullv  $tmp, $src1, $src2\t# vector (4H)\n\t"
            "smullv  $dst, $src1, $src2\t# vector (8H)\n\t"
            "addpv   $dst, $tmp, $dst\t# vector (4S)\n\t" %}
  ins_encode %{
    FloatRegister dst_reg  = $dst$$FloatRegister;
    FloatRegister tmp_reg  = $tmp$$FloatRegister;
    FloatRegister src1_reg = $src1$$FloatRegister;
    FloatRegister src2_reg = $src2$$FloatRegister;

    __ smullv(tmp_reg, __ T4H, src1_reg, src2_reg);
    __ smullv(dst_reg, __ T8H, src1_reg, src2_reg);
    __ addpv(dst_reg, __ T4S, tmp_reg, dst_reg);
  %}
  ins_pipe(vmuldiv_fp128);
%}
4035
4036// --------------------------------- DIV --------------------------------------
4037
// Float vector division (DivVF) for two lanes: src1 / src2 via fdiv, 2S.
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister quot_reg     = as_FloatRegister($dst$$reg);
    FloatRegister dividend_reg = as_FloatRegister($src1$$reg);
    FloatRegister divisor_reg  = as_FloatRegister($src2$$reg);
    __ fdiv(quot_reg, __ T2S, dividend_reg, divisor_reg);
  %}
  ins_pipe(vmuldiv_fp64);
%}
4051
// Float vector division (DivVF) for four lanes: src1 / src2 via fdiv, 4S.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister quot_reg     = as_FloatRegister($dst$$reg);
    FloatRegister dividend_reg = as_FloatRegister($src1$$reg);
    FloatRegister divisor_reg  = as_FloatRegister($src2$$reg);
    __ fdiv(quot_reg, __ T4S, dividend_reg, divisor_reg);
  %}
  ins_pipe(vmuldiv_fp128);
%}
4065
// Double vector division (DivVD) for two lanes: src1 / src2 via fdiv, 2D.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    FloatRegister quot_reg     = as_FloatRegister($dst$$reg);
    FloatRegister dividend_reg = as_FloatRegister($src1$$reg);
    FloatRegister divisor_reg  = as_FloatRegister($src2$$reg);
    __ fdiv(quot_reg, __ T2D, dividend_reg, divisor_reg);
  %}
  ins_pipe(vmuldiv_fp128);
%}
4079
4080// --------------------------------- SQRT -------------------------------------
4081
// Float vector square root (SqrtVF) for two lanes, emitted as fsqrt with
// the 2S arrangement. The format names the arrangement that is encoded.
instruct vsqrt2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF src));
  format %{ "fsqrt  $dst, $src\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ fsqrt(dst_reg, __ T2S, src_reg);
  %}
  ins_pipe(vunop_fp64);
%}
4092
// Float vector square root (SqrtVF) for four lanes, emitted as fsqrt with
// the 4S arrangement. The format names the arrangement that is encoded.
instruct vsqrt4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF src));
  format %{ "fsqrt  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ fsqrt(dst_reg, __ T4S, src_reg);
  %}
  ins_pipe(vsqrt_fp128);
%}
4103
// Double vector square root (SqrtVD) for two lanes, emitted as fsqrt, 2D.
instruct vsqrt2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ fsqrt(dst_reg, __ T2D, src_reg);
  %}
  ins_pipe(vsqrt_fp128);
%}
4114
4115// --------------------------------- NEG --------------------------------------
4116
// Float vector negation (NegVF) for two lanes, emitted as fneg, 2S.
// Costed at three times INSN_COST.
instruct vneg2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ fneg(dst_reg, __ T2S, src_reg);
  %}
  ins_pipe(vunop_fp64);
%}
4129
// Float vector negation (NegVF) for four lanes, emitted as fneg, 4S.
// Costed at three times INSN_COST.
instruct vneg4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ fneg(dst_reg, __ T4S, src_reg);
  %}
  ins_pipe(vunop_fp128);
%}
4142
// Double vector negation (NegVD) for two lanes, emitted as fneg, 2D.
// Costed at three times INSN_COST.
instruct vneg2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ fneg(dst_reg, __ T2D, src_reg);
  %}
  ins_pipe(vunop_fp128);
%}
4155
4156// --------------------------------- AND --------------------------------------
4157
// Bitwise AND (AndV) for vectors of 4 or 8 bytes; both sizes are encoded
// with the 8B arrangement.
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ andr(dst_reg, __ T8B, lhs_reg, rhs_reg);
  %}
  ins_pipe(vlogical64);
%}
4172
// Bitwise AND (AndV) for 16-byte vectors, encoded with the 16B arrangement.
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ andr(dst_reg, __ T16B, lhs_reg, rhs_reg);
  %}
  ins_pipe(vlogical128);
%}
4186
4187// --------------------------------- OR ---------------------------------------
4188
// Bitwise OR (OrV) for vectors of 4 or 8 bytes; both sizes are encoded
// with the 8B arrangement.
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ orr(dst_reg, __ T8B, lhs_reg, rhs_reg);
  %}
  ins_pipe(vlogical64);
%}
4203
// Bitwise OR (OrV) for 16-byte vectors, encoded with the 16B arrangement.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ orr(dst_reg, __ T16B, lhs_reg, rhs_reg);
  %}
  ins_pipe(vlogical128);
%}
4217
4218// --------------------------------- XOR --------------------------------------
4219
// Bitwise XOR (XorV) for vectors of 4 or 8 bytes; both sizes are encoded
// with the 8B arrangement. The format prints the mnemonic that is actually
// emitted (eor).
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "eor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ eor(dst_reg, __ T8B, lhs_reg, rhs_reg);
  %}
  ins_pipe(vlogical64);
%}
4234
// Bitwise XOR (XorV) for 16-byte vectors, encoded with the 16B arrangement.
// The format prints the mnemonic that is actually emitted (eor).
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "eor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ eor(dst_reg, __ T16B, lhs_reg, rhs_reg);
  %}
  ins_pipe(vlogical128);
%}
4248
4249// ------------------------------ Shift ---------------------------------------
4250
// Builds a shift-count vector of 4 or 8 bytes by duplicating the scalar
// count register into every byte lane (dup, 8B). Serves both LShiftCntV
// and RShiftCntV.
instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count vector (8B)" %}
  ins_encode %{
    FloatRegister counts_reg = as_FloatRegister($dst$$reg);
    Register scalar_cnt = as_Register($cnt$$reg);
    __ dup(counts_reg, __ T8B, scalar_cnt);
  %}
  ins_pipe(vdup_reg_reg64);
%}
4262
// Builds a 16-byte shift-count vector by duplicating the scalar count
// register into every byte lane (dup, 16B). Serves both LShiftCntV and
// RShiftCntV.
instruct vshiftcnt16B(vecX dst, iRegIorL2I cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count vector (16B)" %}
  ins_encode %{
    FloatRegister counts_reg = as_FloatRegister($dst$$reg);
    Register scalar_cnt = as_Register($cnt$$reg);
    __ dup(counts_reg, __ T16B, scalar_cnt);
  %}
  ins_pipe(vdup_reg_reg128);
%}
4273
// Left shift of byte lanes by a vector shift count (LShiftVB) for vectors
// of 4 or 8 elements, emitted as sshl with the 8B arrangement.
instruct vsll8B(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ sshl(dst_reg, __ T8B, src_reg, cnt_reg);
  %}
  ins_pipe(vshift64);
%}
4287
// Left shift of byte lanes by a vector shift count (LShiftVB) for
// 16-element vectors, emitted as sshl with the 16B arrangement.
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ sshl(dst_reg, __ T16B, src_reg, cnt_reg);
  %}
  ins_pipe(vshift128);
%}
4300
4301// Right shifts with vector shift count on aarch64 SIMD are implemented
4302// as left shift by negative shift count.
4303// There are two cases for vector shift count.
4304//
4305// Case 1: The vector shift count is from replication.
4306//        |            |
4307//    LoadVector  RShiftCntV
4308//        |       /
4309//     RShiftVI
// Note: In an inner loop, multiple neg instructions are emitted; they could
// be hoisted to the outer loop and merged into a single neg instruction.
4312//
4313// Case 2: The vector shift count is from loading.
4314// This case isn't supported by middle-end now. But it's supported by
4315// panama/vectorIntrinsics(JEP 338: Vector API).
4316//        |            |
4317//    LoadVector  LoadVector
4318//        |       /
4319//     RShiftVI
4320//
4321
// Signed right shift of byte lanes by a vector shift count (RShiftVB) for
// vectors of 4 or 8 elements. The count is negated into tmp and then
// applied with sshl, i.e. a left shift by a negative count.
instruct vsra8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  // One line per emitted instruction.
  format %{ "negr  $tmp,$shift\n\t"
            "sshl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    FloatRegister neg_reg = as_FloatRegister($tmp$$reg);
    __ negr(neg_reg, __ T8B, cnt_reg);
    __ sshl(dst_reg, __ T8B, src_reg, neg_reg);
  %}
  ins_pipe(vshift64);
%}
4339
// Signed right shift of byte lanes by a vector shift count (RShiftVB) for
// 16-element vectors. The count is negated into tmp and then applied with
// sshl, i.e. a left shift by a negative count.
instruct vsra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  // One line per emitted instruction.
  format %{ "negr  $tmp,$shift\n\t"
            "sshl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    FloatRegister neg_reg = as_FloatRegister($tmp$$reg);
    __ negr(neg_reg, __ T16B, cnt_reg);
    __ sshl(dst_reg, __ T16B, src_reg, neg_reg);
  %}
  ins_pipe(vshift128);
%}
4356
// Unsigned right shift of byte lanes by a vector shift count (URShiftVB)
// for vectors of 4 or 8 elements. The count is negated into tmp and then
// applied with ushl, i.e. a left shift by a negative count.
instruct vsrl8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  // One line per emitted instruction.
  format %{ "negr  $tmp,$shift\n\t"
            "ushl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    FloatRegister neg_reg = as_FloatRegister($tmp$$reg);
    __ negr(neg_reg, __ T8B, cnt_reg);
    __ ushl(dst_reg, __ T8B, src_reg, neg_reg);
  %}
  ins_pipe(vshift64);
%}
4374
// Unsigned right shift of byte lanes by a vector shift count (URShiftVB)
// for 16-element vectors. The count is negated into tmp and then applied
// with ushl, i.e. a left shift by a negative count.
instruct vsrl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  // One line per emitted instruction.
  format %{ "negr  $tmp,$shift\n\t"
            "ushl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    FloatRegister neg_reg = as_FloatRegister($tmp$$reg);
    __ negr(neg_reg, __ T16B, cnt_reg);
    __ ushl(dst_reg, __ T16B, src_reg, neg_reg);
  %}
  ins_pipe(vshift128);
%}
4391
// Left shift of byte lanes by an immediate count (LShiftVB of LShiftCntV)
// for vectors of 4 or 8 elements. Counts of 8 or more clear dst via
// eor(src, src); smaller counts use shl.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    const int count = static_cast<int>($shift$$constant);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    if (count < 8) {
      __ shl(dst_reg, __ T8B, src_reg, count);
    } else {
      // Count covers the whole lane: produce zero.
      __ eor(dst_reg, __ T8B, src_reg, src_reg);
    }
  %}
  ins_pipe(vshift64_imm);
%}
4411
// Left shift of byte lanes by an immediate count (LShiftVB of LShiftCntV)
// for 16-element vectors. Counts of 8 or more clear dst via eor(src, src);
// smaller counts use shl.
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    const int count = static_cast<int>($shift$$constant);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    if (count < 8) {
      __ shl(dst_reg, __ T16B, src_reg, count);
    } else {
      // Count covers the whole lane: produce zero.
      __ eor(dst_reg, __ T16B, src_reg, src_reg);
    }
  %}
  ins_pipe(vshift128_imm);
%}
4430
// Signed right shift of byte lanes by an immediate count (RShiftVB of
// RShiftCntV) for vectors of 4 or 8 elements. Counts of 8 or more are
// clamped to 7 before emitting sshr.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    const int requested = static_cast<int>($shift$$constant);
    const int count = (requested >= 8) ? 7 : requested;
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ sshr(dst_reg, __ T8B, src_reg, count);
  %}
  ins_pipe(vshift64_imm);
%}
4445
// Signed right shift of byte lanes by an immediate count (RShiftVB of
// RShiftCntV) for 16-element vectors. Counts of 8 or more are clamped to 7
// before emitting sshr.
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    const int requested = static_cast<int>($shift$$constant);
    const int count = (requested >= 8) ? 7 : requested;
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ sshr(dst_reg, __ T16B, src_reg, count);
  %}
  ins_pipe(vshift128_imm);
%}
4459
// Unsigned right shift of byte lanes by an immediate count (URShiftVB of
// RShiftCntV) for vectors of 4 or 8 elements. Counts of 8 or more clear
// dst via eor(src, src); smaller counts use ushr.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    const int count = static_cast<int>($shift$$constant);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    if (count < 8) {
      __ ushr(dst_reg, __ T8B, src_reg, count);
    } else {
      // Count covers the whole lane: produce zero.
      __ eor(dst_reg, __ T8B, src_reg, src_reg);
    }
  %}
  ins_pipe(vshift64_imm);
%}
4479
// Unsigned right shift of byte lanes by an immediate count (URShiftVB of
// RShiftCntV) for 16-element vectors. Counts of 8 or more clear dst via
// eor(src, src); smaller counts use ushr.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    const int count = static_cast<int>($shift$$constant);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    if (count < 8) {
      __ ushr(dst_reg, __ T16B, src_reg, count);
    } else {
      // Count covers the whole lane: produce zero.
      __ eor(dst_reg, __ T16B, src_reg, src_reg);
    }
  %}
  ins_pipe(vshift128_imm);
%}
4498
// Left shift of short lanes by a vector shift count (LShiftVS) for vectors
// of 2 or 4 elements, emitted as sshl with the 4H arrangement.
instruct vsll4S(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ sshl(dst_reg, __ T4H, src_reg, cnt_reg);
  %}
  ins_pipe(vshift64);
%}
4512
// Left shift of short lanes by a vector shift count (LShiftVS) for
// 8-element vectors, emitted as sshl with the 8H arrangement.
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ sshl(dst_reg, __ T8H, src_reg, cnt_reg);
  %}
  ins_pipe(vshift128);
%}
4525
// Signed right shift of short lanes by a vector shift count (RShiftVS) for
// vectors of 2 or 4 elements. The count is negated into tmp and then
// applied with sshl (4H), i.e. a left shift by a negative count.
// NOTE(review): the negation is byte-wise (T8B) while the shift is per
// halfword; presumably only the low byte of each lane is used as the
// count - confirm.
instruct vsra4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  // One line per emitted instruction.
  format %{ "negr  $tmp,$shift\n\t"
            "sshl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    FloatRegister neg_reg = as_FloatRegister($tmp$$reg);
    __ negr(neg_reg, __ T8B, cnt_reg);
    __ sshl(dst_reg, __ T4H, src_reg, neg_reg);
  %}
  ins_pipe(vshift64);
%}
4543
// Signed right shift of short lanes by a vector shift count (RShiftVS) for
// 8-element vectors. The count is negated into tmp and then applied with
// sshl (8H), i.e. a left shift by a negative count.
// NOTE(review): the negation is byte-wise (T16B) while the shift is per
// halfword; presumably only the low byte of each lane is used as the
// count - confirm.
instruct vsra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  // One line per emitted instruction.
  format %{ "negr  $tmp,$shift\n\t"
            "sshl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    FloatRegister neg_reg = as_FloatRegister($tmp$$reg);
    __ negr(neg_reg, __ T16B, cnt_reg);
    __ sshl(dst_reg, __ T8H, src_reg, neg_reg);
  %}
  ins_pipe(vshift128);
%}
4560
// Unsigned right shift of short lanes by a vector shift count (URShiftVS)
// for vectors of 2 or 4 elements. The count is negated into tmp and then
// applied with ushl (4H), i.e. a left shift by a negative count.
// NOTE(review): the negation is byte-wise (T8B) while the shift is per
// halfword; presumably only the low byte of each lane is used as the
// count - confirm.
instruct vsrl4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  // One line per emitted instruction.
  format %{ "negr  $tmp,$shift\n\t"
            "ushl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    FloatRegister neg_reg = as_FloatRegister($tmp$$reg);
    __ negr(neg_reg, __ T8B, cnt_reg);
    __ ushl(dst_reg, __ T4H, src_reg, neg_reg);
  %}
  ins_pipe(vshift64);
%}
4578
// Unsigned right shift of short lanes by a vector shift count (URShiftVS)
// for 8-element vectors. The count is negated into tmp and then applied
// with ushl (8H), i.e. a left shift by a negative count.
// NOTE(review): the negation is byte-wise (T16B) while the shift is per
// halfword; presumably only the low byte of each lane is used as the
// count - confirm.
instruct vsrl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  // One line per emitted instruction.
  format %{ "negr  $tmp,$shift\n\t"
            "ushl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    FloatRegister neg_reg = as_FloatRegister($tmp$$reg);
    __ negr(neg_reg, __ T16B, cnt_reg);
    __ ushl(dst_reg, __ T8H, src_reg, neg_reg);
  %}
  ins_pipe(vshift128);
%}
4595
// Left shift of short lanes by an immediate count (LShiftVS of LShiftCntV)
// for vectors of 2 or 4 elements. Counts of 16 or more clear dst via
// eor(src, src); smaller counts use shl (4H).
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    const int count = static_cast<int>($shift$$constant);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    if (count < 16) {
      __ shl(dst_reg, __ T4H, src_reg, count);
    } else {
      // Count covers the whole lane: produce zero.
      __ eor(dst_reg, __ T8B, src_reg, src_reg);
    }
  %}
  ins_pipe(vshift64_imm);
%}
4615
// Left shift of short lanes by an immediate count (LShiftVS of LShiftCntV)
// for 8-element vectors. Counts of 16 or more clear dst via eor(src, src);
// smaller counts use shl (8H).
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    const int count = static_cast<int>($shift$$constant);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    if (count < 16) {
      __ shl(dst_reg, __ T8H, src_reg, count);
    } else {
      // Count covers the whole lane: produce zero.
      __ eor(dst_reg, __ T16B, src_reg, src_reg);
    }
  %}
  ins_pipe(vshift128_imm);
%}
4634
// Signed right shift of short lanes by an immediate count (RShiftVS of
// RShiftCntV) for vectors of 2 or 4 elements. Counts of 16 or more are
// clamped to 15 before emitting sshr (4H).
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    const int requested = static_cast<int>($shift$$constant);
    const int count = (requested >= 16) ? 15 : requested;
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ sshr(dst_reg, __ T4H, src_reg, count);
  %}
  ins_pipe(vshift64_imm);
%}
4649
// Signed right shift of short lanes by an immediate count (RShiftVS of
// RShiftCntV) for 8-element vectors. Counts of 16 or more are clamped to
// 15 before emitting sshr (8H).
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    const int requested = static_cast<int>($shift$$constant);
    const int count = (requested >= 16) ? 15 : requested;
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ sshr(dst_reg, __ T8H, src_reg, count);
  %}
  ins_pipe(vshift128_imm);
%}
4663
// Unsigned right shift of short lanes by an immediate count (URShiftVS of
// RShiftCntV) for vectors of 2 or 4 elements. Counts of 16 or more clear
// dst via eor(src, src); smaller counts use ushr (4H).
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    const int count = static_cast<int>($shift$$constant);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    if (count < 16) {
      __ ushr(dst_reg, __ T4H, src_reg, count);
    } else {
      // Count covers the whole lane: produce zero.
      __ eor(dst_reg, __ T8B, src_reg, src_reg);
    }
  %}
  ins_pipe(vshift64_imm);
%}
4683
// Unsigned right shift of short lanes by an immediate count (URShiftVS of
// RShiftCntV) for 8-element vectors. Counts of 16 or more clear dst via
// eor(src, src); smaller counts use ushr (8H).
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    const int count = static_cast<int>($shift$$constant);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    if (count < 16) {
      __ ushr(dst_reg, __ T8H, src_reg, count);
    } else {
      // Count covers the whole lane: produce zero.
      __ eor(dst_reg, __ T16B, src_reg, src_reg);
    }
  %}
  ins_pipe(vshift128_imm);
%}
4702
// Left shift of int lanes by a vector shift count (LShiftVI) for
// 2-element vectors, emitted as sshl with the 2S arrangement.
instruct vsll2I(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ sshl(dst_reg, __ T2S, src_reg, cnt_reg);
  %}
  ins_pipe(vshift64);
%}
4715
// Left shift of int lanes by a vector shift count (LShiftVI) for
// 4-element vectors, emitted as sshl with the 4S arrangement.
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ sshl(dst_reg, __ T4S, src_reg, cnt_reg);
  %}
  ins_pipe(vshift128);
%}
4728
// Signed right shift of int lanes by a vector shift count (RShiftVI) for
// 2-element vectors. The count is negated into tmp and then applied with
// sshl (2S), i.e. a left shift by a negative count.
// NOTE(review): the negation is byte-wise (T8B) while the shift is per
// word; presumably only the low byte of each lane is used as the count -
// confirm.
instruct vsra2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  // One line per emitted instruction.
  format %{ "negr  $tmp,$shift\n\t"
            "sshl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    FloatRegister neg_reg = as_FloatRegister($tmp$$reg);
    __ negr(neg_reg, __ T8B, cnt_reg);
    __ sshl(dst_reg, __ T2S, src_reg, neg_reg);
  %}
  ins_pipe(vshift64);
%}
4745
// Signed right shift of int lanes by a vector shift count (RShiftVI) for
// 4-element vectors. The count is negated into tmp and then applied with
// sshl (4S), i.e. a left shift by a negative count.
// NOTE(review): the negation is byte-wise (T16B) while the shift is per
// word; presumably only the low byte of each lane is used as the count -
// confirm.
instruct vsra4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  // One line per emitted instruction.
  format %{ "negr  $tmp,$shift\n\t"
            "sshl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    FloatRegister neg_reg = as_FloatRegister($tmp$$reg);
    __ negr(neg_reg, __ T16B, cnt_reg);
    __ sshl(dst_reg, __ T4S, src_reg, neg_reg);
  %}
  ins_pipe(vshift128);
%}
4762
// Unsigned right shift of int lanes by a vector shift count (URShiftVI)
// for 2-element vectors. The count is negated into tmp and then applied
// with ushl (2S), i.e. a left shift by a negative count.
// NOTE(review): the negation is byte-wise (T8B) while the shift is per
// word; presumably only the low byte of each lane is used as the count -
// confirm.
instruct vsrl2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  // One line per emitted instruction.
  format %{ "negr  $tmp,$shift\n\t"
            "ushl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    FloatRegister neg_reg = as_FloatRegister($tmp$$reg);
    __ negr(neg_reg, __ T8B, cnt_reg);
    __ ushl(dst_reg, __ T2S, src_reg, neg_reg);
  %}
  ins_pipe(vshift64);
%}
4779
// Unsigned right shift of int lanes by a vector shift count (URShiftVI)
// for 4-element vectors. The count is negated into tmp and then applied
// with ushl (4S), i.e. a left shift by a negative count.
// NOTE(review): the negation is byte-wise (T16B) while the shift is per
// word; presumably only the low byte of each lane is used as the count -
// confirm.
instruct vsrl4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  // One line per emitted instruction.
  format %{ "negr  $tmp,$shift\n\t"
            "ushl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    FloatRegister neg_reg = as_FloatRegister($tmp$$reg);
    __ negr(neg_reg, __ T16B, cnt_reg);
    __ ushl(dst_reg, __ T4S, src_reg, neg_reg);
  %}
  ins_pipe(vshift128);
%}
4796
// Left shift of int lanes by an immediate count (LShiftVI of LShiftCntV)
// for 2-element vectors, emitted as shl (2S). The constant is passed to
// the assembler unmodified.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    const int count = static_cast<int>($shift$$constant);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ shl(dst_reg, __ T2S, src_reg, count);
  %}
  ins_pipe(vshift64_imm);
%}
4809
// Left shift of int lanes by an immediate count (LShiftVI of LShiftCntV)
// for 4-element vectors, emitted as shl (4S). The constant is passed to
// the assembler unmodified.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    const int count = static_cast<int>($shift$$constant);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ shl(dst_reg, __ T4S, src_reg, count);
  %}
  ins_pipe(vshift128_imm);
%}
4822
// Signed right shift of int lanes by an immediate count (RShiftVI of
// RShiftCntV) for 2-element vectors, emitted as sshr (2S). The constant is
// passed to the assembler unmodified.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    const int count = static_cast<int>($shift$$constant);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ sshr(dst_reg, __ T2S, src_reg, count);
  %}
  ins_pipe(vshift64_imm);
%}
4835
// Signed right shift of int lanes by an immediate count (RShiftVI of
// RShiftCntV) for 4-element vectors, emitted as sshr (4S). The constant is
// passed to the assembler unmodified.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    const int count = static_cast<int>($shift$$constant);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ sshr(dst_reg, __ T4S, src_reg, count);
  %}
  ins_pipe(vshift128_imm);
%}
4848
// Unsigned right shift of int lanes by an immediate count (URShiftVI of
// RShiftCntV) for 2-element vectors, emitted as ushr (2S). The constant is
// passed to the assembler unmodified.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    const int count = static_cast<int>($shift$$constant);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ ushr(dst_reg, __ T2S, src_reg, count);
  %}
  ins_pipe(vshift64_imm);
%}
4861
// Unsigned right shift of int lanes by an immediate count (URShiftVI of
// RShiftCntV) for 4-element vectors, emitted as ushr (4S). The constant is
// passed to the assembler unmodified.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    const int count = static_cast<int>($shift$$constant);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ ushr(dst_reg, __ T4S, src_reg, count);
  %}
  ins_pipe(vshift128_imm);
%}
4874
// Left shift of long lanes by a vector shift count (LShiftVL) for
// 2-element vectors, emitted as sshl with the 2D arrangement.
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ sshl(dst_reg, __ T2D, src_reg, cnt_reg);
  %}
  ins_pipe(vshift128);
%}
4887
// Signed right shift of long lanes by a vector shift count (RShiftVL) for
// 2-element vectors. The count is negated into tmp and then applied with
// sshl (2D), i.e. a left shift by a negative count.
// NOTE(review): the negation is byte-wise (T16B) while the shift is per
// doubleword; presumably only the low byte of each lane is used as the
// count - confirm.
instruct vsra2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  // One line per emitted instruction.
  format %{ "negr  $tmp,$shift\n\t"
            "sshl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    FloatRegister neg_reg = as_FloatRegister($tmp$$reg);
    __ negr(neg_reg, __ T16B, cnt_reg);
    __ sshl(dst_reg, __ T2D, src_reg, neg_reg);
  %}
  ins_pipe(vshift128);
%}
4904
// Unsigned right shift of long lanes by a vector shift count (URShiftVL)
// for 2-element vectors. The count is negated into tmp and then applied
// with ushl (2D), i.e. a left shift by a negative count.
// NOTE(review): the negation is byte-wise (T16B) while the shift is per
// doubleword; presumably only the low byte of each lane is used as the
// count - confirm.
instruct vsrl2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  // One line per emitted instruction.
  format %{ "negr  $tmp,$shift\n\t"
            "ushl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    FloatRegister neg_reg = as_FloatRegister($tmp$$reg);
    __ negr(neg_reg, __ T16B, cnt_reg);
    __ ushl(dst_reg, __ T2D, src_reg, neg_reg);
  %}
  ins_pipe(vshift128);
%}
4921
// Left shift of long lanes by an immediate count (LShiftVL of LShiftCntV)
// for 2-element vectors, emitted as shl (2D). The constant is passed to
// the assembler unmodified.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    const int count = static_cast<int>($shift$$constant);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ shl(dst_reg, __ T2D, src_reg, count);
  %}
  ins_pipe(vshift128_imm);
%}
4934
// Vector arithmetic shift right by an immediate (RShiftVL with a constant
// RShiftCntV), two 64-bit lanes.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int shift_imm = (int)$shift$$constant;
    __ sshr(dst_reg, __ T2D, src_reg, shift_imm);
  %}
  ins_pipe(vshift128_imm);
%}
4947
// Vector logical shift right by an immediate (URShiftVL with a constant
// RShiftCntV), two 64-bit lanes.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int shift_imm = (int)$shift$$constant;
    __ ushr(dst_reg, __ T2D, src_reg, shift_imm);
  %}
  ins_pipe(vshift128_imm);
%}
4960
// Fused arithmetic shift right by an immediate plus accumulate, eight 8-bit
// lanes: dst = dst + (src >> shift), matched from AddVB over RShiftVB.
instruct vsraa8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst (RShiftVB src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int requested = (int)$shift$$constant;
    // Counts of 8 or more are clamped to 7; an arithmetic shift of an 8-bit
    // lane by 7 already leaves only copies of the sign bit.
    const int sh = (requested >= 8) ? 7 : requested;
    __ ssra(acc_reg, __ T8B, src_reg, sh);
  %}
  ins_pipe(vshift64_imm);
%}
4974
// Fused arithmetic shift right by an immediate plus accumulate, sixteen
// 8-bit lanes: dst = dst + (src >> shift), matched from AddVB over RShiftVB.
instruct vsraa16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst (RShiftVB src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int requested = (int)$shift$$constant;
    // Counts of 8 or more are clamped to 7; an arithmetic shift of an 8-bit
    // lane by 7 already leaves only copies of the sign bit.
    const int sh = (requested >= 8) ? 7 : requested;
    __ ssra(acc_reg, __ T16B, src_reg, sh);
  %}
  ins_pipe(vshift128_imm);
%}
4988
// Fused arithmetic shift right by an immediate plus accumulate, four 16-bit
// lanes (T4H): dst = dst + (src >> shift), matched from AddVS over RShiftVS.
instruct vsraa4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (RShiftVS src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int requested = (int)$shift$$constant;
    // Counts of 16 or more are clamped to 15; an arithmetic shift of a
    // 16-bit lane by 15 already leaves only copies of the sign bit.
    const int sh = (requested >= 16) ? 15 : requested;
    __ ssra(acc_reg, __ T4H, src_reg, sh);
  %}
  ins_pipe(vshift64_imm);
%}
5002
// Fused arithmetic shift right by an immediate plus accumulate, eight 16-bit
// lanes (T8H): dst = dst + (src >> shift), matched from AddVS over RShiftVS.
instruct vsraa8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (RShiftVS src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int requested = (int)$shift$$constant;
    // Counts of 16 or more are clamped to 15; an arithmetic shift of a
    // 16-bit lane by 15 already leaves only copies of the sign bit.
    const int sh = (requested >= 16) ? 15 : requested;
    __ ssra(acc_reg, __ T8H, src_reg, sh);
  %}
  ins_pipe(vshift128_imm);
%}
5016
// Fused arithmetic shift right by an immediate plus accumulate, two 32-bit
// lanes: dst = dst + (src >> shift), matched from AddVI over RShiftVI.
// The shift constant is passed to SSRA without any clamping.
instruct vsraa2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (RShiftVI src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int shift_imm = (int)$shift$$constant;
    __ ssra(acc_reg, __ T2S, src_reg, shift_imm);
  %}
  ins_pipe(vshift64_imm);
%}
5029
// Fused arithmetic shift right by an immediate plus accumulate, four 32-bit
// lanes: dst = dst + (src >> shift), matched from AddVI over RShiftVI.
// The shift constant is passed to SSRA without any clamping.
instruct vsraa4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (RShiftVI src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int shift_imm = (int)$shift$$constant;
    __ ssra(acc_reg, __ T4S, src_reg, shift_imm);
  %}
  ins_pipe(vshift128_imm);
%}
5042
// Fused arithmetic shift right by an immediate plus accumulate, two 64-bit
// lanes: dst = dst + (src >> shift), matched from AddVL over RShiftVL.
// The shift constant is passed to SSRA without any clamping.
instruct vsraa2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst (RShiftVL src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int shift_imm = (int)$shift$$constant;
    __ ssra(acc_reg, __ T2D, src_reg, shift_imm);
  %}
  ins_pipe(vshift128_imm);
%}
5055
// Fused logical shift right by an immediate plus accumulate, eight 8-bit
// lanes: dst = dst + (src >>> shift), matched from AddVB over URShiftVB.
instruct vsrla8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst (URShiftVB src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "usra    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    const int shift_imm = (int)$shift$$constant;
    // Nothing is emitted for counts of 8 or more: a logical shift of an
    // 8-bit lane by that much yields zero, so dst is left as it is.
    if (shift_imm < 8) {
      FloatRegister acc_reg = as_FloatRegister($dst$$reg);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ usra(acc_reg, __ T8B, src_reg, shift_imm);
    }
  %}
  ins_pipe(vshift64_imm);
%}
5070
// Fused logical shift right by an immediate plus accumulate, sixteen 8-bit
// lanes: dst = dst + (src >>> shift), matched from AddVB over URShiftVB.
instruct vsrla16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst (URShiftVB src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "usra    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    const int shift_imm = (int)$shift$$constant;
    // Nothing is emitted for counts of 8 or more: a logical shift of an
    // 8-bit lane by that much yields zero, so dst is left as it is.
    if (shift_imm < 8) {
      FloatRegister acc_reg = as_FloatRegister($dst$$reg);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ usra(acc_reg, __ T16B, src_reg, shift_imm);
    }
  %}
  ins_pipe(vshift128_imm);
%}
5085
// Fused logical shift right by an immediate plus accumulate, four 16-bit
// lanes (T4H): dst = dst + (src >>> shift), matched from AddVS over
// URShiftVS.
instruct vsrla4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (URShiftVS src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "usra    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    const int shift_imm = (int)$shift$$constant;
    // Nothing is emitted for counts of 16 or more: a logical shift of a
    // 16-bit lane by that much yields zero, so dst is left as it is.
    if (shift_imm < 16) {
      FloatRegister acc_reg = as_FloatRegister($dst$$reg);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ usra(acc_reg, __ T4H, src_reg, shift_imm);
    }
  %}
  ins_pipe(vshift64_imm);
%}
5100
// Fused logical shift right by an immediate plus accumulate, eight 16-bit
// lanes (T8H): dst = dst + (src >>> shift), matched from AddVS over
// URShiftVS.
instruct vsrla8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (URShiftVS src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "usra    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    const int shift_imm = (int)$shift$$constant;
    // Nothing is emitted for counts of 16 or more: a logical shift of a
    // 16-bit lane by that much yields zero, so dst is left as it is.
    if (shift_imm < 16) {
      FloatRegister acc_reg = as_FloatRegister($dst$$reg);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ usra(acc_reg, __ T8H, src_reg, shift_imm);
    }
  %}
  ins_pipe(vshift128_imm);
%}
5115
// Fused logical shift right by an immediate plus accumulate, two 32-bit
// lanes: dst = dst + (src >>> shift), matched from AddVI over URShiftVI.
// The shift constant is passed to USRA without any range check.
instruct vsrla2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (URShiftVI src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "usra    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int shift_imm = (int)$shift$$constant;
    __ usra(acc_reg, __ T2S, src_reg, shift_imm);
  %}
  ins_pipe(vshift64_imm);
%}
5128
// Fused logical shift right by an immediate plus accumulate, four 32-bit
// lanes: dst = dst + (src >>> shift), matched from AddVI over URShiftVI.
// The shift constant is passed to USRA without any range check.
instruct vsrla4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (URShiftVI src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "usra    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int shift_imm = (int)$shift$$constant;
    __ usra(acc_reg, __ T4S, src_reg, shift_imm);
  %}
  ins_pipe(vshift128_imm);
%}
5141
// Fused logical shift right by an immediate plus accumulate, two 64-bit
// lanes: dst = dst + (src >>> shift), matched from AddVL over URShiftVL.
// The shift constant is passed to USRA without any range check.
instruct vsrla2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst (URShiftVL src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "usra    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int shift_imm = (int)$shift$$constant;
    __ usra(acc_reg, __ T2D, src_reg, shift_imm);
  %}
  ins_pipe(vshift128_imm);
%}
5154
// Lane-wise floating-point maximum (MaxV), two float lanes in a 64-bit
// vector. Selected only when the element type is T_FLOAT.
instruct vmax2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MaxV src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmax  $dst,$src1,$src2\t# vector (2F)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs    = as_FloatRegister($src1$$reg);
    FloatRegister rhs    = as_FloatRegister($src2$$reg);
    __ fmax(result, __ T2S, lhs, rhs);
  %}
  ins_pipe(vdop_fp64);
%}
5168
// Lane-wise floating-point maximum (MaxV), four float lanes in a 128-bit
// vector. Selected only when the element type is T_FLOAT.
instruct vmax4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MaxV src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmax  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs    = as_FloatRegister($src1$$reg);
    FloatRegister rhs    = as_FloatRegister($src2$$reg);
    __ fmax(result, __ T4S, lhs, rhs);
  %}
  ins_pipe(vdop_fp128);
%}
5182
// Lane-wise floating-point maximum (MaxV), two double lanes in a 128-bit
// vector. Selected only when the element type is T_DOUBLE.
instruct vmax2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MaxV src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmax  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs    = as_FloatRegister($src1$$reg);
    FloatRegister rhs    = as_FloatRegister($src2$$reg);
    __ fmax(result, __ T2D, lhs, rhs);
  %}
  ins_pipe(vdop_fp128);
%}
5196
// Lane-wise floating-point minimum (MinV), two float lanes in a 64-bit
// vector. Selected only when the element type is T_FLOAT.
instruct vmin2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinV src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmin  $dst,$src1,$src2\t# vector (2F)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs    = as_FloatRegister($src1$$reg);
    FloatRegister rhs    = as_FloatRegister($src2$$reg);
    __ fmin(result, __ T2S, lhs, rhs);
  %}
  ins_pipe(vdop_fp64);
%}
5210
// Lane-wise floating-point minimum (MinV), four float lanes in a 128-bit
// vector. Selected only when the element type is T_FLOAT.
instruct vmin4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinV src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmin  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs    = as_FloatRegister($src1$$reg);
    FloatRegister rhs    = as_FloatRegister($src2$$reg);
    __ fmin(result, __ T4S, lhs, rhs);
  %}
  ins_pipe(vdop_fp128);
%}
5224
// Lane-wise floating-point minimum (MinV), two double lanes in a 128-bit
// vector. Selected only when the element type is T_DOUBLE.
instruct vmin2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MinV src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmin  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);
    FloatRegister lhs    = as_FloatRegister($src1$$reg);
    FloatRegister rhs    = as_FloatRegister($src2$$reg);
    __ fmin(result, __ T2D, lhs, rhs);
  %}
  ins_pipe(vdop_fp128);
%}
5238
// Round two double lanes to integral values (RoundDoubleModeV). The constant
// rmode operand selects the instruction:
//   rmode_rint  -> frintn
//   rmode_floor -> frintm
//   rmode_ceil  -> frintp
// NOTE(review): the switch has no default case, so no instruction is emitted
// for any other rmode value - confirm that only these three can reach here.
instruct vround2D_reg(vecX dst, vecX src, immI rmode) %{
  predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "frint  $dst, $src, $rmode" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    switch ($rmode$$constant) {
      case RoundDoubleModeNode::rmode_rint:
        __ frintn(dst_reg, __ T2D, src_reg);
        break;
      case RoundDoubleModeNode::rmode_floor:
        __ frintm(dst_reg, __ T2D, src_reg);
        break;
      case RoundDoubleModeNode::rmode_ceil:
        __ frintp(dst_reg, __ T2D, src_reg);
        break;
    }
  %}
  ins_pipe(vdop_fp128);
%}
5261
// Population count of four int lanes (PopCountVI) in a 128-bit vector,
// enabled only when UsePopCountInstruction is set. Emits CNT on the sixteen
// bytes followed by two UADDLP steps, first on the 16B arrangement and then
// on the 8H arrangement, all accumulating in dst.
instruct vpopcount4I(vecX dst, vecX src) %{
  predicate(UsePopCountInstruction && n->as_Vector()->length() == 4);
  match(Set dst (PopCountVI src));
  format %{
    "cnt     $dst, $src\t# vector (16B)\n\t"
    "uaddlp  $dst, $dst\t# vector (16B)\n\t"
    "uaddlp  $dst, $dst\t# vector (8H)"
  %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    // Per-byte bit counts.
    __ cnt(dst_reg, __ T16B, src_reg);
    // Pairwise add of adjacent bytes, in place.
    __ uaddlp(dst_reg, __ T16B, dst_reg);
    // Pairwise add of adjacent halfwords, in place.
    __ uaddlp(dst_reg, __ T8H, dst_reg);
  %}
  ins_pipe(pipe_class_default);
%}
5280
// Population count of two int lanes (PopCountVI) in a 64-bit vector,
// enabled only when UsePopCountInstruction is set. Emits CNT on the eight
// bytes followed by two UADDLP steps, first on the 8B arrangement and then
// on the 4H arrangement, all accumulating in dst.
instruct vpopcount2I(vecD dst, vecD src) %{
  predicate(UsePopCountInstruction && n->as_Vector()->length() == 2);
  match(Set dst (PopCountVI src));
  format %{
    "cnt     $dst, $src\t# vector (8B)\n\t"
    "uaddlp  $dst, $dst\t# vector (8B)\n\t"
    "uaddlp  $dst, $dst\t# vector (4H)"
  %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    // Per-byte bit counts.
    __ cnt(dst_reg, __ T8B, src_reg);
    // Pairwise add of adjacent bytes, in place.
    __ uaddlp(dst_reg, __ T8B, dst_reg);
    // Pairwise add of adjacent halfwords, in place.
    __ uaddlp(dst_reg, __ T4H, dst_reg);
  %}
  ins_pipe(pipe_class_default);
%}
5299