1//
2// Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
3// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4//
5// This code is free software; you can redistribute it and/or modify it
6// under the terms of the GNU General Public License version 2 only, as
7// published by the Free Software Foundation.
8//
9// This code is distributed in the hope that it will be useful, but WITHOUT
10// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12// version 2 for more details (a copy is included in the LICENSE file that
13// accompanied this code).
14//
15// You should have received a copy of the GNU General Public License version
16// 2 along with this work; if not, write to the Free Software Foundation,
17// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18//
19// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20// or visit www.oracle.com if you need additional information or have any
21// questions.
22//
23//
24
25// X86 Common Architecture Description File
26
27//----------REGISTER DEFINITION BLOCK------------------------------------------
28// This information is used by the matcher and the register allocator to
29// describe individual registers and classes of registers within the target
30// archtecture.
31
32register %{
33//----------Architecture Description Register Definitions----------------------
34// General Registers
35// "reg_def"  name ( register save type, C convention save type,
36//                   ideal register type, encoding );
37// Register Save Types:
38//
39// NS  = No-Save:       The register allocator assumes that these registers
40//                      can be used without saving upon entry to the method, &
41//                      that they do not need to be saved at call sites.
42//
43// SOC = Save-On-Call:  The register allocator assumes that these registers
44//                      can be used without saving upon entry to the method,
45//                      but that they must be saved at call sites.
46//
47// SOE = Save-On-Entry: The register allocator assumes that these registers
48//                      must be saved before using them upon entry to the
49//                      method, but they do not need to be saved at call
50//                      sites.
51//
52// AS  = Always-Save:   The register allocator assumes that these registers
53//                      must be saved before using them upon entry to the
54//                      method, & that they must be saved at call sites.
55//
56// Ideal Register Type is used to determine how to save & restore a
57// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
59//
60// The encoding number is the actual bit-pattern placed into the opcodes.
61
62// XMM registers.  256-bit registers or 8 words each, labeled (a)-h.
63// Word a in each register holds a Float, words ab hold a Double.
64// The whole registers are used in SSE4.2 version intrinsics,
65// array copy stubs and superword operations (see UseSSE42Intrinsics,
66// UseXMMForArrayCopy and UseSuperword flags).
67// XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
68// Linux ABI:   No register preserved across function calls
69//              XMM0-XMM7 might hold parameters
70// Windows ABI: XMM6-XMM15 preserved across function calls
71//              XMM0-XMM3 might hold parameters
72
73reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
74reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
75reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
76reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
77reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
78reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
79reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
80reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
81
82reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
83reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
84reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
85reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
86reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
87reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
88reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
89reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
90
91reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
92reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
93reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
94reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
95reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
96reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
97reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
98reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
99
100reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
101reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
102reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
103reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
104reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
105reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
106reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
107reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
108
109reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
110reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
111reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
112reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
113reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
114reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
115reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
116reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
117
118reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
119reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
120reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
121reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
122reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
123reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
124reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
125reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
126
127#ifdef _WIN64
128
129reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
130reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
131reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
132reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
133reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
134reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
135reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
136reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
137
138reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
139reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
140reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
141reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
142reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
143reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
144reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
145reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
146
147reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
148reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
149reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
150reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
151reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
152reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
153reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
154reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
155
156reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
157reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
158reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
159reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
160reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
161reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
162reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
163reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
164
165reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
166reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
167reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
168reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
169reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
170reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
171reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
172reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
173
174reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
175reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
176reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
177reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
178reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
179reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
180reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
181reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
182
183reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
184reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
185reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
186reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
187reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
188reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
189reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
190reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
191
192reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
193reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
194reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
195reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
196reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
197reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
198reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
199reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
200
201reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
202reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
203reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
204reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
205reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
206reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
207reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
208reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
209
210reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
211reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
212reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
213reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
214reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
215reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
216reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
217reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
218
219#else // _WIN64
220
221reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
222reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
223reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
224reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
225reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
226reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
227reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
228reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
229
230reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
231reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
232reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
233reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
234reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
235reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
236reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
237reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
238
239#ifdef _LP64
240
241reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
242reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
243reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
244reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
245reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
246reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
247reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
248reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
249
250reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
251reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
252reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
253reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
254reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
255reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
256reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
257reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
258
259reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
260reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
261reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
262reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
263reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
264reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
265reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
266reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
267
268reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
269reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
270reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
271reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
272reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
273reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
274reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
275reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
276
277reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
278reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
279reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
280reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
281reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
282reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
283reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
284reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
285
286reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
287reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
288reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
289reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
290reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
291reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
292reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
293reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
294
295reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
296reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
297reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
298reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
299reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
300reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
301reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
302reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
303
304reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
305reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
306reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
307reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
308reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
309reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
310reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
311reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
312
313#endif // _LP64
314
315#endif // _WIN64
316
317#ifdef _LP64
318reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
319#else
320reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
321#endif // _LP64
322
323alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
324                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
325                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
326                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
327                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
328                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
329                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
330                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
331#ifdef _LP64
332                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
333                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
334                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
335                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
336                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
337                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
338                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
339                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
340#endif
341                   );
342
343// flags allocation class should be last.
344alloc_class chunk2(RFLAGS);
345
346// Singleton class for condition codes
347reg_class int_flags(RFLAGS);
348
349// Class for all float registers
350reg_class float_reg(XMM0,
351                    XMM1,
352                    XMM2,
353                    XMM3,
354                    XMM4,
355                    XMM5,
356                    XMM6,
357                    XMM7
358#ifdef _LP64
359                   ,XMM8,
360                    XMM9,
361                    XMM10,
362                    XMM11,
363                    XMM12,
364                    XMM13,
365                    XMM14,
366                    XMM15
367#endif
368                    );
369
370// Class for all double registers
371reg_class double_reg(XMM0,  XMM0b,
372                     XMM1,  XMM1b,
373                     XMM2,  XMM2b,
374                     XMM3,  XMM3b,
375                     XMM4,  XMM4b,
376                     XMM5,  XMM5b,
377                     XMM6,  XMM6b,
378                     XMM7,  XMM7b
379#ifdef _LP64
380                    ,XMM8,  XMM8b,
381                     XMM9,  XMM9b,
382                     XMM10, XMM10b,
383                     XMM11, XMM11b,
384                     XMM12, XMM12b,
385                     XMM13, XMM13b,
386                     XMM14, XMM14b,
387                     XMM15, XMM15b
388#endif
389                     );
390
391// Class for all 32bit vector registers
392reg_class vectors_reg(XMM0,
393                      XMM1,
394                      XMM2,
395                      XMM3,
396                      XMM4,
397                      XMM5,
398                      XMM6,
399                      XMM7
400#ifdef _LP64
401                     ,XMM8,
402                      XMM9,
403                      XMM10,
404                      XMM11,
405                      XMM12,
406                      XMM13,
407                      XMM14,
408                      XMM15
409#endif
410                      );
411
412// Class for all 64bit vector registers
413reg_class vectord_reg(XMM0,  XMM0b,
414                      XMM1,  XMM1b,
415                      XMM2,  XMM2b,
416                      XMM3,  XMM3b,
417                      XMM4,  XMM4b,
418                      XMM5,  XMM5b,
419                      XMM6,  XMM6b,
420                      XMM7,  XMM7b
421#ifdef _LP64
422                     ,XMM8,  XMM8b,
423                      XMM9,  XMM9b,
424                      XMM10, XMM10b,
425                      XMM11, XMM11b,
426                      XMM12, XMM12b,
427                      XMM13, XMM13b,
428                      XMM14, XMM14b,
429                      XMM15, XMM15b
430#endif
431                      );
432
433// Class for all 128bit vector registers
434reg_class vectorx_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,
435                      XMM1,  XMM1b,  XMM1c,  XMM1d,
436                      XMM2,  XMM2b,  XMM2c,  XMM2d,
437                      XMM3,  XMM3b,  XMM3c,  XMM3d,
438                      XMM4,  XMM4b,  XMM4c,  XMM4d,
439                      XMM5,  XMM5b,  XMM5c,  XMM5d,
440                      XMM6,  XMM6b,  XMM6c,  XMM6d,
441                      XMM7,  XMM7b,  XMM7c,  XMM7d
442#ifdef _LP64
443                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,
444                      XMM9,  XMM9b,  XMM9c,  XMM9d,
445                      XMM10, XMM10b, XMM10c, XMM10d,
446                      XMM11, XMM11b, XMM11c, XMM11d,
447                      XMM12, XMM12b, XMM12c, XMM12d,
448                      XMM13, XMM13b, XMM13c, XMM13d,
449                      XMM14, XMM14b, XMM14c, XMM14d,
450                      XMM15, XMM15b, XMM15c, XMM15d
451#endif
452                      );
453
454// Class for all 256bit vector registers
455reg_class vectory_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
456                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
457                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
458                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
459                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
460                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
461                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
462                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
463#ifdef _LP64
464                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
465                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
466                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
467                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
468                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
469                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
470                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
471                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
472#endif
473                      );
474
475%}
476
477
478//----------SOURCE BLOCK-------------------------------------------------------
479// This is a block of C++ code which provides values, functions, and
480// definitions necessary in the rest of the architecture description
481
482source_hpp %{
483// Header information of the source block.
484// Method declarations/definitions which are used outside
485// the ad-scope can conveniently be defined here.
486//
487// To keep related declarations/definitions/uses close together,
488// we switch between source %{ }% and source_hpp %{ }% freely as needed.
489
490class CallStubImpl {
491
492  //--------------------------------------------------------------
493  //---<  Used for optimization in Compile::shorten_branches  >---
494  //--------------------------------------------------------------
495
496 public:
497  // Size of call trampoline stub.
498  static uint size_call_trampoline() {
499    return 0; // no call trampolines on this platform
500  }
501
502  // number of relocations needed by a call trampoline stub
503  static uint reloc_call_trampoline() {
504    return 0; // no call trampolines on this platform
505  }
506};
507
508class HandlerImpl {
509
510 public:
511
512  static int emit_exception_handler(CodeBuffer &cbuf);
513  static int emit_deopt_handler(CodeBuffer& cbuf);
514
515  static uint size_exception_handler() {
516    // NativeCall instruction size is the same as NativeJump.
517    // exception handler starts out as jump and can be patched to
518    // a call be deoptimization.  (4932387)
519    // Note that this value is also credited (in output.cpp) to
520    // the size of the code section.
521    return NativeJump::instruction_size;
522  }
523
524#ifdef _LP64
525  static uint size_deopt_handler() {
526    // three 5 byte instructions
527    return 15;
528  }
529#else
530  static uint size_deopt_handler() {
531    // NativeCall instruction size is the same as NativeJump.
532    // exception handler starts out as jump and can be patched to
533    // a call be deoptimization.  (4932387)
534    // Note that this value is also credited (in output.cpp) to
535    // the size of the code section.
536    return 5 + NativeJump::instruction_size; // pushl(); jmp;
537  }
538#endif
539};
540
541%} // end source_hpp
542
543source %{
544
545// Emit exception handler code.
546// Stuff framesize into a register and call a VM stub routine.
547int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
548
549  // Note that the code buffer's insts_mark is always relative to insts.
550  // That's why we must use the macroassembler to generate a handler.
551  MacroAssembler _masm(&cbuf);
552  address base = __ start_a_stub(size_exception_handler());
553  if (base == NULL) {
554    ciEnv::current()->record_failure("CodeCache is full");
555    return 0;  // CodeBuffer::expand failed
556  }
557  int offset = __ offset();
558  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
559  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
560  __ end_a_stub();
561  return offset;
562}
563
564// Emit deopt handler code.
565int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
566
567  // Note that the code buffer's insts_mark is always relative to insts.
568  // That's why we must use the macroassembler to generate a handler.
569  MacroAssembler _masm(&cbuf);
570  address base = __ start_a_stub(size_deopt_handler());
571  if (base == NULL) {
572    ciEnv::current()->record_failure("CodeCache is full");
573    return 0;  // CodeBuffer::expand failed
574  }
575  int offset = __ offset();
576
577#ifdef _LP64
578  address the_pc = (address) __ pc();
579  Label next;
580  // push a "the_pc" on the stack without destroying any registers
581  // as they all may be live.
582
583  // push address of "next"
584  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
585  __ bind(next);
586  // adjust it so it matches "the_pc"
587  __ subptr(Address(rsp, 0), __ offset() - offset);
588#else
589  InternalAddress here(__ pc());
590  __ pushptr(here.addr());
591#endif
592
593  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
594  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
595  __ end_a_stub();
596  return offset;
597}
598
599
600//=============================================================================
601
602  // Float masks come from different places depending on platform.
603#ifdef _LP64
604  static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
605  static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
606  static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
607  static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
608#else
609  static address float_signmask()  { return (address)float_signmask_pool; }
610  static address float_signflip()  { return (address)float_signflip_pool; }
611  static address double_signmask() { return (address)double_signmask_pool; }
612  static address double_signflip() { return (address)double_signflip_pool; }
613#endif
614
615
616const bool Matcher::match_rule_supported(int opcode) {
617  if (!has_match_rule(opcode))
618    return false;
619
620  switch (opcode) {
621    case Op_PopCountI:
622    case Op_PopCountL:
623      if (!UsePopCountInstruction)
624        return false;
625    break;
626    case Op_MulVI:
627      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
628        return false;
629    break;
630    case Op_CompareAndSwapL:
631#ifdef _LP64
632    case Op_CompareAndSwapP:
633#endif
634      if (!VM_Version::supports_cx8())
635        return false;
636    break;
637  }
638
639  return true;  // Per default match rules are supported.
640}
641
642// Max vector size in bytes. 0 if not supported.
643const int Matcher::vector_width_in_bytes(BasicType bt) {
644  assert(is_java_primitive(bt), "only primitive type vectors");
645  if (UseSSE < 2) return 0;
646  // SSE2 supports 128bit vectors for all types.
647  // AVX2 supports 256bit vectors for all types.
648  int size = (UseAVX > 1) ? 32 : 16;
649  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
650  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
651    size = 32;
652  // Use flag to limit vector size.
653  size = MIN2(size,(int)MaxVectorSize);
654  // Minimum 2 values in vector (or 4 for bytes).
655  switch (bt) {
656  case T_DOUBLE:
657  case T_LONG:
658    if (size < 16) return 0;
659  case T_FLOAT:
660  case T_INT:
661    if (size < 8) return 0;
662  case T_BOOLEAN:
663  case T_BYTE:
664  case T_CHAR:
665  case T_SHORT:
666    if (size < 4) return 0;
667    break;
668  default:
669    ShouldNotReachHere();
670  }
671  return size;
672}
673
674// Limits on vector size (number of elements) loaded into vector.
675const int Matcher::max_vector_size(const BasicType bt) {
676  return vector_width_in_bytes(bt)/type2aelembytes(bt);
677}
678const int Matcher::min_vector_size(const BasicType bt) {
679  int max_size = max_vector_size(bt);
680  // Min size which can be loaded into vector is 4 bytes.
681  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
682  return MIN2(size,max_size);
683}
684
685// Vector ideal reg corresponding to specidied size in bytes
686const uint Matcher::vector_ideal_reg(int size) {
687  assert(MaxVectorSize >= size, "");
688  switch(size) {
689    case  4: return Op_VecS;
690    case  8: return Op_VecD;
691    case 16: return Op_VecX;
692    case 32: return Op_VecY;
693  }
694  ShouldNotReachHere();
695  return 0;
696}
697
698// Only lowest bits of xmm reg are used for vector shift count.
699const uint Matcher::vector_shift_count_ideal_reg(int size) {
700  return Op_VecS;
701}
702
703// x86 supports misaligned vectors store/load.
704const bool Matcher::misaligned_vectors_ok() {
705  return !AlignVector; // can be changed by flag
706}
707
708// x86 AES instructions are compatible with SunJCE expanded
709// keys, hence we do not need to pass the original key to stubs
710const bool Matcher::pass_original_key_for_aes() {
711  return false;
712}
713
714// Helper methods for MachSpillCopyNode::implementation().
715static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
716                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
717  // In 64-bit VM size calculation is very complex. Emitting instructions
718  // into scratch buffer is used to get size in 64-bit VM.
719  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
720  assert(ireg == Op_VecS || // 32bit vector
721         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
722         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
723         "no non-adjacent vector moves" );
724  if (cbuf) {
725    MacroAssembler _masm(cbuf);
726    int offset = __ offset();
727    switch (ireg) {
728    case Op_VecS: // copy whole register
729    case Op_VecD:
730    case Op_VecX:
731      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
732      break;
733    case Op_VecY:
734      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
735      break;
736    default:
737      ShouldNotReachHere();
738    }
739    int size = __ offset() - offset;
740#ifdef ASSERT
741    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
742    assert(!do_size || size == 4, "incorrect size calculattion");
743#endif
744    return size;
745#ifndef PRODUCT
746  } else if (!do_size) {
747    switch (ireg) {
748    case Op_VecS:
749    case Op_VecD:
750    case Op_VecX:
751      st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
752      break;
753    case Op_VecY:
754      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
755      break;
756    default:
757      ShouldNotReachHere();
758    }
759#endif
760  }
761  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
762  return 4;
763}
764
765static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
766                            int stack_offset, int reg, uint ireg, outputStream* st) {
767  // In 64-bit VM size calculation is very complex. Emitting instructions
768  // into scratch buffer is used to get size in 64-bit VM.
769  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
770  if (cbuf) {
771    MacroAssembler _masm(cbuf);
772    int offset = __ offset();
773    if (is_load) {
774      switch (ireg) {
775      case Op_VecS:
776        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
777        break;
778      case Op_VecD:
779        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
780        break;
781      case Op_VecX:
782        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
783        break;
784      case Op_VecY:
785        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
786        break;
787      default:
788        ShouldNotReachHere();
789      }
790    } else { // store
791      switch (ireg) {
792      case Op_VecS:
793        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
794        break;
795      case Op_VecD:
796        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
797        break;
798      case Op_VecX:
799        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
800        break;
801      case Op_VecY:
802        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
803        break;
804      default:
805        ShouldNotReachHere();
806      }
807    }
808    int size = __ offset() - offset;
809#ifdef ASSERT
810    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
811    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
812    assert(!do_size || size == (5+offset_size), "incorrect size calculattion");
813#endif
814    return size;
815#ifndef PRODUCT
816  } else if (!do_size) {
817    if (is_load) {
818      switch (ireg) {
819      case Op_VecS:
820        st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
821        break;
822      case Op_VecD:
823        st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
824        break;
825       case Op_VecX:
826        st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
827        break;
828      case Op_VecY:
829        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
830        break;
831      default:
832        ShouldNotReachHere();
833      }
834    } else { // store
835      switch (ireg) {
836      case Op_VecS:
837        st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
838        break;
839      case Op_VecD:
840        st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
841        break;
842       case Op_VecX:
843        st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
844        break;
845      case Op_VecY:
846        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
847        break;
848      default:
849        ShouldNotReachHere();
850      }
851    }
852#endif
853  }
854  int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
855  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
856  return 5+offset_size;
857}
858
859static inline jfloat replicate4_imm(int con, int width) {
860  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
861  assert(width == 1 || width == 2, "only byte or short types here");
862  int bit_width = width * 8;
863  jint val = con;
864  val &= (1 << bit_width) - 1;  // mask off sign bits
865  while(bit_width < 32) {
866    val |= (val << bit_width);
867    bit_width <<= 1;
868  }
869  jfloat fval = *((jfloat*) &val);  // coerce to float type
870  return fval;
871}
872
873static inline jdouble replicate8_imm(int con, int width) {
874  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
875  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
876  int bit_width = width * 8;
877  jlong val = con;
878  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
879  while(bit_width < 64) {
880    val |= (val << bit_width);
881    bit_width <<= 1;
882  }
883  jdouble dval = *((jdouble*) &val);  // coerce to double type
884  return dval;
885}
886
887#ifndef PRODUCT
888  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
889    st->print("nop \t# %d bytes pad for loops and calls", _count);
890  }
891#endif
892
893  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
894    MacroAssembler _masm(&cbuf);
895    __ nop(_count);
896  }
897
898  uint MachNopNode::size(PhaseRegAlloc*) const {
899    return _count;
900  }
901
902#ifndef PRODUCT
903  void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
904    st->print("# breakpoint");
905  }
906#endif
907
908  void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
909    MacroAssembler _masm(&cbuf);
910    __ int3();
911  }
912
913  uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
914    return MachNode::size(ra_);
915  }
916
917%}
918
919encode %{
920
921  enc_class call_epilog %{
922    if (VerifyStackAtCalls) {
923      // Check that stack depth is unchanged: find majik cookie on stack
924      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
925      MacroAssembler _masm(&cbuf);
926      Label L;
927      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
928      __ jccb(Assembler::equal, L);
929      // Die if stack mismatch
930      __ int3();
931      __ bind(L);
932    }
933  %}
934
935%}
936
937
938//----------OPERANDS-----------------------------------------------------------
939// Operand definitions must precede instruction definitions for correct parsing
940// in the ADLC because operands constitute user defined types which are used in
941// instruction definitions.
942
943// Vectors
944operand vecS() %{
945  constraint(ALLOC_IN_RC(vectors_reg));
946  match(VecS);
947
948  format %{ %}
949  interface(REG_INTER);
950%}
951
952operand vecD() %{
953  constraint(ALLOC_IN_RC(vectord_reg));
954  match(VecD);
955
956  format %{ %}
957  interface(REG_INTER);
958%}
959
960operand vecX() %{
961  constraint(ALLOC_IN_RC(vectorx_reg));
962  match(VecX);
963
964  format %{ %}
965  interface(REG_INTER);
966%}
967
968operand vecY() %{
969  constraint(ALLOC_IN_RC(vectory_reg));
970  match(VecY);
971
972  format %{ %}
973  interface(REG_INTER);
974%}
975
976
977// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
978
979// ============================================================================
980
981instruct ShouldNotReachHere() %{
982  match(Halt);
983  format %{ "int3\t# ShouldNotReachHere" %}
984  ins_encode %{
985    __ int3();
986  %}
987  ins_pipe(pipe_slow);
988%}
989
990// ============================================================================
991
992instruct addF_reg(regF dst, regF src) %{
993  predicate((UseSSE>=1) && (UseAVX == 0));
994  match(Set dst (AddF dst src));
995
996  format %{ "addss   $dst, $src" %}
997  ins_cost(150);
998  ins_encode %{
999    __ addss($dst$$XMMRegister, $src$$XMMRegister);
1000  %}
1001  ins_pipe(pipe_slow);
1002%}
1003
1004instruct addF_mem(regF dst, memory src) %{
1005  predicate((UseSSE>=1) && (UseAVX == 0));
1006  match(Set dst (AddF dst (LoadF src)));
1007
1008  format %{ "addss   $dst, $src" %}
1009  ins_cost(150);
1010  ins_encode %{
1011    __ addss($dst$$XMMRegister, $src$$Address);
1012  %}
1013  ins_pipe(pipe_slow);
1014%}
1015
1016instruct addF_imm(regF dst, immF con) %{
1017  predicate((UseSSE>=1) && (UseAVX == 0));
1018  match(Set dst (AddF dst con));
1019  format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
1020  ins_cost(150);
1021  ins_encode %{
1022    __ addss($dst$$XMMRegister, $constantaddress($con));
1023  %}
1024  ins_pipe(pipe_slow);
1025%}
1026
1027instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
1028  predicate(UseAVX > 0);
1029  match(Set dst (AddF src1 src2));
1030
1031  format %{ "vaddss  $dst, $src1, $src2" %}
1032  ins_cost(150);
1033  ins_encode %{
1034    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1035  %}
1036  ins_pipe(pipe_slow);
1037%}
1038
1039instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
1040  predicate(UseAVX > 0);
1041  match(Set dst (AddF src1 (LoadF src2)));
1042
1043  format %{ "vaddss  $dst, $src1, $src2" %}
1044  ins_cost(150);
1045  ins_encode %{
1046    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1047  %}
1048  ins_pipe(pipe_slow);
1049%}
1050
1051instruct addF_reg_imm(regF dst, regF src, immF con) %{
1052  predicate(UseAVX > 0);
1053  match(Set dst (AddF src con));
1054
1055  format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
1056  ins_cost(150);
1057  ins_encode %{
1058    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1059  %}
1060  ins_pipe(pipe_slow);
1061%}
1062
1063instruct addD_reg(regD dst, regD src) %{
1064  predicate((UseSSE>=2) && (UseAVX == 0));
1065  match(Set dst (AddD dst src));
1066
1067  format %{ "addsd   $dst, $src" %}
1068  ins_cost(150);
1069  ins_encode %{
1070    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
1071  %}
1072  ins_pipe(pipe_slow);
1073%}
1074
1075instruct addD_mem(regD dst, memory src) %{
1076  predicate((UseSSE>=2) && (UseAVX == 0));
1077  match(Set dst (AddD dst (LoadD src)));
1078
1079  format %{ "addsd   $dst, $src" %}
1080  ins_cost(150);
1081  ins_encode %{
1082    __ addsd($dst$$XMMRegister, $src$$Address);
1083  %}
1084  ins_pipe(pipe_slow);
1085%}
1086
1087instruct addD_imm(regD dst, immD con) %{
1088  predicate((UseSSE>=2) && (UseAVX == 0));
1089  match(Set dst (AddD dst con));
1090  format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
1091  ins_cost(150);
1092  ins_encode %{
1093    __ addsd($dst$$XMMRegister, $constantaddress($con));
1094  %}
1095  ins_pipe(pipe_slow);
1096%}
1097
1098instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
1099  predicate(UseAVX > 0);
1100  match(Set dst (AddD src1 src2));
1101
1102  format %{ "vaddsd  $dst, $src1, $src2" %}
1103  ins_cost(150);
1104  ins_encode %{
1105    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1106  %}
1107  ins_pipe(pipe_slow);
1108%}
1109
1110instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
1111  predicate(UseAVX > 0);
1112  match(Set dst (AddD src1 (LoadD src2)));
1113
1114  format %{ "vaddsd  $dst, $src1, $src2" %}
1115  ins_cost(150);
1116  ins_encode %{
1117    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1118  %}
1119  ins_pipe(pipe_slow);
1120%}
1121
1122instruct addD_reg_imm(regD dst, regD src, immD con) %{
1123  predicate(UseAVX > 0);
1124  match(Set dst (AddD src con));
1125
1126  format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
1127  ins_cost(150);
1128  ins_encode %{
1129    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1130  %}
1131  ins_pipe(pipe_slow);
1132%}
1133
1134instruct subF_reg(regF dst, regF src) %{
1135  predicate((UseSSE>=1) && (UseAVX == 0));
1136  match(Set dst (SubF dst src));
1137
1138  format %{ "subss   $dst, $src" %}
1139  ins_cost(150);
1140  ins_encode %{
1141    __ subss($dst$$XMMRegister, $src$$XMMRegister);
1142  %}
1143  ins_pipe(pipe_slow);
1144%}
1145
1146instruct subF_mem(regF dst, memory src) %{
1147  predicate((UseSSE>=1) && (UseAVX == 0));
1148  match(Set dst (SubF dst (LoadF src)));
1149
1150  format %{ "subss   $dst, $src" %}
1151  ins_cost(150);
1152  ins_encode %{
1153    __ subss($dst$$XMMRegister, $src$$Address);
1154  %}
1155  ins_pipe(pipe_slow);
1156%}
1157
1158instruct subF_imm(regF dst, immF con) %{
1159  predicate((UseSSE>=1) && (UseAVX == 0));
1160  match(Set dst (SubF dst con));
1161  format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
1162  ins_cost(150);
1163  ins_encode %{
1164    __ subss($dst$$XMMRegister, $constantaddress($con));
1165  %}
1166  ins_pipe(pipe_slow);
1167%}
1168
1169instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
1170  predicate(UseAVX > 0);
1171  match(Set dst (SubF src1 src2));
1172
1173  format %{ "vsubss  $dst, $src1, $src2" %}
1174  ins_cost(150);
1175  ins_encode %{
1176    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1177  %}
1178  ins_pipe(pipe_slow);
1179%}
1180
1181instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
1182  predicate(UseAVX > 0);
1183  match(Set dst (SubF src1 (LoadF src2)));
1184
1185  format %{ "vsubss  $dst, $src1, $src2" %}
1186  ins_cost(150);
1187  ins_encode %{
1188    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1189  %}
1190  ins_pipe(pipe_slow);
1191%}
1192
1193instruct subF_reg_imm(regF dst, regF src, immF con) %{
1194  predicate(UseAVX > 0);
1195  match(Set dst (SubF src con));
1196
1197  format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
1198  ins_cost(150);
1199  ins_encode %{
1200    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1201  %}
1202  ins_pipe(pipe_slow);
1203%}
1204
1205instruct subD_reg(regD dst, regD src) %{
1206  predicate((UseSSE>=2) && (UseAVX == 0));
1207  match(Set dst (SubD dst src));
1208
1209  format %{ "subsd   $dst, $src" %}
1210  ins_cost(150);
1211  ins_encode %{
1212    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
1213  %}
1214  ins_pipe(pipe_slow);
1215%}
1216
1217instruct subD_mem(regD dst, memory src) %{
1218  predicate((UseSSE>=2) && (UseAVX == 0));
1219  match(Set dst (SubD dst (LoadD src)));
1220
1221  format %{ "subsd   $dst, $src" %}
1222  ins_cost(150);
1223  ins_encode %{
1224    __ subsd($dst$$XMMRegister, $src$$Address);
1225  %}
1226  ins_pipe(pipe_slow);
1227%}
1228
1229instruct subD_imm(regD dst, immD con) %{
1230  predicate((UseSSE>=2) && (UseAVX == 0));
1231  match(Set dst (SubD dst con));
1232  format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
1233  ins_cost(150);
1234  ins_encode %{
1235    __ subsd($dst$$XMMRegister, $constantaddress($con));
1236  %}
1237  ins_pipe(pipe_slow);
1238%}
1239
1240instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
1241  predicate(UseAVX > 0);
1242  match(Set dst (SubD src1 src2));
1243
1244  format %{ "vsubsd  $dst, $src1, $src2" %}
1245  ins_cost(150);
1246  ins_encode %{
1247    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1248  %}
1249  ins_pipe(pipe_slow);
1250%}
1251
1252instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
1253  predicate(UseAVX > 0);
1254  match(Set dst (SubD src1 (LoadD src2)));
1255
1256  format %{ "vsubsd  $dst, $src1, $src2" %}
1257  ins_cost(150);
1258  ins_encode %{
1259    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1260  %}
1261  ins_pipe(pipe_slow);
1262%}
1263
1264instruct subD_reg_imm(regD dst, regD src, immD con) %{
1265  predicate(UseAVX > 0);
1266  match(Set dst (SubD src con));
1267
1268  format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
1269  ins_cost(150);
1270  ins_encode %{
1271    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1272  %}
1273  ins_pipe(pipe_slow);
1274%}
1275
1276instruct mulF_reg(regF dst, regF src) %{
1277  predicate((UseSSE>=1) && (UseAVX == 0));
1278  match(Set dst (MulF dst src));
1279
1280  format %{ "mulss   $dst, $src" %}
1281  ins_cost(150);
1282  ins_encode %{
1283    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
1284  %}
1285  ins_pipe(pipe_slow);
1286%}
1287
1288instruct mulF_mem(regF dst, memory src) %{
1289  predicate((UseSSE>=1) && (UseAVX == 0));
1290  match(Set dst (MulF dst (LoadF src)));
1291
1292  format %{ "mulss   $dst, $src" %}
1293  ins_cost(150);
1294  ins_encode %{
1295    __ mulss($dst$$XMMRegister, $src$$Address);
1296  %}
1297  ins_pipe(pipe_slow);
1298%}
1299
1300instruct mulF_imm(regF dst, immF con) %{
1301  predicate((UseSSE>=1) && (UseAVX == 0));
1302  match(Set dst (MulF dst con));
1303  format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
1304  ins_cost(150);
1305  ins_encode %{
1306    __ mulss($dst$$XMMRegister, $constantaddress($con));
1307  %}
1308  ins_pipe(pipe_slow);
1309%}
1310
1311instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
1312  predicate(UseAVX > 0);
1313  match(Set dst (MulF src1 src2));
1314
1315  format %{ "vmulss  $dst, $src1, $src2" %}
1316  ins_cost(150);
1317  ins_encode %{
1318    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1319  %}
1320  ins_pipe(pipe_slow);
1321%}
1322
1323instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
1324  predicate(UseAVX > 0);
1325  match(Set dst (MulF src1 (LoadF src2)));
1326
1327  format %{ "vmulss  $dst, $src1, $src2" %}
1328  ins_cost(150);
1329  ins_encode %{
1330    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1331  %}
1332  ins_pipe(pipe_slow);
1333%}
1334
1335instruct mulF_reg_imm(regF dst, regF src, immF con) %{
1336  predicate(UseAVX > 0);
1337  match(Set dst (MulF src con));
1338
1339  format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
1340  ins_cost(150);
1341  ins_encode %{
1342    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1343  %}
1344  ins_pipe(pipe_slow);
1345%}
1346
1347instruct mulD_reg(regD dst, regD src) %{
1348  predicate((UseSSE>=2) && (UseAVX == 0));
1349  match(Set dst (MulD dst src));
1350
1351  format %{ "mulsd   $dst, $src" %}
1352  ins_cost(150);
1353  ins_encode %{
1354    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
1355  %}
1356  ins_pipe(pipe_slow);
1357%}
1358
1359instruct mulD_mem(regD dst, memory src) %{
1360  predicate((UseSSE>=2) && (UseAVX == 0));
1361  match(Set dst (MulD dst (LoadD src)));
1362
1363  format %{ "mulsd   $dst, $src" %}
1364  ins_cost(150);
1365  ins_encode %{
1366    __ mulsd($dst$$XMMRegister, $src$$Address);
1367  %}
1368  ins_pipe(pipe_slow);
1369%}
1370
1371instruct mulD_imm(regD dst, immD con) %{
1372  predicate((UseSSE>=2) && (UseAVX == 0));
1373  match(Set dst (MulD dst con));
1374  format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
1375  ins_cost(150);
1376  ins_encode %{
1377    __ mulsd($dst$$XMMRegister, $constantaddress($con));
1378  %}
1379  ins_pipe(pipe_slow);
1380%}
1381
1382instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
1383  predicate(UseAVX > 0);
1384  match(Set dst (MulD src1 src2));
1385
1386  format %{ "vmulsd  $dst, $src1, $src2" %}
1387  ins_cost(150);
1388  ins_encode %{
1389    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1390  %}
1391  ins_pipe(pipe_slow);
1392%}
1393
1394instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
1395  predicate(UseAVX > 0);
1396  match(Set dst (MulD src1 (LoadD src2)));
1397
1398  format %{ "vmulsd  $dst, $src1, $src2" %}
1399  ins_cost(150);
1400  ins_encode %{
1401    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1402  %}
1403  ins_pipe(pipe_slow);
1404%}
1405
1406instruct mulD_reg_imm(regD dst, regD src, immD con) %{
1407  predicate(UseAVX > 0);
1408  match(Set dst (MulD src con));
1409
1410  format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
1411  ins_cost(150);
1412  ins_encode %{
1413    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1414  %}
1415  ins_pipe(pipe_slow);
1416%}
1417
1418instruct divF_reg(regF dst, regF src) %{
1419  predicate((UseSSE>=1) && (UseAVX == 0));
1420  match(Set dst (DivF dst src));
1421
1422  format %{ "divss   $dst, $src" %}
1423  ins_cost(150);
1424  ins_encode %{
1425    __ divss($dst$$XMMRegister, $src$$XMMRegister);
1426  %}
1427  ins_pipe(pipe_slow);
1428%}
1429
1430instruct divF_mem(regF dst, memory src) %{
1431  predicate((UseSSE>=1) && (UseAVX == 0));
1432  match(Set dst (DivF dst (LoadF src)));
1433
1434  format %{ "divss   $dst, $src" %}
1435  ins_cost(150);
1436  ins_encode %{
1437    __ divss($dst$$XMMRegister, $src$$Address);
1438  %}
1439  ins_pipe(pipe_slow);
1440%}
1441
1442instruct divF_imm(regF dst, immF con) %{
1443  predicate((UseSSE>=1) && (UseAVX == 0));
1444  match(Set dst (DivF dst con));
1445  format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
1446  ins_cost(150);
1447  ins_encode %{
1448    __ divss($dst$$XMMRegister, $constantaddress($con));
1449  %}
1450  ins_pipe(pipe_slow);
1451%}
1452
1453instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
1454  predicate(UseAVX > 0);
1455  match(Set dst (DivF src1 src2));
1456
1457  format %{ "vdivss  $dst, $src1, $src2" %}
1458  ins_cost(150);
1459  ins_encode %{
1460    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1461  %}
1462  ins_pipe(pipe_slow);
1463%}
1464
1465instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
1466  predicate(UseAVX > 0);
1467  match(Set dst (DivF src1 (LoadF src2)));
1468
1469  format %{ "vdivss  $dst, $src1, $src2" %}
1470  ins_cost(150);
1471  ins_encode %{
1472    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1473  %}
1474  ins_pipe(pipe_slow);
1475%}
1476
1477instruct divF_reg_imm(regF dst, regF src, immF con) %{
1478  predicate(UseAVX > 0);
1479  match(Set dst (DivF src con));
1480
1481  format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
1482  ins_cost(150);
1483  ins_encode %{
1484    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1485  %}
1486  ins_pipe(pipe_slow);
1487%}
1488
1489instruct divD_reg(regD dst, regD src) %{
1490  predicate((UseSSE>=2) && (UseAVX == 0));
1491  match(Set dst (DivD dst src));
1492
1493  format %{ "divsd   $dst, $src" %}
1494  ins_cost(150);
1495  ins_encode %{
1496    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
1497  %}
1498  ins_pipe(pipe_slow);
1499%}
1500
1501instruct divD_mem(regD dst, memory src) %{
1502  predicate((UseSSE>=2) && (UseAVX == 0));
1503  match(Set dst (DivD dst (LoadD src)));
1504
1505  format %{ "divsd   $dst, $src" %}
1506  ins_cost(150);
1507  ins_encode %{
1508    __ divsd($dst$$XMMRegister, $src$$Address);
1509  %}
1510  ins_pipe(pipe_slow);
1511%}
1512
1513instruct divD_imm(regD dst, immD con) %{
1514  predicate((UseSSE>=2) && (UseAVX == 0));
1515  match(Set dst (DivD dst con));
1516  format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
1517  ins_cost(150);
1518  ins_encode %{
1519    __ divsd($dst$$XMMRegister, $constantaddress($con));
1520  %}
1521  ins_pipe(pipe_slow);
1522%}
1523
1524instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
1525  predicate(UseAVX > 0);
1526  match(Set dst (DivD src1 src2));
1527
1528  format %{ "vdivsd  $dst, $src1, $src2" %}
1529  ins_cost(150);
1530  ins_encode %{
1531    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
1532  %}
1533  ins_pipe(pipe_slow);
1534%}
1535
1536instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
1537  predicate(UseAVX > 0);
1538  match(Set dst (DivD src1 (LoadD src2)));
1539
1540  format %{ "vdivsd  $dst, $src1, $src2" %}
1541  ins_cost(150);
1542  ins_encode %{
1543    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
1544  %}
1545  ins_pipe(pipe_slow);
1546%}
1547
1548instruct divD_reg_imm(regD dst, regD src, immD con) %{
1549  predicate(UseAVX > 0);
1550  match(Set dst (DivD src con));
1551
1552  format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
1553  ins_cost(150);
1554  ins_encode %{
1555    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
1556  %}
1557  ins_pipe(pipe_slow);
1558%}
1559
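// Abs is implemented by clearing the IEEE-754 sign bit: the rules below AND the value
// with a mask that is all ones except the sign bit (float_signmask()/double_signmask()
// point at those in-memory constants).  For example, -1.5f = 0xBFC00000 and
// 0xBFC00000 & 0x7FFFFFFF = 0x3FC00000 = +1.5f.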
1560instruct absF_reg(regF dst) %{
1561  predicate((UseSSE>=1) && (UseAVX == 0));
1562  match(Set dst (AbsF dst));
1563  ins_cost(150);
1564  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
1565  ins_encode %{
1566    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
1567  %}
1568  ins_pipe(pipe_slow);
1569%}
1570
1571instruct absF_reg_reg(regF dst, regF src) %{
1572  predicate(UseAVX > 0);
1573  match(Set dst (AbsF src));
1574  ins_cost(150);
1575  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
1576  ins_encode %{
1577    bool vector256 = false;
1578    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
1579              ExternalAddress(float_signmask()), vector256);
1580  %}
1581  ins_pipe(pipe_slow);
1582%}
1583
1584instruct absD_reg(regD dst) %{
1585  predicate((UseSSE>=2) && (UseAVX == 0));
1586  match(Set dst (AbsD dst));
1587  ins_cost(150);
1588  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
1589            "# abs double by sign masking" %}
1590  ins_encode %{
1591    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
1592  %}
1593  ins_pipe(pipe_slow);
1594%}
1595
1596instruct absD_reg_reg(regD dst, regD src) %{
1597  predicate(UseAVX > 0);
1598  match(Set dst (AbsD src));
1599  ins_cost(150);
1600  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
1601            "# abs double by sign masking" %}
1602  ins_encode %{
1603    bool vector256 = false;
1604    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
1605              ExternalAddress(double_signmask()), vector256);
1606  %}
1607  ins_pipe(pipe_slow);
1608%}
1609
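// Neg flips only the sign bit with XOR, e.g.
// 0x3FC00000 (+1.5f) ^ 0x80000000 = 0xBFC00000 (-1.5f).  Unlike subtracting from zero,
// this also maps +0.0 to -0.0 as required (0.0 - 0.0 would yield +0.0).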
1610instruct negF_reg(regF dst) %{
1611  predicate((UseSSE>=1) && (UseAVX == 0));
1612  match(Set dst (NegF dst));
1613  ins_cost(150);
1614  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
1615  ins_encode %{
1616    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
1617  %}
1618  ins_pipe(pipe_slow);
1619%}
1620
1621instruct negF_reg_reg(regF dst, regF src) %{
1622  predicate(UseAVX > 0);
1623  match(Set dst (NegF src));
1624  ins_cost(150);
1625  format %{ "vxorps  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
1626  ins_encode %{
1627    bool vector256 = false;
1628    __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
1629              ExternalAddress(float_signflip()), vector256);
1630  %}
1631  ins_pipe(pipe_slow);
1632%}
1633
1634instruct negD_reg(regD dst) %{
1635  predicate((UseSSE>=2) && (UseAVX == 0));
1636  match(Set dst (NegD dst));
1637  ins_cost(150);
1638  format %{ "xorpd   $dst, [0x8000000000000000]\t"
1639            "# neg double by sign flipping" %}
1640  ins_encode %{
1641    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
1642  %}
1643  ins_pipe(pipe_slow);
1644%}
1645
1646instruct negD_reg_reg(regD dst, regD src) %{
1647  predicate(UseAVX > 0);
1648  match(Set dst (NegD src));
1649  ins_cost(150);
1650  format %{ "vxorpd  $dst, $src, [0x8000000000000000]\t"
1651            "# neg double by sign flipping" %}
1652  ins_encode %{
1653    bool vector256 = false;
1654    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
1655              ExternalAddress(double_signflip()), vector256);
1656  %}
1657  ins_pipe(pipe_slow);
1658%}
1659
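// Math.sqrt operates on double, so a float square root reaches the matcher as
// ConvD2F(SqrtD(ConvF2D(x))).  The rules below collapse that whole tree into a single
// sqrtss: rounding the double-precision square root of a float back to float gives the
// same value as computing it directly in single precision, so the shortcut is exact.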
1660instruct sqrtF_reg(regF dst, regF src) %{
1661  predicate(UseSSE>=1);
1662  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
1663
1664  format %{ "sqrtss  $dst, $src" %}
1665  ins_cost(150);
1666  ins_encode %{
1667    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
1668  %}
1669  ins_pipe(pipe_slow);
1670%}
1671
1672instruct sqrtF_mem(regF dst, memory src) %{
1673  predicate(UseSSE>=1);
1674  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
1675
1676  format %{ "sqrtss  $dst, $src" %}
1677  ins_cost(150);
1678  ins_encode %{
1679    __ sqrtss($dst$$XMMRegister, $src$$Address);
1680  %}
1681  ins_pipe(pipe_slow);
1682%}
1683
1684instruct sqrtF_imm(regF dst, immF con) %{
1685  predicate(UseSSE>=1);
1686  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
1687  format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
1688  ins_cost(150);
1689  ins_encode %{
1690    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
1691  %}
1692  ins_pipe(pipe_slow);
1693%}
1694
1695instruct sqrtD_reg(regD dst, regD src) %{
1696  predicate(UseSSE>=2);
1697  match(Set dst (SqrtD src));
1698
1699  format %{ "sqrtsd  $dst, $src" %}
1700  ins_cost(150);
1701  ins_encode %{
1702    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
1703  %}
1704  ins_pipe(pipe_slow);
1705%}
1706
1707instruct sqrtD_mem(regD dst, memory src) %{
1708  predicate(UseSSE>=2);
1709  match(Set dst (SqrtD (LoadD src)));
1710
1711  format %{ "sqrtsd  $dst, $src" %}
1712  ins_cost(150);
1713  ins_encode %{
1714    __ sqrtsd($dst$$XMMRegister, $src$$Address);
1715  %}
1716  ins_pipe(pipe_slow);
1717%}
1718
1719instruct sqrtD_imm(regD dst, immD con) %{
1720  predicate(UseSSE>=2);
1721  match(Set dst (SqrtD con));
1722  format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
1723  ins_cost(150);
1724  ins_encode %{
1725    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
1726  %}
1727  ins_pipe(pipe_slow);
1728%}
1729
1730
1731// ====================VECTOR INSTRUCTIONS=====================================
1732
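// The vector operand classes encode the register width that backs them:
//   vecS = 4 bytes, vecD = 8 bytes, vecX = 16 bytes (XMM), vecY = 32 bytes (YMM).
// Each load/store rule below is selected on the node's memory_size(), so the matcher
// emits the narrowest move that covers the vector being transferred.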
1733// Load vectors (4 bytes long)
1734instruct loadV4(vecS dst, memory mem) %{
1735  predicate(n->as_LoadVector()->memory_size() == 4);
1736  match(Set dst (LoadVector mem));
1737  ins_cost(125);
1738  format %{ "movd    $dst,$mem\t! load vector (4 bytes)" %}
1739  ins_encode %{
1740    __ movdl($dst$$XMMRegister, $mem$$Address);
1741  %}
1742  ins_pipe( pipe_slow );
1743%}
1744
1745// Load vectors (8 bytes long)
1746instruct loadV8(vecD dst, memory mem) %{
1747  predicate(n->as_LoadVector()->memory_size() == 8);
1748  match(Set dst (LoadVector mem));
1749  ins_cost(125);
1750  format %{ "movq    $dst,$mem\t! load vector (8 bytes)" %}
1751  ins_encode %{
1752    __ movq($dst$$XMMRegister, $mem$$Address);
1753  %}
1754  ins_pipe( pipe_slow );
1755%}
1756
1757// Load vectors (16 bytes long)
1758instruct loadV16(vecX dst, memory mem) %{
1759  predicate(n->as_LoadVector()->memory_size() == 16);
1760  match(Set dst (LoadVector mem));
1761  ins_cost(125);
1762  format %{ "movdqu  $dst,$mem\t! load vector (16 bytes)" %}
1763  ins_encode %{
1764    __ movdqu($dst$$XMMRegister, $mem$$Address);
1765  %}
1766  ins_pipe( pipe_slow );
1767%}
1768
1769// Load vectors (32 bytes long)
1770instruct loadV32(vecY dst, memory mem) %{
1771  predicate(n->as_LoadVector()->memory_size() == 32);
1772  match(Set dst (LoadVector mem));
1773  ins_cost(125);
1774  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
1775  ins_encode %{
1776    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
1777  %}
1778  ins_pipe( pipe_slow );
1779%}
1780
1781// Store vectors
1782instruct storeV4(memory mem, vecS src) %{
1783  predicate(n->as_StoreVector()->memory_size() == 4);
1784  match(Set mem (StoreVector mem src));
1785  ins_cost(145);
1786  format %{ "movd    $mem,$src\t! store vector (4 bytes)" %}
1787  ins_encode %{
1788    __ movdl($mem$$Address, $src$$XMMRegister);
1789  %}
1790  ins_pipe( pipe_slow );
1791%}
1792
1793instruct storeV8(memory mem, vecD src) %{
1794  predicate(n->as_StoreVector()->memory_size() == 8);
1795  match(Set mem (StoreVector mem src));
1796  ins_cost(145);
1797  format %{ "movq    $mem,$src\t! store vector (8 bytes)" %}
1798  ins_encode %{
1799    __ movq($mem$$Address, $src$$XMMRegister);
1800  %}
1801  ins_pipe( pipe_slow );
1802%}
1803
1804instruct storeV16(memory mem, vecX src) %{
1805  predicate(n->as_StoreVector()->memory_size() == 16);
1806  match(Set mem (StoreVector mem src));
1807  ins_cost(145);
1808  format %{ "movdqu  $mem,$src\t! store vector (16 bytes)" %}
1809  ins_encode %{
1810    __ movdqu($mem$$Address, $src$$XMMRegister);
1811  %}
1812  ins_pipe( pipe_slow );
1813%}
1814
1815instruct storeV32(memory mem, vecY src) %{
1816  predicate(n->as_StoreVector()->memory_size() == 32);
1817  match(Set mem (StoreVector mem src));
1818  ins_cost(145);
1819  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
1820  ins_encode %{
1821    __ vmovdqu($mem$$Address, $src$$XMMRegister);
1822  %}
1823  ins_pipe( pipe_slow );
1824%}
1825
1826// Replicate byte scalar to be vector
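// Broadcast sequence used below, for the low byte b of $src:
//   movdl        dst, src        ; dst dwords = [src, 0, 0, 0]
//   punpcklbw    dst, dst        ; low bytes interleaved with themselves: b,b,x,x,...
//   pshuflw      dst, dst, 0x00  ; word 0 (b:b) copied to the 4 low words -> 8 copies of b
//   punpcklqdq   dst, dst        ; low qword copied to the high qword     -> 16 copies
//   vinserti128h dst, dst, dst   ; low 128 bits copied to the upper YMM half -> 32 copies
// The 4B/8B rules stop after pshuflw, the 16B rule adds punpcklqdq, and the 32B rule
// adds the 128-bit insert.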
1827instruct Repl4B(vecS dst, rRegI src) %{
1828  predicate(n->as_Vector()->length() == 4);
1829  match(Set dst (ReplicateB src));
1830  format %{ "movd    $dst,$src\n\t"
1831            "punpcklbw $dst,$dst\n\t"
1832            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
1833  ins_encode %{
1834    __ movdl($dst$$XMMRegister, $src$$Register);
1835    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1836    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1837  %}
1838  ins_pipe( pipe_slow );
1839%}
1840
1841instruct Repl8B(vecD dst, rRegI src) %{
1842  predicate(n->as_Vector()->length() == 8);
1843  match(Set dst (ReplicateB src));
1844  format %{ "movd    $dst,$src\n\t"
1845            "punpcklbw $dst,$dst\n\t"
1846            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
1847  ins_encode %{
1848    __ movdl($dst$$XMMRegister, $src$$Register);
1849    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1850    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1851  %}
1852  ins_pipe( pipe_slow );
1853%}
1854
1855instruct Repl16B(vecX dst, rRegI src) %{
1856  predicate(n->as_Vector()->length() == 16);
1857  match(Set dst (ReplicateB src));
1858  format %{ "movd    $dst,$src\n\t"
1859            "punpcklbw $dst,$dst\n\t"
1860            "pshuflw $dst,$dst,0x00\n\t"
1861            "punpcklqdq $dst,$dst\t! replicate16B" %}
1862  ins_encode %{
1863    __ movdl($dst$$XMMRegister, $src$$Register);
1864    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1865    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1866    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1867  %}
1868  ins_pipe( pipe_slow );
1869%}
1870
1871instruct Repl32B(vecY dst, rRegI src) %{
1872  predicate(n->as_Vector()->length() == 32);
1873  match(Set dst (ReplicateB src));
1874  format %{ "movd    $dst,$src\n\t"
1875            "punpcklbw $dst,$dst\n\t"
1876            "pshuflw $dst,$dst,0x00\n\t"
1877            "punpcklqdq $dst,$dst\n\t"
1878            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
1879  ins_encode %{
1880    __ movdl($dst$$XMMRegister, $src$$Register);
1881    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1882    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1883    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1884    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
1885  %}
1886  ins_pipe( pipe_slow );
1887%}
1888
1889// Replicate byte scalar immediate to be vector by loading from const table.
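// The replicate4_imm/replicate8_imm helpers build a 32-/64-bit word whose low `width`
// bytes hold the constant, repeated to fill the word; e.g. replicate8_imm(0x07, 1) is
// 0x0707070707070707 and replicate8_imm(0x1234, 2) is 0x1234123412341234.  A single
// movd/movq from the constant table therefore loads an already-replicated value.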
1890instruct Repl4B_imm(vecS dst, immI con) %{
1891  predicate(n->as_Vector()->length() == 4);
1892  match(Set dst (ReplicateB con));
1893  format %{ "movdl   $dst,[$constantaddress]\t! replicate4B($con)" %}
1894  ins_encode %{
1895    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
1896  %}
1897  ins_pipe( pipe_slow );
1898%}
1899
1900instruct Repl8B_imm(vecD dst, immI con) %{
1901  predicate(n->as_Vector()->length() == 8);
1902  match(Set dst (ReplicateB con));
1903  format %{ "movq    $dst,[$constantaddress]\t! replicate8B($con)" %}
1904  ins_encode %{
1905    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
1906  %}
1907  ins_pipe( pipe_slow );
1908%}
1909
1910instruct Repl16B_imm(vecX dst, immI con) %{
1911  predicate(n->as_Vector()->length() == 16);
1912  match(Set dst (ReplicateB con));
1913  format %{ "movq    $dst,[$constantaddress]\n\t"
1914            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
1915  ins_encode %{
1916    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
1917    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1918  %}
1919  ins_pipe( pipe_slow );
1920%}
1921
1922instruct Repl32B_imm(vecY dst, immI con) %{
1923  predicate(n->as_Vector()->length() == 32);
1924  match(Set dst (ReplicateB con));
1925  format %{ "movq    $dst,[$constantaddress]\n\t"
1926            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
1928  ins_encode %{
1929    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
1930    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
1931    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
1932  %}
1933  ins_pipe( pipe_slow );
1934%}
1935
1936// Replicate byte scalar zero to be vector
1937instruct Repl4B_zero(vecS dst, immI0 zero) %{
1938  predicate(n->as_Vector()->length() == 4);
1939  match(Set dst (ReplicateB zero));
1940  format %{ "pxor    $dst,$dst\t! replicate4B zero" %}
1941  ins_encode %{
1942    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1943  %}
1944  ins_pipe( fpu_reg_reg );
1945%}
1946
1947instruct Repl8B_zero(vecD dst, immI0 zero) %{
1948  predicate(n->as_Vector()->length() == 8);
1949  match(Set dst (ReplicateB zero));
1950  format %{ "pxor    $dst,$dst\t! replicate8B zero" %}
1951  ins_encode %{
1952    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1953  %}
1954  ins_pipe( fpu_reg_reg );
1955%}
1956
1957instruct Repl16B_zero(vecX dst, immI0 zero) %{
1958  predicate(n->as_Vector()->length() == 16);
1959  match(Set dst (ReplicateB zero));
1960  format %{ "pxor    $dst,$dst\t! replicate16B zero" %}
1961  ins_encode %{
1962    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1963  %}
1964  ins_pipe( fpu_reg_reg );
1965%}
1966
1967instruct Repl32B_zero(vecY dst, immI0 zero) %{
1968  predicate(n->as_Vector()->length() == 32);
1969  match(Set dst (ReplicateB zero));
1970  format %{ "vpxor   $dst,$dst,$dst\t! replicate32B zero" %}
1971  ins_encode %{
    // Plain AVX has no 256-bit vpxor (vxorpd would be needed there); AVX2 provides it.
1973    bool vector256 = true;
1974    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
1975  %}
1976  ins_pipe( fpu_reg_reg );
1977%}
1978
1979// Replicate char/short (2 byte) scalar to be vector
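// For 16-bit elements no byte interleave is needed: pshuflw with selector 0x00 already
// copies word 0 into all four low words, so the sequence is one step shorter than the
// byte case above.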
1980instruct Repl2S(vecS dst, rRegI src) %{
1981  predicate(n->as_Vector()->length() == 2);
1982  match(Set dst (ReplicateS src));
1983  format %{ "movd    $dst,$src\n\t"
1984            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
1985  ins_encode %{
1986    __ movdl($dst$$XMMRegister, $src$$Register);
1987    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1988  %}
1989  ins_pipe( fpu_reg_reg );
1990%}
1991
1992instruct Repl4S(vecD dst, rRegI src) %{
1993  predicate(n->as_Vector()->length() == 4);
1994  match(Set dst (ReplicateS src));
1995  format %{ "movd    $dst,$src\n\t"
1996            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
1997  ins_encode %{
1998    __ movdl($dst$$XMMRegister, $src$$Register);
1999    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2000  %}
2001  ins_pipe( fpu_reg_reg );
2002%}
2003
2004instruct Repl8S(vecX dst, rRegI src) %{
2005  predicate(n->as_Vector()->length() == 8);
2006  match(Set dst (ReplicateS src));
2007  format %{ "movd    $dst,$src\n\t"
2008            "pshuflw $dst,$dst,0x00\n\t"
2009            "punpcklqdq $dst,$dst\t! replicate8S" %}
2010  ins_encode %{
2011    __ movdl($dst$$XMMRegister, $src$$Register);
2012    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2013    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2014  %}
2015  ins_pipe( pipe_slow );
2016%}
2017
2018instruct Repl16S(vecY dst, rRegI src) %{
2019  predicate(n->as_Vector()->length() == 16);
2020  match(Set dst (ReplicateS src));
2021  format %{ "movd    $dst,$src\n\t"
2022            "pshuflw $dst,$dst,0x00\n\t"
2023            "punpcklqdq $dst,$dst\n\t"
2024            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
2025  ins_encode %{
2026    __ movdl($dst$$XMMRegister, $src$$Register);
2027    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2028    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2029    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2030  %}
2031  ins_pipe( pipe_slow );
2032%}
2033
2034// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
2035instruct Repl2S_imm(vecS dst, immI con) %{
2036  predicate(n->as_Vector()->length() == 2);
2037  match(Set dst (ReplicateS con));
2038  format %{ "movdl   $dst,[$constantaddress]\t! replicate2S($con)" %}
2039  ins_encode %{
2040    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
2041  %}
2042  ins_pipe( fpu_reg_reg );
2043%}
2044
2045instruct Repl4S_imm(vecD dst, immI con) %{
2046  predicate(n->as_Vector()->length() == 4);
2047  match(Set dst (ReplicateS con));
2048  format %{ "movq    $dst,[$constantaddress]\t! replicate4S($con)" %}
2049  ins_encode %{
2050    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
2051  %}
2052  ins_pipe( fpu_reg_reg );
2053%}
2054
2055instruct Repl8S_imm(vecX dst, immI con) %{
2056  predicate(n->as_Vector()->length() == 8);
2057  match(Set dst (ReplicateS con));
2058  format %{ "movq    $dst,[$constantaddress]\n\t"
2059            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
2060  ins_encode %{
2061    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
2062    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2063  %}
2064  ins_pipe( pipe_slow );
2065%}
2066
2067instruct Repl16S_imm(vecY dst, immI con) %{
2068  predicate(n->as_Vector()->length() == 16);
2069  match(Set dst (ReplicateS con));
2070  format %{ "movq    $dst,[$constantaddress]\n\t"
2071            "punpcklqdq $dst,$dst\n\t"
2072            "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
2073  ins_encode %{
2074    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
2075    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2076    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2077  %}
2078  ins_pipe( pipe_slow );
2079%}
2080
2081// Replicate char/short (2 byte) scalar zero to be vector
2082instruct Repl2S_zero(vecS dst, immI0 zero) %{
2083  predicate(n->as_Vector()->length() == 2);
2084  match(Set dst (ReplicateS zero));
2085  format %{ "pxor    $dst,$dst\t! replicate2S zero" %}
2086  ins_encode %{
2087    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2088  %}
2089  ins_pipe( fpu_reg_reg );
2090%}
2091
2092instruct Repl4S_zero(vecD dst, immI0 zero) %{
2093  predicate(n->as_Vector()->length() == 4);
2094  match(Set dst (ReplicateS zero));
2095  format %{ "pxor    $dst,$dst\t! replicate4S zero" %}
2096  ins_encode %{
2097    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2098  %}
2099  ins_pipe( fpu_reg_reg );
2100%}
2101
2102instruct Repl8S_zero(vecX dst, immI0 zero) %{
2103  predicate(n->as_Vector()->length() == 8);
2104  match(Set dst (ReplicateS zero));
2105  format %{ "pxor    $dst,$dst\t! replicate8S zero" %}
2106  ins_encode %{
2107    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2108  %}
2109  ins_pipe( fpu_reg_reg );
2110%}
2111
2112instruct Repl16S_zero(vecY dst, immI0 zero) %{
2113  predicate(n->as_Vector()->length() == 16);
2114  match(Set dst (ReplicateS zero));
2115  format %{ "vpxor   $dst,$dst,$dst\t! replicate16S zero" %}
2116  ins_encode %{
    // Plain AVX has no 256-bit vpxor (vxorpd would be needed there); AVX2 provides it.
2118    bool vector256 = true;
2119    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2120  %}
2121  ins_pipe( fpu_reg_reg );
2122%}
2123
2124// Replicate integer (4 byte) scalar to be vector
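// A 32-bit element is broadcast with a single pshufd: selector 0x00 selects dword 0 for
// every destination dword.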
2125instruct Repl2I(vecD dst, rRegI src) %{
2126  predicate(n->as_Vector()->length() == 2);
2127  match(Set dst (ReplicateI src));
2128  format %{ "movd    $dst,$src\n\t"
2129            "pshufd  $dst,$dst,0x00\t! replicate2I" %}
2130  ins_encode %{
2131    __ movdl($dst$$XMMRegister, $src$$Register);
2132    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2133  %}
2134  ins_pipe( fpu_reg_reg );
2135%}
2136
2137instruct Repl4I(vecX dst, rRegI src) %{
2138  predicate(n->as_Vector()->length() == 4);
2139  match(Set dst (ReplicateI src));
2140  format %{ "movd    $dst,$src\n\t"
2141            "pshufd  $dst,$dst,0x00\t! replicate4I" %}
2142  ins_encode %{
2143    __ movdl($dst$$XMMRegister, $src$$Register);
2144    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2145  %}
2146  ins_pipe( pipe_slow );
2147%}
2148
2149instruct Repl8I(vecY dst, rRegI src) %{
2150  predicate(n->as_Vector()->length() == 8);
2151  match(Set dst (ReplicateI src));
2152  format %{ "movd    $dst,$src\n\t"
2153            "pshufd  $dst,$dst,0x00\n\t"
2154            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
2155  ins_encode %{
2156    __ movdl($dst$$XMMRegister, $src$$Register);
2157    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2158    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2159  %}
2160  ins_pipe( pipe_slow );
2161%}
2162
2163// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
2164instruct Repl2I_imm(vecD dst, immI con) %{
2165  predicate(n->as_Vector()->length() == 2);
2166  match(Set dst (ReplicateI con));
2167  format %{ "movq    $dst,[$constantaddress]\t! replicate2I($con)" %}
2168  ins_encode %{
2169    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2170  %}
2171  ins_pipe( fpu_reg_reg );
2172%}
2173
2174instruct Repl4I_imm(vecX dst, immI con) %{
2175  predicate(n->as_Vector()->length() == 4);
2176  match(Set dst (ReplicateI con));
2177  format %{ "movq    $dst,[$constantaddress]\t! replicate4I($con)\n\t"
2178            "punpcklqdq $dst,$dst" %}
2179  ins_encode %{
2180    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2181    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2182  %}
2183  ins_pipe( pipe_slow );
2184%}
2185
2186instruct Repl8I_imm(vecY dst, immI con) %{
2187  predicate(n->as_Vector()->length() == 8);
2188  match(Set dst (ReplicateI con));
2189  format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
2190            "punpcklqdq $dst,$dst\n\t"
2191            "vinserti128h $dst,$dst,$dst" %}
2192  ins_encode %{
2193    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2194    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2195    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2196  %}
2197  ins_pipe( pipe_slow );
2198%}
2199
// An integer can be loaded into an xmm register directly from memory.
2201instruct Repl2I_mem(vecD dst, memory mem) %{
2202  predicate(n->as_Vector()->length() == 2);
2203  match(Set dst (ReplicateI (LoadI mem)));
2204  format %{ "movd    $dst,$mem\n\t"
2205            "pshufd  $dst,$dst,0x00\t! replicate2I" %}
2206  ins_encode %{
2207    __ movdl($dst$$XMMRegister, $mem$$Address);
2208    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2209  %}
2210  ins_pipe( fpu_reg_reg );
2211%}
2212
2213instruct Repl4I_mem(vecX dst, memory mem) %{
2214  predicate(n->as_Vector()->length() == 4);
2215  match(Set dst (ReplicateI (LoadI mem)));
2216  format %{ "movd    $dst,$mem\n\t"
2217            "pshufd  $dst,$dst,0x00\t! replicate4I" %}
2218  ins_encode %{
2219    __ movdl($dst$$XMMRegister, $mem$$Address);
2220    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2221  %}
2222  ins_pipe( pipe_slow );
2223%}
2224
2225instruct Repl8I_mem(vecY dst, memory mem) %{
2226  predicate(n->as_Vector()->length() == 8);
2227  match(Set dst (ReplicateI (LoadI mem)));
2228  format %{ "movd    $dst,$mem\n\t"
2229            "pshufd  $dst,$dst,0x00\n\t"
2230            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
2231  ins_encode %{
2232    __ movdl($dst$$XMMRegister, $mem$$Address);
2233    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2234    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2235  %}
2236  ins_pipe( pipe_slow );
2237%}
2238
2239// Replicate integer (4 byte) scalar zero to be vector
2240instruct Repl2I_zero(vecD dst, immI0 zero) %{
2241  predicate(n->as_Vector()->length() == 2);
2242  match(Set dst (ReplicateI zero));
  format %{ "pxor    $dst,$dst\t! replicate2I zero" %}
2244  ins_encode %{
2245    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2246  %}
2247  ins_pipe( fpu_reg_reg );
2248%}
2249
2250instruct Repl4I_zero(vecX dst, immI0 zero) %{
2251  predicate(n->as_Vector()->length() == 4);
2252  match(Set dst (ReplicateI zero));
  format %{ "pxor    $dst,$dst\t! replicate4I zero" %}
2254  ins_encode %{
2255    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2256  %}
2257  ins_pipe( fpu_reg_reg );
2258%}
2259
2260instruct Repl8I_zero(vecY dst, immI0 zero) %{
2261  predicate(n->as_Vector()->length() == 8);
2262  match(Set dst (ReplicateI zero));
2263  format %{ "vpxor   $dst,$dst,$dst\t! replicate8I zero" %}
2264  ins_encode %{
    // Plain AVX has no 256-bit vpxor (vxorpd would be needed there); AVX2 provides it.
2266    bool vector256 = true;
2267    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2268  %}
2269  ins_pipe( fpu_reg_reg );
2270%}
2271
2272// Replicate long (8 byte) scalar to be vector
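// On LP64 the long value sits in one GPR and movdq moves it into the XMM register
// directly.  On 32-bit VMs (the #else branch) the long lives in a register pair, so the
// low and high halves are moved with two movdl instructions and stitched together with
// punpckldq before the usual qword duplication.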
2273#ifdef _LP64
2274instruct Repl2L(vecX dst, rRegL src) %{
2275  predicate(n->as_Vector()->length() == 2);
2276  match(Set dst (ReplicateL src));
2277  format %{ "movdq   $dst,$src\n\t"
2278            "punpcklqdq $dst,$dst\t! replicate2L" %}
2279  ins_encode %{
2280    __ movdq($dst$$XMMRegister, $src$$Register);
2281    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2282  %}
2283  ins_pipe( pipe_slow );
2284%}
2285
2286instruct Repl4L(vecY dst, rRegL src) %{
2287  predicate(n->as_Vector()->length() == 4);
2288  match(Set dst (ReplicateL src));
2289  format %{ "movdq   $dst,$src\n\t"
2290            "punpcklqdq $dst,$dst\n\t"
2291            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
2292  ins_encode %{
2293    __ movdq($dst$$XMMRegister, $src$$Register);
2294    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2295    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2296  %}
2297  ins_pipe( pipe_slow );
2298%}
2299#else // _LP64
2300instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
2301  predicate(n->as_Vector()->length() == 2);
2302  match(Set dst (ReplicateL src));
2303  effect(TEMP dst, USE src, TEMP tmp);
2304  format %{ "movdl   $dst,$src.lo\n\t"
2305            "movdl   $tmp,$src.hi\n\t"
2306            "punpckldq $dst,$tmp\n\t"
2307            "punpcklqdq $dst,$dst\t! replicate2L"%}
2308  ins_encode %{
2309    __ movdl($dst$$XMMRegister, $src$$Register);
2310    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
2311    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
2312    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2313  %}
2314  ins_pipe( pipe_slow );
2315%}
2316
2317instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
2318  predicate(n->as_Vector()->length() == 4);
2319  match(Set dst (ReplicateL src));
2320  effect(TEMP dst, USE src, TEMP tmp);
2321  format %{ "movdl   $dst,$src.lo\n\t"
2322            "movdl   $tmp,$src.hi\n\t"
2323            "punpckldq $dst,$tmp\n\t"
2324            "punpcklqdq $dst,$dst\n\t"
2325            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
2326  ins_encode %{
2327    __ movdl($dst$$XMMRegister, $src$$Register);
2328    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
2329    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
2330    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2331    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2332  %}
2333  ins_pipe( pipe_slow );
2334%}
2335#endif // _LP64
2336
2337// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
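// A long constant already fills 64 bits, so no replicate helper is needed: the value
// itself goes into the constant table and is loaded with a plain movq.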
2338instruct Repl2L_imm(vecX dst, immL con) %{
2339  predicate(n->as_Vector()->length() == 2);
2340  match(Set dst (ReplicateL con));
2341  format %{ "movq    $dst,[$constantaddress]\n\t"
2342            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
2343  ins_encode %{
2344    __ movq($dst$$XMMRegister, $constantaddress($con));
2345    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2346  %}
2347  ins_pipe( pipe_slow );
2348%}
2349
2350instruct Repl4L_imm(vecY dst, immL con) %{
2351  predicate(n->as_Vector()->length() == 4);
2352  match(Set dst (ReplicateL con));
2353  format %{ "movq    $dst,[$constantaddress]\n\t"
2354            "punpcklqdq $dst,$dst\n\t"
2355            "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
2356  ins_encode %{
2357    __ movq($dst$$XMMRegister, $constantaddress($con));
2358    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2359    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2360  %}
2361  ins_pipe( pipe_slow );
2362%}
2363
// A long can be loaded into an xmm register directly from memory.
2365instruct Repl2L_mem(vecX dst, memory mem) %{
2366  predicate(n->as_Vector()->length() == 2);
2367  match(Set dst (ReplicateL (LoadL mem)));
2368  format %{ "movq    $dst,$mem\n\t"
2369            "punpcklqdq $dst,$dst\t! replicate2L" %}
2370  ins_encode %{
2371    __ movq($dst$$XMMRegister, $mem$$Address);
2372    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2373  %}
2374  ins_pipe( pipe_slow );
2375%}
2376
2377instruct Repl4L_mem(vecY dst, memory mem) %{
2378  predicate(n->as_Vector()->length() == 4);
2379  match(Set dst (ReplicateL (LoadL mem)));
2380  format %{ "movq    $dst,$mem\n\t"
2381            "punpcklqdq $dst,$dst\n\t"
2382            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
2383  ins_encode %{
2384    __ movq($dst$$XMMRegister, $mem$$Address);
2385    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
2386    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2387  %}
2388  ins_pipe( pipe_slow );
2389%}
2390
2391// Replicate long (8 byte) scalar zero to be vector
2392instruct Repl2L_zero(vecX dst, immL0 zero) %{
2393  predicate(n->as_Vector()->length() == 2);
2394  match(Set dst (ReplicateL zero));
2395  format %{ "pxor    $dst,$dst\t! replicate2L zero" %}
2396  ins_encode %{
2397    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2398  %}
2399  ins_pipe( fpu_reg_reg );
2400%}
2401
2402instruct Repl4L_zero(vecY dst, immL0 zero) %{
2403  predicate(n->as_Vector()->length() == 4);
2404  match(Set dst (ReplicateL zero));
2405  format %{ "vpxor   $dst,$dst,$dst\t! replicate4L zero" %}
2406  ins_encode %{
    // Plain AVX has no 256-bit vpxor (vxorpd would be needed there); AVX2 provides it.
2408    bool vector256 = true;
2409    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2410  %}
2411  ins_pipe( fpu_reg_reg );
2412%}
2413
2414// Replicate float (4 byte) scalar to be vector
2415instruct Repl2F(vecD dst, regF src) %{
2416  predicate(n->as_Vector()->length() == 2);
2417  match(Set dst (ReplicateF src));
  format %{ "pshufd  $dst,$src,0x00\t! replicate2F" %}
2419  ins_encode %{
2420    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2421  %}
2422  ins_pipe( fpu_reg_reg );
2423%}
2424
2425instruct Repl4F(vecX dst, regF src) %{
2426  predicate(n->as_Vector()->length() == 4);
2427  match(Set dst (ReplicateF src));
  format %{ "pshufd  $dst,$src,0x00\t! replicate4F" %}
2429  ins_encode %{
2430    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2431  %}
2432  ins_pipe( pipe_slow );
2433%}
2434
2435instruct Repl8F(vecY dst, regF src) %{
2436  predicate(n->as_Vector()->length() == 8);
2437  match(Set dst (ReplicateF src));
2438  format %{ "pshufd  $dst,$src,0x00\n\t"
2439            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
2440  ins_encode %{
2441    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2442    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2443  %}
2444  ins_pipe( pipe_slow );
2445%}
2446
2447// Replicate float (4 byte) scalar zero to be vector
2448instruct Repl2F_zero(vecD dst, immF0 zero) %{
2449  predicate(n->as_Vector()->length() == 2);
2450  match(Set dst (ReplicateF zero));
2451  format %{ "xorps   $dst,$dst\t! replicate2F zero" %}
2452  ins_encode %{
2453    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
2454  %}
2455  ins_pipe( fpu_reg_reg );
2456%}
2457
2458instruct Repl4F_zero(vecX dst, immF0 zero) %{
2459  predicate(n->as_Vector()->length() == 4);
2460  match(Set dst (ReplicateF zero));
2461  format %{ "xorps   $dst,$dst\t! replicate4F zero" %}
2462  ins_encode %{
2463    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
2464  %}
2465  ins_pipe( fpu_reg_reg );
2466%}
2467
2468instruct Repl8F_zero(vecY dst, immF0 zero) %{
2469  predicate(n->as_Vector()->length() == 8);
2470  match(Set dst (ReplicateF zero));
2471  format %{ "vxorps  $dst,$dst,$dst\t! replicate8F zero" %}
2472  ins_encode %{
2473    bool vector256 = true;
2474    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2475  %}
2476  ins_pipe( fpu_reg_reg );
2477%}
2478
2479// Replicate double (8 bytes) scalar to be vector
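// pshufd selector 0x44 (dword fields 0,1,0,1) copies the low 64-bit double into both
// halves of the XMM register with a single shuffle.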
2480instruct Repl2D(vecX dst, regD src) %{
2481  predicate(n->as_Vector()->length() == 2);
2482  match(Set dst (ReplicateD src));
2483  format %{ "pshufd  $dst,$src,0x44\t! replicate2D" %}
2484  ins_encode %{
2485    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
2486  %}
2487  ins_pipe( pipe_slow );
2488%}
2489
2490instruct Repl4D(vecY dst, regD src) %{
2491  predicate(n->as_Vector()->length() == 4);
2492  match(Set dst (ReplicateD src));
2493  format %{ "pshufd  $dst,$src,0x44\n\t"
2494            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
2495  ins_encode %{
2496    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
2497    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2498  %}
2499  ins_pipe( pipe_slow );
2500%}
2501
2502// Replicate double (8 byte) scalar zero to be vector
2503instruct Repl2D_zero(vecX dst, immD0 zero) %{
2504  predicate(n->as_Vector()->length() == 2);
2505  match(Set dst (ReplicateD zero));
2506  format %{ "xorpd   $dst,$dst\t! replicate2D zero" %}
2507  ins_encode %{
2508    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
2509  %}
2510  ins_pipe( fpu_reg_reg );
2511%}
2512
2513instruct Repl4D_zero(vecY dst, immD0 zero) %{
2514  predicate(n->as_Vector()->length() == 4);
2515  match(Set dst (ReplicateD zero));
  format %{ "vxorpd  $dst,$dst,$dst\t! replicate4D zero" %}
2517  ins_encode %{
2518    bool vector256 = true;
2519    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2520  %}
2521  ins_pipe( fpu_reg_reg );
2522%}
2523
2524// ====================VECTOR ARITHMETIC=======================================
2525
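// Each operation below comes in up to three flavours: a two-operand SSE form that
// updates $dst in place, a three-operand AVX register form, and an AVX form with the
// second source folded from memory.  vector256 selects the 256-bit (YMM) encoding; the
// 256-bit integer forms require AVX2 (UseAVX > 1) while the 256-bit float/double forms
// only need AVX (UseAVX > 0), as the predicates show.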
2526// --------------------------------- ADD --------------------------------------
2527
2528// Bytes vector add
2529instruct vadd4B(vecS dst, vecS src) %{
2530  predicate(n->as_Vector()->length() == 4);
2531  match(Set dst (AddVB dst src));
2532  format %{ "paddb   $dst,$src\t! add packed4B" %}
2533  ins_encode %{
2534    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
2535  %}
2536  ins_pipe( pipe_slow );
2537%}
2538
2539instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
2540  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2541  match(Set dst (AddVB src1 src2));
2542  format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
2543  ins_encode %{
2544    bool vector256 = false;
2545    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2546  %}
2547  ins_pipe( pipe_slow );
2548%}
2549
2550instruct vadd8B(vecD dst, vecD src) %{
2551  predicate(n->as_Vector()->length() == 8);
2552  match(Set dst (AddVB dst src));
2553  format %{ "paddb   $dst,$src\t! add packed8B" %}
2554  ins_encode %{
2555    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
2556  %}
2557  ins_pipe( pipe_slow );
2558%}
2559
2560instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
2561  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2562  match(Set dst (AddVB src1 src2));
2563  format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
2564  ins_encode %{
2565    bool vector256 = false;
2566    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2567  %}
2568  ins_pipe( pipe_slow );
2569%}
2570
2571instruct vadd16B(vecX dst, vecX src) %{
2572  predicate(n->as_Vector()->length() == 16);
2573  match(Set dst (AddVB dst src));
2574  format %{ "paddb   $dst,$src\t! add packed16B" %}
2575  ins_encode %{
2576    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
2577  %}
2578  ins_pipe( pipe_slow );
2579%}
2580
2581instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
2582  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
2583  match(Set dst (AddVB src1 src2));
2584  format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
2585  ins_encode %{
2586    bool vector256 = false;
2587    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2588  %}
2589  ins_pipe( pipe_slow );
2590%}
2591
2592instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
2593  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
2594  match(Set dst (AddVB src (LoadVector mem)));
2595  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
2596  ins_encode %{
2597    bool vector256 = false;
2598    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2599  %}
2600  ins_pipe( pipe_slow );
2601%}
2602
2603instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
2604  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
2605  match(Set dst (AddVB src1 src2));
2606  format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
2607  ins_encode %{
2608    bool vector256 = true;
2609    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2610  %}
2611  ins_pipe( pipe_slow );
2612%}
2613
2614instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
2615  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
2616  match(Set dst (AddVB src (LoadVector mem)));
2617  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
2618  ins_encode %{
2619    bool vector256 = true;
2620    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2621  %}
2622  ins_pipe( pipe_slow );
2623%}
2624
2625// Shorts/Chars vector add
2626instruct vadd2S(vecS dst, vecS src) %{
2627  predicate(n->as_Vector()->length() == 2);
2628  match(Set dst (AddVS dst src));
2629  format %{ "paddw   $dst,$src\t! add packed2S" %}
2630  ins_encode %{
2631    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
2632  %}
2633  ins_pipe( pipe_slow );
2634%}
2635
2636instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
2637  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2638  match(Set dst (AddVS src1 src2));
2639  format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
2640  ins_encode %{
2641    bool vector256 = false;
2642    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2643  %}
2644  ins_pipe( pipe_slow );
2645%}
2646
2647instruct vadd4S(vecD dst, vecD src) %{
2648  predicate(n->as_Vector()->length() == 4);
2649  match(Set dst (AddVS dst src));
2650  format %{ "paddw   $dst,$src\t! add packed4S" %}
2651  ins_encode %{
2652    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
2653  %}
2654  ins_pipe( pipe_slow );
2655%}
2656
2657instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
2658  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2659  match(Set dst (AddVS src1 src2));
2660  format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
2661  ins_encode %{
2662    bool vector256 = false;
2663    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2664  %}
2665  ins_pipe( pipe_slow );
2666%}
2667
2668instruct vadd8S(vecX dst, vecX src) %{
2669  predicate(n->as_Vector()->length() == 8);
2670  match(Set dst (AddVS dst src));
2671  format %{ "paddw   $dst,$src\t! add packed8S" %}
2672  ins_encode %{
2673    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
2674  %}
2675  ins_pipe( pipe_slow );
2676%}
2677
2678instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
2679  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2680  match(Set dst (AddVS src1 src2));
2681  format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
2682  ins_encode %{
2683    bool vector256 = false;
2684    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2685  %}
2686  ins_pipe( pipe_slow );
2687%}
2688
2689instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
2690  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2691  match(Set dst (AddVS src (LoadVector mem)));
2692  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
2693  ins_encode %{
2694    bool vector256 = false;
2695    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2696  %}
2697  ins_pipe( pipe_slow );
2698%}
2699
2700instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
2701  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
2702  match(Set dst (AddVS src1 src2));
2703  format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
2704  ins_encode %{
2705    bool vector256 = true;
2706    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2707  %}
2708  ins_pipe( pipe_slow );
2709%}
2710
2711instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
2712  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
2713  match(Set dst (AddVS src (LoadVector mem)));
2714  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
2715  ins_encode %{
2716    bool vector256 = true;
2717    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2718  %}
2719  ins_pipe( pipe_slow );
2720%}
2721
2722// Integers vector add
2723instruct vadd2I(vecD dst, vecD src) %{
2724  predicate(n->as_Vector()->length() == 2);
2725  match(Set dst (AddVI dst src));
2726  format %{ "paddd   $dst,$src\t! add packed2I" %}
2727  ins_encode %{
2728    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
2729  %}
2730  ins_pipe( pipe_slow );
2731%}
2732
2733instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
2734  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2735  match(Set dst (AddVI src1 src2));
2736  format %{ "vpaddd  $dst,$src1,$src2\t! add packed2I" %}
2737  ins_encode %{
2738    bool vector256 = false;
2739    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2740  %}
2741  ins_pipe( pipe_slow );
2742%}
2743
2744instruct vadd4I(vecX dst, vecX src) %{
2745  predicate(n->as_Vector()->length() == 4);
2746  match(Set dst (AddVI dst src));
2747  format %{ "paddd   $dst,$src\t! add packed4I" %}
2748  ins_encode %{
2749    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
2750  %}
2751  ins_pipe( pipe_slow );
2752%}
2753
2754instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
2755  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2756  match(Set dst (AddVI src1 src2));
2757  format %{ "vpaddd  $dst,$src1,$src2\t! add packed4I" %}
2758  ins_encode %{
2759    bool vector256 = false;
2760    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2761  %}
2762  ins_pipe( pipe_slow );
2763%}
2764
2765instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
2766  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2767  match(Set dst (AddVI src (LoadVector mem)));
2768  format %{ "vpaddd  $dst,$src,$mem\t! add packed4I" %}
2769  ins_encode %{
2770    bool vector256 = false;
2771    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2772  %}
2773  ins_pipe( pipe_slow );
2774%}
2775
2776instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
2777  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
2778  match(Set dst (AddVI src1 src2));
2779  format %{ "vpaddd  $dst,$src1,$src2\t! add packed8I" %}
2780  ins_encode %{
2781    bool vector256 = true;
2782    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2783  %}
2784  ins_pipe( pipe_slow );
2785%}
2786
2787instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
2788  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
2789  match(Set dst (AddVI src (LoadVector mem)));
2790  format %{ "vpaddd  $dst,$src,$mem\t! add packed8I" %}
2791  ins_encode %{
2792    bool vector256 = true;
2793    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2794  %}
2795  ins_pipe( pipe_slow );
2796%}
2797
2798// Longs vector add
2799instruct vadd2L(vecX dst, vecX src) %{
2800  predicate(n->as_Vector()->length() == 2);
2801  match(Set dst (AddVL dst src));
2802  format %{ "paddq   $dst,$src\t! add packed2L" %}
2803  ins_encode %{
2804    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
2805  %}
2806  ins_pipe( pipe_slow );
2807%}
2808
2809instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
2810  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2811  match(Set dst (AddVL src1 src2));
2812  format %{ "vpaddq  $dst,$src1,$src2\t! add packed2L" %}
2813  ins_encode %{
2814    bool vector256 = false;
2815    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2816  %}
2817  ins_pipe( pipe_slow );
2818%}
2819
2820instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
2821  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2822  match(Set dst (AddVL src (LoadVector mem)));
2823  format %{ "vpaddq  $dst,$src,$mem\t! add packed2L" %}
2824  ins_encode %{
2825    bool vector256 = false;
2826    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2827  %}
2828  ins_pipe( pipe_slow );
2829%}
2830
2831instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
2832  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
2833  match(Set dst (AddVL src1 src2));
2834  format %{ "vpaddq  $dst,$src1,$src2\t! add packed4L" %}
2835  ins_encode %{
2836    bool vector256 = true;
2837    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2838  %}
2839  ins_pipe( pipe_slow );
2840%}
2841
2842instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
2843  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
2844  match(Set dst (AddVL src (LoadVector mem)));
2845  format %{ "vpaddq  $dst,$src,$mem\t! add packed4L" %}
2846  ins_encode %{
2847    bool vector256 = true;
2848    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2849  %}
2850  ins_pipe( pipe_slow );
2851%}
2852
2853// Floats vector add
2854instruct vadd2F(vecD dst, vecD src) %{
2855  predicate(n->as_Vector()->length() == 2);
2856  match(Set dst (AddVF dst src));
2857  format %{ "addps   $dst,$src\t! add packed2F" %}
2858  ins_encode %{
2859    __ addps($dst$$XMMRegister, $src$$XMMRegister);
2860  %}
2861  ins_pipe( pipe_slow );
2862%}
2863
2864instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
2865  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2866  match(Set dst (AddVF src1 src2));
2867  format %{ "vaddps  $dst,$src1,$src2\t! add packed2F" %}
2868  ins_encode %{
2869    bool vector256 = false;
2870    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2871  %}
2872  ins_pipe( pipe_slow );
2873%}
2874
2875instruct vadd4F(vecX dst, vecX src) %{
2876  predicate(n->as_Vector()->length() == 4);
2877  match(Set dst (AddVF dst src));
2878  format %{ "addps   $dst,$src\t! add packed4F" %}
2879  ins_encode %{
2880    __ addps($dst$$XMMRegister, $src$$XMMRegister);
2881  %}
2882  ins_pipe( pipe_slow );
2883%}
2884
2885instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
2886  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2887  match(Set dst (AddVF src1 src2));
2888  format %{ "vaddps  $dst,$src1,$src2\t! add packed4F" %}
2889  ins_encode %{
2890    bool vector256 = false;
2891    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2892  %}
2893  ins_pipe( pipe_slow );
2894%}
2895
2896instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
2897  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2898  match(Set dst (AddVF src (LoadVector mem)));
2899  format %{ "vaddps  $dst,$src,$mem\t! add packed4F" %}
2900  ins_encode %{
2901    bool vector256 = false;
2902    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2903  %}
2904  ins_pipe( pipe_slow );
2905%}
2906
2907instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
2908  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2909  match(Set dst (AddVF src1 src2));
2910  format %{ "vaddps  $dst,$src1,$src2\t! add packed8F" %}
2911  ins_encode %{
2912    bool vector256 = true;
2913    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2914  %}
2915  ins_pipe( pipe_slow );
2916%}
2917
2918instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
2919  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
2920  match(Set dst (AddVF src (LoadVector mem)));
2921  format %{ "vaddps  $dst,$src,$mem\t! add packed8F" %}
2922  ins_encode %{
2923    bool vector256 = true;
2924    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2925  %}
2926  ins_pipe( pipe_slow );
2927%}
2928
2929// Doubles vector add
2930instruct vadd2D(vecX dst, vecX src) %{
2931  predicate(n->as_Vector()->length() == 2);
2932  match(Set dst (AddVD dst src));
2933  format %{ "addpd   $dst,$src\t! add packed2D" %}
2934  ins_encode %{
2935    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
2936  %}
2937  ins_pipe( pipe_slow );
2938%}
2939
2940instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
2941  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2942  match(Set dst (AddVD src1 src2));
2943  format %{ "vaddpd  $dst,$src1,$src2\t! add packed2D" %}
2944  ins_encode %{
2945    bool vector256 = false;
2946    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2947  %}
2948  ins_pipe( pipe_slow );
2949%}
2950
2951instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
2952  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
2953  match(Set dst (AddVD src (LoadVector mem)));
2954  format %{ "vaddpd  $dst,$src,$mem\t! add packed2D" %}
2955  ins_encode %{
2956    bool vector256 = false;
2957    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2958  %}
2959  ins_pipe( pipe_slow );
2960%}
2961
2962instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
2963  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2964  match(Set dst (AddVD src1 src2));
2965  format %{ "vaddpd  $dst,$src1,$src2\t! add packed4D" %}
2966  ins_encode %{
2967    bool vector256 = true;
2968    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
2969  %}
2970  ins_pipe( pipe_slow );
2971%}
2972
2973instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
2974  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2975  match(Set dst (AddVD src (LoadVector mem)));
2976  format %{ "vaddpd  $dst,$src,$mem\t! add packed4D" %}
2977  ins_encode %{
2978    bool vector256 = true;
2979    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
2980  %}
2981  ins_pipe( pipe_slow );
2982%}
2983
2984// --------------------------------- SUB --------------------------------------
2985
2986// Bytes vector sub
2987instruct vsub4B(vecS dst, vecS src) %{
2988  predicate(n->as_Vector()->length() == 4);
2989  match(Set dst (SubVB dst src));
2990  format %{ "psubb   $dst,$src\t! sub packed4B" %}
2991  ins_encode %{
2992    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
2993  %}
2994  ins_pipe( pipe_slow );
2995%}
2996
2997instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
2998  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
2999  match(Set dst (SubVB src1 src2));
3000  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
3001  ins_encode %{
3002    bool vector256 = false;
3003    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3004  %}
3005  ins_pipe( pipe_slow );
3006%}
3007
3008instruct vsub8B(vecD dst, vecD src) %{
3009  predicate(n->as_Vector()->length() == 8);
3010  match(Set dst (SubVB dst src));
3011  format %{ "psubb   $dst,$src\t! sub packed8B" %}
3012  ins_encode %{
3013    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
3014  %}
3015  ins_pipe( pipe_slow );
3016%}
3017
3018instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
3019  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3020  match(Set dst (SubVB src1 src2));
3021  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
3022  ins_encode %{
3023    bool vector256 = false;
3024    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3025  %}
3026  ins_pipe( pipe_slow );
3027%}
3028
3029instruct vsub16B(vecX dst, vecX src) %{
3030  predicate(n->as_Vector()->length() == 16);
3031  match(Set dst (SubVB dst src));
3032  format %{ "psubb   $dst,$src\t! sub packed16B" %}
3033  ins_encode %{
3034    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
3035  %}
3036  ins_pipe( pipe_slow );
3037%}
3038
3039instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
3040  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
3041  match(Set dst (SubVB src1 src2));
3042  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
3043  ins_encode %{
3044    bool vector256 = false;
3045    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3046  %}
3047  ins_pipe( pipe_slow );
3048%}
3049
3050instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
3051  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
3052  match(Set dst (SubVB src (LoadVector mem)));
3053  format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
3054  ins_encode %{
3055    bool vector256 = false;
3056    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3057  %}
3058  ins_pipe( pipe_slow );
3059%}
3060
3061instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
3062  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
3063  match(Set dst (SubVB src1 src2));
3064  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
3065  ins_encode %{
3066    bool vector256 = true;
3067    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3068  %}
3069  ins_pipe( pipe_slow );
3070%}
3071
3072instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
3073  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
3074  match(Set dst (SubVB src (LoadVector mem)));
3075  format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
3076  ins_encode %{
3077    bool vector256 = true;
3078    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3079  %}
3080  ins_pipe( pipe_slow );
3081%}
3082
3083// Shorts/Chars vector sub
3084instruct vsub2S(vecS dst, vecS src) %{
3085  predicate(n->as_Vector()->length() == 2);
3086  match(Set dst (SubVS dst src));
3087  format %{ "psubw   $dst,$src\t! sub packed2S" %}
3088  ins_encode %{
3089    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
3090  %}
3091  ins_pipe( pipe_slow );
3092%}
3093
3094instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
3095  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3096  match(Set dst (SubVS src1 src2));
3097  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
3098  ins_encode %{
3099    bool vector256 = false;
3100    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3101  %}
3102  ins_pipe( pipe_slow );
3103%}
3104
3105instruct vsub4S(vecD dst, vecD src) %{
3106  predicate(n->as_Vector()->length() == 4);
3107  match(Set dst (SubVS dst src));
3108  format %{ "psubw   $dst,$src\t! sub packed4S" %}
3109  ins_encode %{
3110    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
3111  %}
3112  ins_pipe( pipe_slow );
3113%}
3114
3115instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
3116  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3117  match(Set dst (SubVS src1 src2));
3118  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
3119  ins_encode %{
3120    bool vector256 = false;
3121    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3122  %}
3123  ins_pipe( pipe_slow );
3124%}
3125
3126instruct vsub8S(vecX dst, vecX src) %{
3127  predicate(n->as_Vector()->length() == 8);
3128  match(Set dst (SubVS dst src));
3129  format %{ "psubw   $dst,$src\t! sub packed8S" %}
3130  ins_encode %{
3131    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
3132  %}
3133  ins_pipe( pipe_slow );
3134%}
3135
3136instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
3137  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3138  match(Set dst (SubVS src1 src2));
3139  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
3140  ins_encode %{
3141    bool vector256 = false;
3142    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3143  %}
3144  ins_pipe( pipe_slow );
3145%}
3146
3147instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
3148  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3149  match(Set dst (SubVS src (LoadVector mem)));
3150  format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
3151  ins_encode %{
3152    bool vector256 = false;
3153    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3154  %}
3155  ins_pipe( pipe_slow );
3156%}
3157
3158instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
3159  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3160  match(Set dst (SubVS src1 src2));
3161  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
3162  ins_encode %{
3163    bool vector256 = true;
3164    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3165  %}
3166  ins_pipe( pipe_slow );
3167%}
3168
3169instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
3170  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3171  match(Set dst (SubVS src (LoadVector mem)));
3172  format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
3173  ins_encode %{
3174    bool vector256 = true;
3175    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3176  %}
3177  ins_pipe( pipe_slow );
3178%}
3179
3180// Integers vector sub
3181instruct vsub2I(vecD dst, vecD src) %{
3182  predicate(n->as_Vector()->length() == 2);
3183  match(Set dst (SubVI dst src));
3184  format %{ "psubd   $dst,$src\t! sub packed2I" %}
3185  ins_encode %{
3186    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
3187  %}
3188  ins_pipe( pipe_slow );
3189%}
3190
3191instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
3192  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3193  match(Set dst (SubVI src1 src2));
3194  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed2I" %}
3195  ins_encode %{
3196    bool vector256 = false;
3197    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3198  %}
3199  ins_pipe( pipe_slow );
3200%}
3201
3202instruct vsub4I(vecX dst, vecX src) %{
3203  predicate(n->as_Vector()->length() == 4);
3204  match(Set dst (SubVI dst src));
3205  format %{ "psubd   $dst,$src\t! sub packed4I" %}
3206  ins_encode %{
3207    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
3208  %}
3209  ins_pipe( pipe_slow );
3210%}
3211
3212instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
3213  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3214  match(Set dst (SubVI src1 src2));
3215  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed4I" %}
3216  ins_encode %{
3217    bool vector256 = false;
3218    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3219  %}
3220  ins_pipe( pipe_slow );
3221%}
3222
3223instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
3224  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3225  match(Set dst (SubVI src (LoadVector mem)));
3226  format %{ "vpsubd  $dst,$src,$mem\t! sub packed4I" %}
3227  ins_encode %{
3228    bool vector256 = false;
3229    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3230  %}
3231  ins_pipe( pipe_slow );
3232%}
3233
3234instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
3235  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3236  match(Set dst (SubVI src1 src2));
3237  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed8I" %}
3238  ins_encode %{
3239    bool vector256 = true;
3240    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3241  %}
3242  ins_pipe( pipe_slow );
3243%}
3244
3245instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
3246  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3247  match(Set dst (SubVI src (LoadVector mem)));
3248  format %{ "vpsubd  $dst,$src,$mem\t! sub packed8I" %}
3249  ins_encode %{
3250    bool vector256 = true;
3251    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3252  %}
3253  ins_pipe( pipe_slow );
3254%}
3255
3256// Longs vector sub
3257instruct vsub2L(vecX dst, vecX src) %{
3258  predicate(n->as_Vector()->length() == 2);
3259  match(Set dst (SubVL dst src));
3260  format %{ "psubq   $dst,$src\t! sub packed2L" %}
3261  ins_encode %{
3262    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
3263  %}
3264  ins_pipe( pipe_slow );
3265%}
3266
3267instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
3268  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3269  match(Set dst (SubVL src1 src2));
3270  format %{ "vpsubq  $dst,$src1,$src2\t! sub packed2L" %}
3271  ins_encode %{
3272    bool vector256 = false;
3273    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3274  %}
3275  ins_pipe( pipe_slow );
3276%}
3277
3278instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
3279  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3280  match(Set dst (SubVL src (LoadVector mem)));
3281  format %{ "vpsubq  $dst,$src,$mem\t! sub packed2L" %}
3282  ins_encode %{
3283    bool vector256 = false;
3284    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3285  %}
3286  ins_pipe( pipe_slow );
3287%}
3288
3289instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
3290  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
3291  match(Set dst (SubVL src1 src2));
3292  format %{ "vpsubq  $dst,$src1,$src2\t! sub packed4L" %}
3293  ins_encode %{
3294    bool vector256 = true;
3295    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3296  %}
3297  ins_pipe( pipe_slow );
3298%}
3299
3300instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
3301  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
3302  match(Set dst (SubVL src (LoadVector mem)));
3303  format %{ "vpsubq  $dst,$src,$mem\t! sub packed4L" %}
3304  ins_encode %{
3305    bool vector256 = true;
3306    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3307  %}
3308  ins_pipe( pipe_slow );
3309%}
3310
3311// Floats vector sub
3312instruct vsub2F(vecD dst, vecD src) %{
3313  predicate(n->as_Vector()->length() == 2);
3314  match(Set dst (SubVF dst src));
3315  format %{ "subps   $dst,$src\t! sub packed2F" %}
3316  ins_encode %{
3317    __ subps($dst$$XMMRegister, $src$$XMMRegister);
3318  %}
3319  ins_pipe( pipe_slow );
3320%}
3321
3322instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
3323  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3324  match(Set dst (SubVF src1 src2));
3325  format %{ "vsubps  $dst,$src1,$src2\t! sub packed2F" %}
3326  ins_encode %{
3327    bool vector256 = false;
3328    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3329  %}
3330  ins_pipe( pipe_slow );
3331%}
3332
3333instruct vsub4F(vecX dst, vecX src) %{
3334  predicate(n->as_Vector()->length() == 4);
3335  match(Set dst (SubVF dst src));
3336  format %{ "subps   $dst,$src\t! sub packed4F" %}
3337  ins_encode %{
3338    __ subps($dst$$XMMRegister, $src$$XMMRegister);
3339  %}
3340  ins_pipe( pipe_slow );
3341%}
3342
3343instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
3344  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3345  match(Set dst (SubVF src1 src2));
3346  format %{ "vsubps  $dst,$src1,$src2\t! sub packed4F" %}
3347  ins_encode %{
3348    bool vector256 = false;
3349    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3350  %}
3351  ins_pipe( pipe_slow );
3352%}
3353
3354instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
3355  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3356  match(Set dst (SubVF src (LoadVector mem)));
3357  format %{ "vsubps  $dst,$src,$mem\t! sub packed4F" %}
3358  ins_encode %{
3359    bool vector256 = false;
3360    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3361  %}
3362  ins_pipe( pipe_slow );
3363%}
3364
3365instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
3366  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3367  match(Set dst (SubVF src1 src2));
3368  format %{ "vsubps  $dst,$src1,$src2\t! sub packed8F" %}
3369  ins_encode %{
3370    bool vector256 = true;
3371    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3372  %}
3373  ins_pipe( pipe_slow );
3374%}
3375
3376instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
3377  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3378  match(Set dst (SubVF src (LoadVector mem)));
3379  format %{ "vsubps  $dst,$src,$mem\t! sub packed8F" %}
3380  ins_encode %{
3381    bool vector256 = true;
3382    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3383  %}
3384  ins_pipe( pipe_slow );
3385%}
3386
3387// Doubles vector sub
3388instruct vsub2D(vecX dst, vecX src) %{
3389  predicate(n->as_Vector()->length() == 2);
3390  match(Set dst (SubVD dst src));
3391  format %{ "subpd   $dst,$src\t! sub packed2D" %}
3392  ins_encode %{
3393    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
3394  %}
3395  ins_pipe( pipe_slow );
3396%}
3397
3398instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
3399  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3400  match(Set dst (SubVD src1 src2));
3401  format %{ "vsubpd  $dst,$src1,$src2\t! sub packed2D" %}
3402  ins_encode %{
3403    bool vector256 = false;
3404    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3405  %}
3406  ins_pipe( pipe_slow );
3407%}
3408
3409instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
3410  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3411  match(Set dst (SubVD src (LoadVector mem)));
3412  format %{ "vsubpd  $dst,$src,$mem\t! sub packed2D" %}
3413  ins_encode %{
3414    bool vector256 = false;
3415    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3416  %}
3417  ins_pipe( pipe_slow );
3418%}
3419
3420instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
3421  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3422  match(Set dst (SubVD src1 src2));
3423  format %{ "vsubpd  $dst,$src1,$src2\t! sub packed4D" %}
3424  ins_encode %{
3425    bool vector256 = true;
3426    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3427  %}
3428  ins_pipe( pipe_slow );
3429%}
3430
3431instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
3432  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3433  match(Set dst (SubVD src (LoadVector mem)));
3434  format %{ "vsubpd  $dst,$src,$mem\t! sub packed4D" %}
3435  ins_encode %{
3436    bool vector256 = true;
3437    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3438  %}
3439  ins_pipe( pipe_slow );
3440%}
3441
3442// --------------------------------- MUL --------------------------------------
3443
3444// Shorts/Chars vector mul
3445instruct vmul2S(vecS dst, vecS src) %{
3446  predicate(n->as_Vector()->length() == 2);
3447  match(Set dst (MulVS dst src));
3448  format %{ "pmullw $dst,$src\t! mul packed2S" %}
3449  ins_encode %{
3450    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
3451  %}
3452  ins_pipe( pipe_slow );
3453%}
3454
3455instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
3456  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3457  match(Set dst (MulVS src1 src2));
3458  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
3459  ins_encode %{
3460    bool vector256 = false;
3461    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3462  %}
3463  ins_pipe( pipe_slow );
3464%}
3465
3466instruct vmul4S(vecD dst, vecD src) %{
3467  predicate(n->as_Vector()->length() == 4);
3468  match(Set dst (MulVS dst src));
3469  format %{ "pmullw  $dst,$src\t! mul packed4S" %}
3470  ins_encode %{
3471    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
3472  %}
3473  ins_pipe( pipe_slow );
3474%}
3475
3476instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
3477  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3478  match(Set dst (MulVS src1 src2));
3479  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
3480  ins_encode %{
3481    bool vector256 = false;
3482    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3483  %}
3484  ins_pipe( pipe_slow );
3485%}
3486
3487instruct vmul8S(vecX dst, vecX src) %{
3488  predicate(n->as_Vector()->length() == 8);
3489  match(Set dst (MulVS dst src));
3490  format %{ "pmullw  $dst,$src\t! mul packed8S" %}
3491  ins_encode %{
3492    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
3493  %}
3494  ins_pipe( pipe_slow );
3495%}
3496
3497instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
3498  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3499  match(Set dst (MulVS src1 src2));
3500  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
3501  ins_encode %{
3502    bool vector256 = false;
3503    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3504  %}
3505  ins_pipe( pipe_slow );
3506%}
3507
3508instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
3509  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3510  match(Set dst (MulVS src (LoadVector mem)));
3511  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
3512  ins_encode %{
3513    bool vector256 = false;
3514    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3515  %}
3516  ins_pipe( pipe_slow );
3517%}
3518
3519instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
3520  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3521  match(Set dst (MulVS src1 src2));
3522  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
3523  ins_encode %{
3524    bool vector256 = true;
3525    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3526  %}
3527  ins_pipe( pipe_slow );
3528%}
3529
3530instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
3531  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
3532  match(Set dst (MulVS src (LoadVector mem)));
3533  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
3534  ins_encode %{
3535    bool vector256 = true;
3536    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3537  %}
3538  ins_pipe( pipe_slow );
3539%}
3540
3541// Integers vector mul (sse4_1)
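//
// pmulld is an SSE4.1 instruction, hence the UseSSE > 3 guard on the rules
// below; the short multiplies above need only SSE2 pmullw.  An illustrative
// sketch of the kind of Java loop that reaches these rules (array and
// variable names are hypothetical, not taken from this file):
//
//   for (int i = 0; i < a.length; i++) {
//     a[i] = b[i] * c[i];    // int lanes -> MulVI -> pmulld / vpmulld
//   }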
3542instruct vmul2I(vecD dst, vecD src) %{
3543  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
3544  match(Set dst (MulVI dst src));
3545  format %{ "pmulld  $dst,$src\t! mul packed2I" %}
3546  ins_encode %{
3547    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
3548  %}
3549  ins_pipe( pipe_slow );
3550%}
3551
3552instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
3553  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3554  match(Set dst (MulVI src1 src2));
3555  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
3556  ins_encode %{
3557    bool vector256 = false;
3558    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3559  %}
3560  ins_pipe( pipe_slow );
3561%}
3562
3563instruct vmul4I(vecX dst, vecX src) %{
3564  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
3565  match(Set dst (MulVI dst src));
3566  format %{ "pmulld  $dst,$src\t! mul packed4I" %}
3567  ins_encode %{
3568    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
3569  %}
3570  ins_pipe( pipe_slow );
3571%}
3572
3573instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
3574  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3575  match(Set dst (MulVI src1 src2));
3576  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
3577  ins_encode %{
3578    bool vector256 = false;
3579    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3580  %}
3581  ins_pipe( pipe_slow );
3582%}
3583
3584instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
3585  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3586  match(Set dst (MulVI src (LoadVector mem)));
3587  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
3588  ins_encode %{
3589    bool vector256 = false;
3590    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3591  %}
3592  ins_pipe( pipe_slow );
3593%}
3594
3595instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
3596  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3597  match(Set dst (MulVI src1 src2));
3598  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
3599  ins_encode %{
3600    bool vector256 = true;
3601    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3602  %}
3603  ins_pipe( pipe_slow );
3604%}
3605
3606instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
3607  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
3608  match(Set dst (MulVI src (LoadVector mem)));
3609  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
3610  ins_encode %{
3611    bool vector256 = true;
3612    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3613  %}
3614  ins_pipe( pipe_slow );
3615%}
3616
3617// Floats vector mul
3618instruct vmul2F(vecD dst, vecD src) %{
3619  predicate(n->as_Vector()->length() == 2);
3620  match(Set dst (MulVF dst src));
3621  format %{ "mulps   $dst,$src\t! mul packed2F" %}
3622  ins_encode %{
3623    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
3624  %}
3625  ins_pipe( pipe_slow );
3626%}
3627
3628instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
3629  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3630  match(Set dst (MulVF src1 src2));
3631  format %{ "vmulps  $dst,$src1,$src2\t! mul packed2F" %}
3632  ins_encode %{
3633    bool vector256 = false;
3634    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3635  %}
3636  ins_pipe( pipe_slow );
3637%}
3638
3639instruct vmul4F(vecX dst, vecX src) %{
3640  predicate(n->as_Vector()->length() == 4);
3641  match(Set dst (MulVF dst src));
3642  format %{ "mulps   $dst,$src\t! mul packed4F" %}
3643  ins_encode %{
3644    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
3645  %}
3646  ins_pipe( pipe_slow );
3647%}
3648
3649instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
3650  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3651  match(Set dst (MulVF src1 src2));
3652  format %{ "vmulps  $dst,$src1,$src2\t! mul packed4F" %}
3653  ins_encode %{
3654    bool vector256 = false;
3655    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3656  %}
3657  ins_pipe( pipe_slow );
3658%}
3659
3660instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
3661  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3662  match(Set dst (MulVF src (LoadVector mem)));
3663  format %{ "vmulps  $dst,$src,$mem\t! mul packed4F" %}
3664  ins_encode %{
3665    bool vector256 = false;
3666    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3667  %}
3668  ins_pipe( pipe_slow );
3669%}
3670
3671instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
3672  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3673  match(Set dst (MulVF src1 src2));
3674  format %{ "vmulps  $dst,$src1,$src2\t! mul packed8F" %}
3675  ins_encode %{
3676    bool vector256 = true;
3677    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3678  %}
3679  ins_pipe( pipe_slow );
3680%}
3681
3682instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
3683  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3684  match(Set dst (MulVF src (LoadVector mem)));
3685  format %{ "vmulps  $dst,$src,$mem\t! mul packed8F" %}
3686  ins_encode %{
3687    bool vector256 = true;
3688    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3689  %}
3690  ins_pipe( pipe_slow );
3691%}
3692
3693// Doubles vector mul
3694instruct vmul2D(vecX dst, vecX src) %{
3695  predicate(n->as_Vector()->length() == 2);
3696  match(Set dst (MulVD dst src));
3697  format %{ "mulpd   $dst,$src\t! mul packed2D" %}
3698  ins_encode %{
3699    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
3700  %}
3701  ins_pipe( pipe_slow );
3702%}
3703
3704instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
3705  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3706  match(Set dst (MulVD src1 src2));
3707  format %{ "vmulpd  $dst,$src1,$src2\t! mul packed2D" %}
3708  ins_encode %{
3709    bool vector256 = false;
3710    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3711  %}
3712  ins_pipe( pipe_slow );
3713%}
3714
3715instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
3716  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3717  match(Set dst (MulVD src (LoadVector mem)));
3718  format %{ "vmulpd  $dst,$src,$mem\t! mul packed2D" %}
3719  ins_encode %{
3720    bool vector256 = false;
3721    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3722  %}
3723  ins_pipe( pipe_slow );
3724%}
3725
3726instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
3727  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3728  match(Set dst (MulVD src1 src2));
3729  format %{ "vmulpd  $dst,$src1,$src2\t! mul packed4D" %}
3730  ins_encode %{
3731    bool vector256 = true;
3732    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3733  %}
3734  ins_pipe( pipe_slow );
3735%}
3736
3737instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
3738  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3739  match(Set dst (MulVD src (LoadVector mem)));
3740  format %{ "vmulpd  $dst,$src,$mem\t! mul packed4D" %}
3741  ins_encode %{
3742    bool vector256 = true;
3743    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3744  %}
3745  ins_pipe( pipe_slow );
3746%}
3747
3748// --------------------------------- DIV --------------------------------------
3749
3750// Floats vector div
3751instruct vdiv2F(vecD dst, vecD src) %{
3752  predicate(n->as_Vector()->length() == 2);
3753  match(Set dst (DivVF dst src));
3754  format %{ "divps   $dst,$src\t! div packed2F" %}
3755  ins_encode %{
3756    __ divps($dst$$XMMRegister, $src$$XMMRegister);
3757  %}
3758  ins_pipe( pipe_slow );
3759%}
3760
3761instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
3762  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3763  match(Set dst (DivVF src1 src2));
3764  format %{ "vdivps  $dst,$src1,$src2\t! div packed2F" %}
3765  ins_encode %{
3766    bool vector256 = false;
3767    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3768  %}
3769  ins_pipe( pipe_slow );
3770%}
3771
3772instruct vdiv4F(vecX dst, vecX src) %{
3773  predicate(n->as_Vector()->length() == 4);
3774  match(Set dst (DivVF dst src));
3775  format %{ "divps   $dst,$src\t! div packed4F" %}
3776  ins_encode %{
3777    __ divps($dst$$XMMRegister, $src$$XMMRegister);
3778  %}
3779  ins_pipe( pipe_slow );
3780%}
3781
3782instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
3783  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3784  match(Set dst (DivVF src1 src2));
3785  format %{ "vdivps  $dst,$src1,$src2\t! div packed4F" %}
3786  ins_encode %{
3787    bool vector256 = false;
3788    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3789  %}
3790  ins_pipe( pipe_slow );
3791%}
3792
3793instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
3794  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3795  match(Set dst (DivVF src (LoadVector mem)));
3796  format %{ "vdivps  $dst,$src,$mem\t! div packed4F" %}
3797  ins_encode %{
3798    bool vector256 = false;
3799    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3800  %}
3801  ins_pipe( pipe_slow );
3802%}
3803
3804instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
3805  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3806  match(Set dst (DivVF src1 src2));
3807  format %{ "vdivps  $dst,$src1,$src2\t! div packed8F" %}
3808  ins_encode %{
3809    bool vector256 = true;
3810    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3811  %}
3812  ins_pipe( pipe_slow );
3813%}
3814
3815instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
3816  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
3817  match(Set dst (DivVF src (LoadVector mem)));
3818  format %{ "vdivps  $dst,$src,$mem\t! div packed8F" %}
3819  ins_encode %{
3820    bool vector256 = true;
3821    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3822  %}
3823  ins_pipe( pipe_slow );
3824%}
3825
3826// Doubles vector div
3827instruct vdiv2D(vecX dst, vecX src) %{
3828  predicate(n->as_Vector()->length() == 2);
3829  match(Set dst (DivVD dst src));
3830  format %{ "divpd   $dst,$src\t! div packed2D" %}
3831  ins_encode %{
3832    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
3833  %}
3834  ins_pipe( pipe_slow );
3835%}
3836
3837instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
3838  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3839  match(Set dst (DivVD src1 src2));
3840  format %{ "vdivpd  $dst,$src1,$src2\t! div packed2D" %}
3841  ins_encode %{
3842    bool vector256 = false;
3843    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3844  %}
3845  ins_pipe( pipe_slow );
3846%}
3847
3848instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
3849  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3850  match(Set dst (DivVD src (LoadVector mem)));
3851  format %{ "vdivpd  $dst,$src,$mem\t! div packed2D" %}
3852  ins_encode %{
3853    bool vector256 = false;
3854    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3855  %}
3856  ins_pipe( pipe_slow );
3857%}
3858
3859instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
3860  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3861  match(Set dst (DivVD src1 src2));
3862  format %{ "vdivpd  $dst,$src1,$src2\t! div packed4D" %}
3863  ins_encode %{
3864    bool vector256 = true;
3865    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
3866  %}
3867  ins_pipe( pipe_slow );
3868%}
3869
3870instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
3871  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3872  match(Set dst (DivVD src (LoadVector mem)));
3873  format %{ "vdivpd  $dst,$src,$mem\t! div packed4D" %}
3874  ins_encode %{
3875    bool vector256 = true;
3876    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
3877  %}
3878  ins_pipe( pipe_slow );
3879%}
3880
3881// ------------------------------ Shift ---------------------------------------
3882
3883// Left and right shift count vectors are the same on x86
3884// (only the lowest bits of the xmm register are used for the count).
3885instruct vshiftcnt(vecS dst, rRegI cnt) %{
3886  match(Set dst (LShiftCntV cnt));
3887  match(Set dst (RShiftCntV cnt));
3888  format %{ "movd    $dst,$cnt\t! load shift count" %}
3889  ins_encode %{
3890    __ movdl($dst$$XMMRegister, $cnt$$Register);
3891  %}
3892  ins_pipe( pipe_slow );
3893%}
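
// Illustrative Java source pattern for the rule above (an assumption about
// typical input, not taken from this file): a loop-invariant count such as
// 'n' below is moved into an xmm register with movdl, and the single rule
// above serves both the left and the right shift count nodes, since the
// hardware reads the count from the low bits of the register either way.
//
//   for (int i = 0; i < a.length; i++) {
//     a[i] = (a[i] << n) | (b[i] >>> n);   // both counts load through the movdl rule
//   }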
3894
3895// ------------------------------ LeftShift -----------------------------------
3896
3897// Shorts/Chars vector left shift
3898instruct vsll2S(vecS dst, vecS shift) %{
3899  predicate(n->as_Vector()->length() == 2);
3900  match(Set dst (LShiftVS dst shift));
3901  format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
3902  ins_encode %{
3903    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
3904  %}
3905  ins_pipe( pipe_slow );
3906%}
3907
3908instruct vsll2S_imm(vecS dst, immI8 shift) %{
3909  predicate(n->as_Vector()->length() == 2);
3910  match(Set dst (LShiftVS dst shift));
3911  format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
3912  ins_encode %{
3913    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
3914  %}
3915  ins_pipe( pipe_slow );
3916%}
3917
3918instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
3919  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3920  match(Set dst (LShiftVS src shift));
3921  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
3922  ins_encode %{
3923    bool vector256 = false;
3924    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3925  %}
3926  ins_pipe( pipe_slow );
3927%}
3928
3929instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
3930  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
3931  match(Set dst (LShiftVS src shift));
3932  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
3933  ins_encode %{
3934    bool vector256 = false;
3935    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3936  %}
3937  ins_pipe( pipe_slow );
3938%}
3939
3940instruct vsll4S(vecD dst, vecS shift) %{
3941  predicate(n->as_Vector()->length() == 4);
3942  match(Set dst (LShiftVS dst shift));
3943  format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
3944  ins_encode %{
3945    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
3946  %}
3947  ins_pipe( pipe_slow );
3948%}
3949
3950instruct vsll4S_imm(vecD dst, immI8 shift) %{
3951  predicate(n->as_Vector()->length() == 4);
3952  match(Set dst (LShiftVS dst shift));
3953  format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
3954  ins_encode %{
3955    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
3956  %}
3957  ins_pipe( pipe_slow );
3958%}
3959
3960instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
3961  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3962  match(Set dst (LShiftVS src shift));
3963  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
3964  ins_encode %{
3965    bool vector256 = false;
3966    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
3967  %}
3968  ins_pipe( pipe_slow );
3969%}
3970
3971instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
3972  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
3973  match(Set dst (LShiftVS src shift));
3974  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
3975  ins_encode %{
3976    bool vector256 = false;
3977    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
3978  %}
3979  ins_pipe( pipe_slow );
3980%}
3981
3982instruct vsll8S(vecX dst, vecS shift) %{
3983  predicate(n->as_Vector()->length() == 8);
3984  match(Set dst (LShiftVS dst shift));
3985  format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
3986  ins_encode %{
3987    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
3988  %}
3989  ins_pipe( pipe_slow );
3990%}
3991
3992instruct vsll8S_imm(vecX dst, immI8 shift) %{
3993  predicate(n->as_Vector()->length() == 8);
3994  match(Set dst (LShiftVS dst shift));
3995  format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
3996  ins_encode %{
3997    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
3998  %}
3999  ins_pipe( pipe_slow );
4000%}
4001
4002instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
4003  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4004  match(Set dst (LShiftVS src shift));
4005  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
4006  ins_encode %{
4007    bool vector256 = false;
4008    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4009  %}
4010  ins_pipe( pipe_slow );
4011%}
4012
4013instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
4014  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4015  match(Set dst (LShiftVS src shift));
4016  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
4017  ins_encode %{
4018    bool vector256 = false;
4019    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4020  %}
4021  ins_pipe( pipe_slow );
4022%}
4023
4024instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
4025  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4026  match(Set dst (LShiftVS src shift));
4027  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
4028  ins_encode %{
4029    bool vector256 = true;
4030    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4031  %}
4032  ins_pipe( pipe_slow );
4033%}
4034
4035instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
4036  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4037  match(Set dst (LShiftVS src shift));
4038  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
4039  ins_encode %{
4040    bool vector256 = true;
4041    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4042  %}
4043  ins_pipe( pipe_slow );
4044%}
4045
4046// Integers vector left shift
4047instruct vsll2I(vecD dst, vecS shift) %{
4048  predicate(n->as_Vector()->length() == 2);
4049  match(Set dst (LShiftVI dst shift));
4050  format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
4051  ins_encode %{
4052    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
4053  %}
4054  ins_pipe( pipe_slow );
4055%}
4056
4057instruct vsll2I_imm(vecD dst, immI8 shift) %{
4058  predicate(n->as_Vector()->length() == 2);
4059  match(Set dst (LShiftVI dst shift));
4060  format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
4061  ins_encode %{
4062    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
4063  %}
4064  ins_pipe( pipe_slow );
4065%}
4066
4067instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
4068  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4069  match(Set dst (LShiftVI src shift));
4070  format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
4071  ins_encode %{
4072    bool vector256 = false;
4073    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4074  %}
4075  ins_pipe( pipe_slow );
4076%}
4077
4078instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
4079  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4080  match(Set dst (LShiftVI src shift));
4081  format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
4082  ins_encode %{
4083    bool vector256 = false;
4084    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4085  %}
4086  ins_pipe( pipe_slow );
4087%}
4088
4089instruct vsll4I(vecX dst, vecS shift) %{
4090  predicate(n->as_Vector()->length() == 4);
4091  match(Set dst (LShiftVI dst shift));
4092  format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
4093  ins_encode %{
4094    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
4095  %}
4096  ins_pipe( pipe_slow );
4097%}
4098
4099instruct vsll4I_imm(vecX dst, immI8 shift) %{
4100  predicate(n->as_Vector()->length() == 4);
4101  match(Set dst (LShiftVI dst shift));
4102  format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
4103  ins_encode %{
4104    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
4105  %}
4106  ins_pipe( pipe_slow );
4107%}
4108
4109instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
4110  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4111  match(Set dst (LShiftVI src shift));
4112  format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
4113  ins_encode %{
4114    bool vector256 = false;
4115    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4116  %}
4117  ins_pipe( pipe_slow );
4118%}
4119
4120instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
4121  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4122  match(Set dst (LShiftVI src shift));
4123  format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
4124  ins_encode %{
4125    bool vector256 = false;
4126    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4127  %}
4128  ins_pipe( pipe_slow );
4129%}
4130
4131instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
4132  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4133  match(Set dst (LShiftVI src shift));
4134  format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
4135  ins_encode %{
4136    bool vector256 = true;
4137    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4138  %}
4139  ins_pipe( pipe_slow );
4140%}
4141
4142instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
4143  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4144  match(Set dst (LShiftVI src shift));
4145  format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
4146  ins_encode %{
4147    bool vector256 = true;
4148    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4149  %}
4150  ins_pipe( pipe_slow );
4151%}
4152
4153// Longs vector left shift
4154instruct vsll2L(vecX dst, vecS shift) %{
4155  predicate(n->as_Vector()->length() == 2);
4156  match(Set dst (LShiftVL dst shift));
4157  format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
4158  ins_encode %{
4159    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
4160  %}
4161  ins_pipe( pipe_slow );
4162%}
4163
4164instruct vsll2L_imm(vecX dst, immI8 shift) %{
4165  predicate(n->as_Vector()->length() == 2);
4166  match(Set dst (LShiftVL dst shift));
4167  format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
4168  ins_encode %{
4169    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
4170  %}
4171  ins_pipe( pipe_slow );
4172%}
4173
4174instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
4175  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4176  match(Set dst (LShiftVL src shift));
4177  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
4178  ins_encode %{
4179    bool vector256 = false;
4180    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4181  %}
4182  ins_pipe( pipe_slow );
4183%}
4184
4185instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
4186  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4187  match(Set dst (LShiftVL src shift));
4188  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
4189  ins_encode %{
4190    bool vector256 = false;
4191    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4192  %}
4193  ins_pipe( pipe_slow );
4194%}
4195
4196instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
4197  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4198  match(Set dst (LShiftVL src shift));
4199  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
4200  ins_encode %{
4201    bool vector256 = true;
4202    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4203  %}
4204  ins_pipe( pipe_slow );
4205%}
4206
4207instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
4208  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4209  match(Set dst (LShiftVL src shift));
4210  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
4211  ins_encode %{
4212    bool vector256 = true;
4213    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4214  %}
4215  ins_pipe( pipe_slow );
4216%}
4217
4218// ----------------------- LogicalRightShift -----------------------------------
4219
4220// Shorts vector logical right shift produces an incorrect Java result
4221// for negative data because Java code converts short values to int with
4222// sign extension before the shift. But char vectors are fine since chars are
4223// unsigned values.
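//
// A minimal worked example (illustration only, not part of the matcher; the
// variable names are hypothetical): for a negative short, Java's >>> shifts
// the sign-extension bits back into the low 16 bits of the result, while
// psrlw/vpsrlw shift in zeros, so the two results disagree.
//
//   short s = (short)-1;                               // lane value 0xFFFF
//   int   k = 2;                                       // shift count, 0 < k < 16
//   short javaResult   = (short)(s >>> k);             // 0xFFFFFFFF >>> 2 = 0x3FFFFFFF -> (short)-1
//   short packedResult = (short)((s & 0xFFFF) >>> k);  // what psrlw computes: 0x3FFF (16383)
//
// javaResult != packedResult, which is why only char (unsigned) data can use
// these logical right shift rules safely.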
4224
4225instruct vsrl2S(vecS dst, vecS shift) %{
4226  predicate(n->as_Vector()->length() == 2);
4227  match(Set dst (URShiftVS dst shift));
4228  format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
4229  ins_encode %{
4230    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
4231  %}
4232  ins_pipe( pipe_slow );
4233%}
4234
4235instruct vsrl2S_imm(vecS dst, immI8 shift) %{
4236  predicate(n->as_Vector()->length() == 2);
4237  match(Set dst (URShiftVS dst shift));
4238  format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
4239  ins_encode %{
4240    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
4241  %}
4242  ins_pipe( pipe_slow );
4243%}
4244
4245instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
4246  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4247  match(Set dst (URShiftVS src shift));
4248  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
4249  ins_encode %{
4250    bool vector256 = false;
4251    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4252  %}
4253  ins_pipe( pipe_slow );
4254%}
4255
4256instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
4257  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4258  match(Set dst (URShiftVS src shift));
4259  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
4260  ins_encode %{
4261    bool vector256 = false;
4262    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4263  %}
4264  ins_pipe( pipe_slow );
4265%}
4266
4267instruct vsrl4S(vecD dst, vecS shift) %{
4268  predicate(n->as_Vector()->length() == 4);
4269  match(Set dst (URShiftVS dst shift));
4270  format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
4271  ins_encode %{
4272    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
4273  %}
4274  ins_pipe( pipe_slow );
4275%}
4276
4277instruct vsrl4S_imm(vecD dst, immI8 shift) %{
4278  predicate(n->as_Vector()->length() == 4);
4279  match(Set dst (URShiftVS dst shift));
4280  format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
4281  ins_encode %{
4282    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
4283  %}
4284  ins_pipe( pipe_slow );
4285%}
4286
4287instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
4288  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4289  match(Set dst (URShiftVS src shift));
4290  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
4291  ins_encode %{
4292    bool vector256 = false;
4293    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4294  %}
4295  ins_pipe( pipe_slow );
4296%}
4297
4298instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
4299  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4300  match(Set dst (URShiftVS src shift));
4301  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
4302  ins_encode %{
4303    bool vector256 = false;
4304    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4305  %}
4306  ins_pipe( pipe_slow );
4307%}
4308
4309instruct vsrl8S(vecX dst, vecS shift) %{
4310  predicate(n->as_Vector()->length() == 8);
4311  match(Set dst (URShiftVS dst shift));
4312  format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
4313  ins_encode %{
4314    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
4315  %}
4316  ins_pipe( pipe_slow );
4317%}
4318
4319instruct vsrl8S_imm(vecX dst, immI8 shift) %{
4320  predicate(n->as_Vector()->length() == 8);
4321  match(Set dst (URShiftVS dst shift));
4322  format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
4323  ins_encode %{
4324    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
4325  %}
4326  ins_pipe( pipe_slow );
4327%}
4328
4329instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
4330  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4331  match(Set dst (URShiftVS src shift));
4332  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
4333  ins_encode %{
4334    bool vector256 = false;
4335    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4336  %}
4337  ins_pipe( pipe_slow );
4338%}
4339
4340instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
4341  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
4342  match(Set dst (URShiftVS src shift));
4343  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
4344  ins_encode %{
4345    bool vector256 = false;
4346    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4347  %}
4348  ins_pipe( pipe_slow );
4349%}
4350
4351instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
4352  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4353  match(Set dst (URShiftVS src shift));
4354  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
4355  ins_encode %{
4356    bool vector256 = true;
4357    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4358  %}
4359  ins_pipe( pipe_slow );
4360%}
4361
4362instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
4363  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
4364  match(Set dst (URShiftVS src shift));
4365  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
4366  ins_encode %{
4367    bool vector256 = true;
4368    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4369  %}
4370  ins_pipe( pipe_slow );
4371%}
4372
4373// Integers vector logical right shift
4374instruct vsrl2I(vecD dst, vecS shift) %{
4375  predicate(n->as_Vector()->length() == 2);
4376  match(Set dst (URShiftVI dst shift));
4377  format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
4378  ins_encode %{
4379    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
4380  %}
4381  ins_pipe( pipe_slow );
4382%}
4383
4384instruct vsrl2I_imm(vecD dst, immI8 shift) %{
4385  predicate(n->as_Vector()->length() == 2);
4386  match(Set dst (URShiftVI dst shift));
4387  format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
4388  ins_encode %{
4389    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
4390  %}
4391  ins_pipe( pipe_slow );
4392%}
4393
4394instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
4395  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4396  match(Set dst (URShiftVI src shift));
4397  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
4398  ins_encode %{
4399    bool vector256 = false;
4400    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4401  %}
4402  ins_pipe( pipe_slow );
4403%}
4404
4405instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
4406  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4407  match(Set dst (URShiftVI src shift));
4408  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
4409  ins_encode %{
4410    bool vector256 = false;
4411    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4412  %}
4413  ins_pipe( pipe_slow );
4414%}
4415
4416instruct vsrl4I(vecX dst, vecS shift) %{
4417  predicate(n->as_Vector()->length() == 4);
4418  match(Set dst (URShiftVI dst shift));
4419  format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
4420  ins_encode %{
4421    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
4422  %}
4423  ins_pipe( pipe_slow );
4424%}
4425
4426instruct vsrl4I_imm(vecX dst, immI8 shift) %{
4427  predicate(n->as_Vector()->length() == 4);
4428  match(Set dst (URShiftVI dst shift));
4429  format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
4430  ins_encode %{
4431    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
4432  %}
4433  ins_pipe( pipe_slow );
4434%}
4435
4436instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
4437  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4438  match(Set dst (URShiftVI src shift));
4439  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
4440  ins_encode %{
4441    bool vector256 = false;
4442    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4443  %}
4444  ins_pipe( pipe_slow );
4445%}
4446
4447instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
4448  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
4449  match(Set dst (URShiftVI src shift));
4450  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
4451  ins_encode %{
4452    bool vector256 = false;
4453    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4454  %}
4455  ins_pipe( pipe_slow );
4456%}
4457
4458instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
4459  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4460  match(Set dst (URShiftVI src shift));
4461  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
4462  ins_encode %{
4463    bool vector256 = true;
4464    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4465  %}
4466  ins_pipe( pipe_slow );
4467%}
4468
4469instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
4470  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
4471  match(Set dst (URShiftVI src shift));
4472  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
4473  ins_encode %{
4474    bool vector256 = true;
4475    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4476  %}
4477  ins_pipe( pipe_slow );
4478%}
4479
4480// Longs vector logical right shift
4481instruct vsrl2L(vecX dst, vecS shift) %{
4482  predicate(n->as_Vector()->length() == 2);
4483  match(Set dst (URShiftVL dst shift));
4484  format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
4485  ins_encode %{
4486    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
4487  %}
4488  ins_pipe( pipe_slow );
4489%}
4490
4491instruct vsrl2L_imm(vecX dst, immI8 shift) %{
4492  predicate(n->as_Vector()->length() == 2);
4493  match(Set dst (URShiftVL dst shift));
4494  format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
4495  ins_encode %{
4496    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
4497  %}
4498  ins_pipe( pipe_slow );
4499%}
4500
4501instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
4502  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4503  match(Set dst (URShiftVL src shift));
4504  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
4505  ins_encode %{
4506    bool vector256 = false;
4507    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4508  %}
4509  ins_pipe( pipe_slow );
4510%}
4511
4512instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
4513  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
4514  match(Set dst (URShiftVL src shift));
4515  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
4516  ins_encode %{
4517    bool vector256 = false;
4518    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4519  %}
4520  ins_pipe( pipe_slow );
4521%}
4522
4523instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
4524  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4525  match(Set dst (URShiftVL src shift));
4526  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
4527  ins_encode %{
4528    bool vector256 = true;
4529    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
4530  %}
4531  ins_pipe( pipe_slow );
4532%}
4533
4534instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
4535  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
4536  match(Set dst (URShiftVL src shift));
4537  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
4538  ins_encode %{
4539    bool vector256 = true;
4540    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
4541  %}
4542  ins_pipe( pipe_slow );
4543%}
4544
4545// ------------------- ArithmeticRightShift -----------------------------------
4546
// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// There are no longs vector arithmetic right shift instructions.
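// (A packed 64-bit arithmetic shift, vpsraq, only appears with AVX-512.  If
// one were ever needed it would have to be emulated, roughly as a sketch:
//   t = x >>> n;  m = 0x8000000000000000 >>> n;  result = (t ^ m) - m;
// which is why no rule is provided for RShiftVL here.)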


// --------------------------------- AND --------------------------------------
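// Bitwise ops do not care about element size, so these rules (and the OR and
// XOR rules that follow) are selected purely by the vector length in bytes.
// The two-operand pand form is destructive; the vpand forms use the AVX
// three-operand encoding, and the 32-byte variants require AVX2 (UseAVX > 1).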

instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand    $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand    $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand    $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand   $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand   $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

