//
// Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers.  256-bit registers or 8 words each, labeled (a)-h.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters

// XMM0-XMM5 are caller-saved (SOC) on every supported ABI.
// Each XMM register is described as 8 32-bit words (a-h); the encoding
// (fifth argument) is the hardware register number.
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));

#ifdef _WIN64

// Windows x64 ABI: XMM6-XMM15 are callee-saved, hence SOE here.
reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));

#else // _WIN64

// Non-Windows ABIs: no XMM register is preserved across calls (all SOC).
reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));

#ifdef _LP64

// XMM8-XMM15 only exist in 64-bit mode.
reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));

#endif // _LP64

#endif // _WIN64

// Condition-code (flags) register. Encoding follows the last XMM register:
// 16 after XMM0-XMM15 in 64-bit builds, 8 after XMM0-XMM7 in 32-bit builds.
#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for all float registers
reg_class float_reg(XMM0,
                    XMM1,
                    XMM2,
                    XMM3,
                    XMM4,
                    XMM5,
                    XMM6,
                    XMM7
#ifdef _LP64
                   ,XMM8,
                    XMM9,
                    XMM10,
                    XMM11,
                    XMM12,
                    XMM13,
                    XMM14,
                    XMM15
#endif
                    );

// Class for all double registers
reg_class double_reg(XMM0,  XMM0b,
                     XMM1,  XMM1b,
                     XMM2,  XMM2b,
                     XMM3,  XMM3b,
                     XMM4,  XMM4b,
                     XMM5,  XMM5b,
                     XMM6,  XMM6b,
                     XMM7,  XMM7b
#ifdef _LP64
                    ,XMM8,  XMM8b,
                     XMM9,  XMM9b,
                     XMM10, XMM10b,
                     XMM11, XMM11b,
                     XMM12, XMM12b,
                     XMM13, XMM13b,
                     XMM14, XMM14b,
                     XMM15, XMM15b
#endif
                     );

// Class for all 32bit vector registers
reg_class vectors_reg(XMM0,
                      XMM1,
                      XMM2,
                      XMM3,
                      XMM4,
                      XMM5,
                      XMM6,
                      XMM7
#ifdef _LP64
                     ,XMM8,
                      XMM9,
                      XMM10,
                      XMM11,
                      XMM12,
                      XMM13,
                      XMM14,
                      XMM15
#endif
                      );

// Class for all 64bit vector registers
reg_class vectord_reg(XMM0,  XMM0b,
                      XMM1,  XMM1b,
                      XMM2,  XMM2b,
                      XMM3,  XMM3b,
                      XMM4,  XMM4b,
                      XMM5,  XMM5b,
                      XMM6,  XMM6b,
                      XMM7,  XMM7b
#ifdef _LP64
                     ,XMM8,  XMM8b,
                      XMM9,  XMM9b,
                      XMM10, XMM10b,
                      XMM11, XMM11b,
                      XMM12, XMM12b,
                      XMM13, XMM13b,
                      XMM14, XMM14b,
                      XMM15, XMM15b
#endif
                      );

// Class for all 128bit vector registers
reg_class vectorx_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,
                      XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,
                      XMM10, XMM10b, XMM10c, XMM10d,
                      XMM11, XMM11b, XMM11c, XMM11d,
                      XMM12, XMM12b, XMM12c, XMM12d,
                      XMM13, XMM13b, XMM13c, XMM13d,
                      XMM14, XMM14b, XMM14c, XMM14d,
                      XMM15, XMM15b, XMM15c, XMM15d
#endif
                      );

// Class for all 256bit vector registers
reg_class vectory_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                      );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions
    return 15;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The deopt handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  // The stub is a single jump to the shared exception blob.
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push a "the_pc" on the stack without destroying any registers
  // as they all may be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  // 32-bit: the return pc can be pushed directly as an immediate.
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}


//=============================================================================

// Float masks come from different places depending on platform.
603#ifdef _LP64 604 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 605 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 606 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 607 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 608#else 609 static address float_signmask() { return (address)float_signmask_pool; } 610 static address float_signflip() { return (address)float_signflip_pool; } 611 static address double_signmask() { return (address)double_signmask_pool; } 612 static address double_signflip() { return (address)double_signflip_pool; } 613#endif 614 615 616const bool Matcher::match_rule_supported(int opcode) { 617 if (!has_match_rule(opcode)) 618 return false; 619 620 switch (opcode) { 621 case Op_PopCountI: 622 case Op_PopCountL: 623 if (!UsePopCountInstruction) 624 return false; 625 break; 626 case Op_MulVI: 627 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 628 return false; 629 break; 630 case Op_CompareAndSwapL: 631#ifdef _LP64 632 case Op_CompareAndSwapP: 633#endif 634 if (!VM_Version::supports_cx8()) 635 return false; 636 break; 637 } 638 639 return true; // Per default match rules are supported. 640} 641 642// Max vector size in bytes. 0 if not supported. 643const int Matcher::vector_width_in_bytes(BasicType bt) { 644 assert(is_java_primitive(bt), "only primitive type vectors"); 645 if (UseSSE < 2) return 0; 646 // SSE2 supports 128bit vectors for all types. 647 // AVX2 supports 256bit vectors for all types. 648 int size = (UseAVX > 1) ? 32 : 16; 649 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 650 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 651 size = 32; 652 // Use flag to limit vector size. 653 size = MIN2(size,(int)MaxVectorSize); 654 // Minimum 2 values in vector (or 4 for bytes). 
655 switch (bt) { 656 case T_DOUBLE: 657 case T_LONG: 658 if (size < 16) return 0; 659 case T_FLOAT: 660 case T_INT: 661 if (size < 8) return 0; 662 case T_BOOLEAN: 663 case T_BYTE: 664 case T_CHAR: 665 case T_SHORT: 666 if (size < 4) return 0; 667 break; 668 default: 669 ShouldNotReachHere(); 670 } 671 return size; 672} 673 674// Limits on vector size (number of elements) loaded into vector. 675const int Matcher::max_vector_size(const BasicType bt) { 676 return vector_width_in_bytes(bt)/type2aelembytes(bt); 677} 678const int Matcher::min_vector_size(const BasicType bt) { 679 int max_size = max_vector_size(bt); 680 // Min size which can be loaded into vector is 4 bytes. 681 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 682 return MIN2(size,max_size); 683} 684 685// Vector ideal reg corresponding to specidied size in bytes 686const uint Matcher::vector_ideal_reg(int size) { 687 assert(MaxVectorSize >= size, ""); 688 switch(size) { 689 case 4: return Op_VecS; 690 case 8: return Op_VecD; 691 case 16: return Op_VecX; 692 case 32: return Op_VecY; 693 } 694 ShouldNotReachHere(); 695 return 0; 696} 697 698// Only lowest bits of xmm reg are used for vector shift count. 699const uint Matcher::vector_shift_count_ideal_reg(int size) { 700 return Op_VecS; 701} 702 703// x86 supports misaligned vectors store/load. 704const bool Matcher::misaligned_vectors_ok() { 705 return !AlignVector; // can be changed by flag 706} 707 708// x86 AES instructions are compatible with SunJCE expanded 709// keys, hence we do not need to pass the original key to stubs 710const bool Matcher::pass_original_key_for_aes() { 711 return false; 712} 713 714// Helper methods for MachSpillCopyNode::implementation(). 715static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 716 int src_hi, int dst_hi, uint ireg, outputStream* st) { 717 // In 64-bit VM size calculation is very complex. Emitting instructions 718 // into scratch buffer is used to get size in 64-bit VM. 
719 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 720 assert(ireg == Op_VecS || // 32bit vector 721 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 722 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 723 "no non-adjacent vector moves" ); 724 if (cbuf) { 725 MacroAssembler _masm(cbuf); 726 int offset = __ offset(); 727 switch (ireg) { 728 case Op_VecS: // copy whole register 729 case Op_VecD: 730 case Op_VecX: 731 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 732 break; 733 case Op_VecY: 734 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 735 break; 736 default: 737 ShouldNotReachHere(); 738 } 739 int size = __ offset() - offset; 740#ifdef ASSERT 741 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 742 assert(!do_size || size == 4, "incorrect size calculattion"); 743#endif 744 return size; 745#ifndef PRODUCT 746 } else if (!do_size) { 747 switch (ireg) { 748 case Op_VecS: 749 case Op_VecD: 750 case Op_VecX: 751 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 752 break; 753 case Op_VecY: 754 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 755 break; 756 default: 757 ShouldNotReachHere(); 758 } 759#endif 760 } 761 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 762 return 4; 763} 764 765static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 766 int stack_offset, int reg, uint ireg, outputStream* st) { 767 // In 64-bit VM size calculation is very complex. Emitting instructions 768 // into scratch buffer is used to get size in 64-bit VM. 
769 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 770 if (cbuf) { 771 MacroAssembler _masm(cbuf); 772 int offset = __ offset(); 773 if (is_load) { 774 switch (ireg) { 775 case Op_VecS: 776 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 777 break; 778 case Op_VecD: 779 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 780 break; 781 case Op_VecX: 782 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 783 break; 784 case Op_VecY: 785 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 786 break; 787 default: 788 ShouldNotReachHere(); 789 } 790 } else { // store 791 switch (ireg) { 792 case Op_VecS: 793 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 794 break; 795 case Op_VecD: 796 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 797 break; 798 case Op_VecX: 799 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 800 break; 801 case Op_VecY: 802 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 803 break; 804 default: 805 ShouldNotReachHere(); 806 } 807 } 808 int size = __ offset() - offset; 809#ifdef ASSERT 810 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); 811 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
812 assert(!do_size || size == (5+offset_size), "incorrect size calculattion"); 813#endif 814 return size; 815#ifndef PRODUCT 816 } else if (!do_size) { 817 if (is_load) { 818 switch (ireg) { 819 case Op_VecS: 820 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 821 break; 822 case Op_VecD: 823 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 824 break; 825 case Op_VecX: 826 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 827 break; 828 case Op_VecY: 829 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 830 break; 831 default: 832 ShouldNotReachHere(); 833 } 834 } else { // store 835 switch (ireg) { 836 case Op_VecS: 837 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 838 break; 839 case Op_VecD: 840 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 841 break; 842 case Op_VecX: 843 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 844 break; 845 case Op_VecY: 846 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 847 break; 848 default: 849 ShouldNotReachHere(); 850 } 851 } 852#endif 853 } 854 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); 855 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 856 return 5+offset_size; 857} 858 859static inline jfloat replicate4_imm(int con, int width) { 860 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  // Double the replicated width until all 32 bits are filled.
  while(bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  jfloat fval = *((jfloat*) &val);  // coerce to float type
  return fval;
}

// Returns the double whose bit pattern is 'con' (a byte, short or int
// value) replicated to fill 64 bits; companion of replicate4_imm above.
static inline jdouble replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
  while(bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  jdouble dval = *((jdouble*) &val);  // coerce to double type
  return dval;
}

#ifndef PRODUCT
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif

  // Emit _count one-byte nops as padding.
  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
    MacroAssembler _masm(&cbuf);
    __ nop(_count);
  }

  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count;
  }

#ifndef PRODUCT
  void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("# breakpoint");
  }
#endif

  // Emit an int3 software breakpoint.
  void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
    MacroAssembler _masm(&cbuf);
    __ int3();
  }

  uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
    return MachNode::size(ra_);
  }

%}

encode %{

  // Verifies (under -XX:+VerifyStackAtCalls) that the stack depth is
  // unchanged after a call by checking the 0xbadb100d cookie planted
  // below the return address; traps with int3 on mismatch.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

// Vectors
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}


// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)

// ============================================================================

instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "int3\t# ShouldNotReachHere" %}
  ins_encode %{
    __ int3();
  %}
  ins_pipe(pipe_slow);
%}

// ============================================================================
// Scalar float add.  SSE two-operand forms are guarded by
// (UseSSE>=1 && UseAVX==0); AVX three-operand forms by (UseAVX > 0).

instruct addF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Float add with a memory operand folded into the instruction.
instruct addF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister,
             $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Float add with an immediate loaded from the constant table.
instruct addF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX three-operand (non-destructive) float add variants.
instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar double add (SSE2 / AVX variants, same structure as float).
instruct addD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst src));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst (LoadD src)));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst con));
  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar float subtract (SSE / AVX variants, same structure as add).
instruct subF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst src));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst (LoadF src)));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst con));
  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar double subtract (SSE2 / AVX variants).
instruct subD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst src));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst (LoadD src)));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst con));
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar float multiply (SSE / AVX variants).
instruct mulF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst src));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst con));
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar double multiply (SSE2 / AVX variants).
instruct mulD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst src));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}

  ins_pipe(pipe_slow);
%}

instruct mulD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst con));
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar float divide (SSE / AVX variants).
instruct divF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst src));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst con));
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar double divide (SSE2 / AVX variants).
instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Absolute value: clear the sign bit with an AND against a sign mask.
instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    bool vector256 = false;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    bool vector256 = false;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

// Negation: flip the sign bit with an XOR against a sign-flip mask.
instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    bool vector256 = false;
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signflip()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    bool vector256 = false;
    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signflip()), vector256);
  %}
  ins_pipe(pipe_slow);
%}

// Float sqrt: matched as (float)sqrt((double)f) so a single sqrtss
// replaces the convert-sqrt-convert sequence.
instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src)
%{ 1696 predicate(UseSSE>=2); 1697 match(Set dst (SqrtD src)); 1698 1699 format %{ "sqrtsd $dst, $src" %} 1700 ins_cost(150); 1701 ins_encode %{ 1702 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 1703 %} 1704 ins_pipe(pipe_slow); 1705%} 1706 1707instruct sqrtD_mem(regD dst, memory src) %{ 1708 predicate(UseSSE>=2); 1709 match(Set dst (SqrtD (LoadD src))); 1710 1711 format %{ "sqrtsd $dst, $src" %} 1712 ins_cost(150); 1713 ins_encode %{ 1714 __ sqrtsd($dst$$XMMRegister, $src$$Address); 1715 %} 1716 ins_pipe(pipe_slow); 1717%} 1718 1719instruct sqrtD_imm(regD dst, immD con) %{ 1720 predicate(UseSSE>=2); 1721 match(Set dst (SqrtD con)); 1722 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1723 ins_cost(150); 1724 ins_encode %{ 1725 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 1726 %} 1727 ins_pipe(pipe_slow); 1728%} 1729 1730 1731// ====================VECTOR INSTRUCTIONS===================================== 1732 1733// Load vectors (4 bytes long) 1734instruct loadV4(vecS dst, memory mem) %{ 1735 predicate(n->as_LoadVector()->memory_size() == 4); 1736 match(Set dst (LoadVector mem)); 1737 ins_cost(125); 1738 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 1739 ins_encode %{ 1740 __ movdl($dst$$XMMRegister, $mem$$Address); 1741 %} 1742 ins_pipe( pipe_slow ); 1743%} 1744 1745// Load vectors (8 bytes long) 1746instruct loadV8(vecD dst, memory mem) %{ 1747 predicate(n->as_LoadVector()->memory_size() == 8); 1748 match(Set dst (LoadVector mem)); 1749 ins_cost(125); 1750 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 1751 ins_encode %{ 1752 __ movq($dst$$XMMRegister, $mem$$Address); 1753 %} 1754 ins_pipe( pipe_slow ); 1755%} 1756 1757// Load vectors (16 bytes long) 1758instruct loadV16(vecX dst, memory mem) %{ 1759 predicate(n->as_LoadVector()->memory_size() == 16); 1760 match(Set dst (LoadVector mem)); 1761 ins_cost(125); 1762 format %{ "movdqu $dst,$mem\t! 
load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Store vectors, selected by the store's memory size (4/8/16/32 bytes).
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar to be vector
// Pattern: move the GPR into the low dword, widen bytes with punpcklbw,
// broadcast the low word with pshuflw, then (for wider vectors) duplicate
// the low qword with punpcklqdq and the low 128 bits with vinserti128h.
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar immediate to be vector by loading from const table.
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // 256-bit vpxor is an AVX2 instruction; 32-byte vectors imply AVX2 here.
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar to be vector
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // 256-bit vpxor is an AVX2 instruction; 16-short vectors imply AVX2 here.
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // 256-bit vpxor is an AVX2 instruction; 8-int vectors imply AVX2 here.
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
// On 32-bit, a long lives in a register pair: combine lo/hi halves via a
// temp XMM before broadcasting.
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Long could be loaded into xmm register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // 256-bit vpxor is an AVX2 instruction; 4-long vectors imply AVX2 here.
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate float (4 byte) scalar zero to be vector
instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    bool vector256 = true;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    // 0x44 selects the low qword of src into both qword lanes of dst.
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
  ins_encode %{
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t!
add packed4F" %} 2900 ins_encode %{ 2901 bool vector256 = false; 2902 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2903 %} 2904 ins_pipe( pipe_slow ); 2905%} 2906 2907instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ 2908 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 2909 match(Set dst (AddVF src1 src2)); 2910 format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} 2911 ins_encode %{ 2912 bool vector256 = true; 2913 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2914 %} 2915 ins_pipe( pipe_slow ); 2916%} 2917 2918instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ 2919 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 2920 match(Set dst (AddVF src (LoadVector mem))); 2921 format %{ "vaddps $dst,$src,$mem\t! add packed8F" %} 2922 ins_encode %{ 2923 bool vector256 = true; 2924 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 2925 %} 2926 ins_pipe( pipe_slow ); 2927%} 2928 2929// Doubles vector add 2930instruct vadd2D(vecX dst, vecX src) %{ 2931 predicate(n->as_Vector()->length() == 2); 2932 match(Set dst (AddVD dst src)); 2933 format %{ "addpd $dst,$src\t! add packed2D" %} 2934 ins_encode %{ 2935 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 2936 %} 2937 ins_pipe( pipe_slow ); 2938%} 2939 2940instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ 2941 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2942 match(Set dst (AddVD src1 src2)); 2943 format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %} 2944 ins_encode %{ 2945 bool vector256 = false; 2946 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2947 %} 2948 ins_pipe( pipe_slow ); 2949%} 2950 2951instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ 2952 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2953 match(Set dst (AddVD src (LoadVector mem))); 2954 format %{ "vaddpd $dst,$src,$mem\t! 
add packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): in all patterns below, `vector256 == true` selects the 256-bit
// (vecY) form and `false` the 128-bit form; non-AVX patterns are destructive,
// matching (Set dst (Op dst src)).

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit integer subs require UseAVX > 1.
instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! 
sub packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): `vector256 == true` selects the 256-bit (vecY) form, `false`
// the 128-bit form; non-AVX patterns are destructive (Set dst (Op dst src)).

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! 
sub packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): `vector256 == true` selects the 256-bit (vecY) form, `false`
// the 128-bit form; non-AVX patterns are destructive (Set dst (Op dst src)).

// --------------------------------- MUL --------------------------------------

// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul (sse4_1)
// Note: pmulld needs UseSSE > 3 (SSE4.1), unlike the other non-AVX forms here.
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! 
mul packed2D" %} 3708 ins_encode %{ 3709 bool vector256 = false; 3710 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3711 %} 3712 ins_pipe( pipe_slow ); 3713%} 3714 3715instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ 3716 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3717 match(Set dst (MulVD src (LoadVector mem))); 3718 format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %} 3719 ins_encode %{ 3720 bool vector256 = false; 3721 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3722 %} 3723 ins_pipe( pipe_slow ); 3724%} 3725 3726instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ 3727 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3728 match(Set dst (MulVD src1 src2)); 3729 format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} 3730 ins_encode %{ 3731 bool vector256 = true; 3732 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3733 %} 3734 ins_pipe( pipe_slow ); 3735%} 3736 3737instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ 3738 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3739 match(Set dst (MulVD src (LoadVector mem))); 3740 format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} 3741 ins_encode %{ 3742 bool vector256 = true; 3743 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3744 %} 3745 ins_pipe( pipe_slow ); 3746%} 3747 3748// --------------------------------- DIV -------------------------------------- 3749 3750// Floats vector div 3751instruct vdiv2F(vecD dst, vecD src) %{ 3752 predicate(n->as_Vector()->length() == 2); 3753 match(Set dst (DivVF dst src)); 3754 format %{ "divps $dst,$src\t! 
div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! 
left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand forms: src is not clobbered.
instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 16S forms need AVX2 (UseAVX > 1) for 256-bit integer shifts.
instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! 
left shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit integer shifts require AVX2 (UseAVX > 1).
instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces incorrect Java result
// for negative data because java code convert short value into int with
// sign extension before a shift. But char vectors are fine since chars are
// unsigned values.

instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! 
logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit integer shifts require AVX2 (UseAVX > 1).
instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! 
logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit integer shifts require AVX2 (UseAVX > 1).
instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed2S" %} 4572 ins_encode %{ 4573 bool vector256 = false; 4574 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4575 %} 4576 ins_pipe( pipe_slow ); 4577%} 4578 4579instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 4580 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4581 match(Set dst (RShiftVS src shift)); 4582 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 4583 ins_encode %{ 4584 bool vector256 = false; 4585 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4586 %} 4587 ins_pipe( pipe_slow ); 4588%} 4589 4590instruct vsra4S(vecD dst, vecS shift) %{ 4591 predicate(n->as_Vector()->length() == 4); 4592 match(Set dst (RShiftVS dst shift)); 4593 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 4594 ins_encode %{ 4595 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 4596 %} 4597 ins_pipe( pipe_slow ); 4598%} 4599 4600instruct vsra4S_imm(vecD dst, immI8 shift) %{ 4601 predicate(n->as_Vector()->length() == 4); 4602 match(Set dst (RShiftVS dst shift)); 4603 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 4604 ins_encode %{ 4605 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 4606 %} 4607 ins_pipe( pipe_slow ); 4608%} 4609 4610instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{ 4611 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4612 match(Set dst (RShiftVS src shift)); 4613 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 4614 ins_encode %{ 4615 bool vector256 = false; 4616 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4617 %} 4618 ins_pipe( pipe_slow ); 4619%} 4620 4621instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4622 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4623 match(Set dst (RShiftVS src shift)); 4624 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed4S" %} 4625 ins_encode %{ 4626 bool vector256 = false; 4627 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4628 %} 4629 ins_pipe( pipe_slow ); 4630%} 4631 4632instruct vsra8S(vecX dst, vecS shift) %{ 4633 predicate(n->as_Vector()->length() == 8); 4634 match(Set dst (RShiftVS dst shift)); 4635 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 4636 ins_encode %{ 4637 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 4638 %} 4639 ins_pipe( pipe_slow ); 4640%} 4641 4642instruct vsra8S_imm(vecX dst, immI8 shift) %{ 4643 predicate(n->as_Vector()->length() == 8); 4644 match(Set dst (RShiftVS dst shift)); 4645 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 4646 ins_encode %{ 4647 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 4648 %} 4649 ins_pipe( pipe_slow ); 4650%} 4651 4652instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{ 4653 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4654 match(Set dst (RShiftVS src shift)); 4655 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 4656 ins_encode %{ 4657 bool vector256 = false; 4658 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4659 %} 4660 ins_pipe( pipe_slow ); 4661%} 4662 4663instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4664 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4665 match(Set dst (RShiftVS src shift)); 4666 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 4667 ins_encode %{ 4668 bool vector256 = false; 4669 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4670 %} 4671 ins_pipe( pipe_slow ); 4672%} 4673 4674instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{ 4675 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4676 match(Set dst (RShiftVS src shift)); 4677 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 16 shorts, 256-bit (AVX2), shift count is an 8-bit immediate.
instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    bool vector256 = true; // 256-bit encoding
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift

// 2 ints, two-operand form, shift count in an XMM register.
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2 ints, two-operand form, shift count is an 8-bit immediate.
instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 2 ints, AVX three-operand form, shift count in an XMM register.
instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    bool vector256 = false; // 128-bit encoding
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 2 ints, AVX three-operand form, shift count is an 8-bit immediate.
instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! 
arithmetic right shift packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, two-operand form, shift count in an XMM register.
instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, two-operand form, shift count is an 8-bit immediate.
instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, AVX three-operand form, shift count in an XMM register.
instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    bool vector256 = false; // 128-bit encoding
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 4 ints, AVX three-operand form, shift count is an 8-bit immediate.
instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    bool vector256 = false; // 128-bit encoding
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints, 256-bit (AVX2), shift count in an XMM register.
instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! 
arithmetic right shift packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints, 256-bit (AVX2), shift count is an 8-bit immediate.
instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    bool vector256 = true; // 256-bit encoding
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// There are no longs vector arithmetic right shift instructions.


// --------------------------------- AND --------------------------------------

// 4-byte vectors, two-operand form (dst &= src).
instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4-byte vectors, AVX three-operand form.
instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false; // 128-bit encoding
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8-byte vectors, two-operand form (dst &= src).
instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! 
and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 8-byte vectors, AVX three-operand form.
instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false; // 128-bit encoding
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 16-byte vectors, two-operand form (dst &= src).
instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 16-byte vectors, AVX three-operand form.
instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false; // 128-bit encoding
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 16-byte vectors, AVX, second operand loaded from memory.
instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false; // 128-bit encoding
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 32-byte vectors, 256-bit (AVX2) three-operand form.
instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! 
and vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 32-byte vectors, AVX2, second operand loaded from memory.
instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true; // 256-bit encoding
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

// 4-byte vectors, two-operand form (dst |= src).
instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4-byte vectors, AVX three-operand form.
instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false; // 128-bit encoding
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8-byte vectors, two-operand form (dst |= src).
instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 8-byte vectors, AVX three-operand form.
instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! 
or vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 16-byte vectors, two-operand form (dst |= src).
instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 16-byte vectors, AVX three-operand form.
instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false; // 128-bit encoding
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 16-byte vectors, AVX, second operand loaded from memory.
instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false; // 128-bit encoding
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 32-byte vectors, 256-bit (AVX2) three-operand form.
instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true; // 256-bit encoding
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 32-byte vectors, AVX2, second operand loaded from memory.
instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! 
or vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

// 4-byte vectors, two-operand form (dst ^= src).
instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4-byte vectors, AVX three-operand form.
instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    bool vector256 = false; // 128-bit encoding
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 8-byte vectors, two-operand form (dst ^= src).
instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 8-byte vectors, AVX three-operand form.
instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    bool vector256 = false; // 128-bit encoding
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 16-byte vectors, two-operand form (dst ^= src).
instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! 
xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 16-byte vectors, AVX three-operand form.
instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false; // 128-bit encoding
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 16-byte vectors, AVX, second operand loaded from memory.
instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    bool vector256 = false; // 128-bit encoding
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 32-byte vectors, 256-bit (AVX2) three-operand form.
instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true; // 256-bit encoding
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// 32-byte vectors, AVX2, second operand loaded from memory.
instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    bool vector256 = true; // 256-bit encoding
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}