//
// Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers of 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX enabled architectures:
//     XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//     XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
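//
// For example, the first entry below,
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// reads: XMM0 is save-on-call under both the allocator's and the C calling
// conventions, spills as a float (Op_RegF), uses hardware encoding 0, and
// names the first 32-bit slot of xmm0's VMReg; the b-p entries that follow
// name the remaining fifteen 32-bit slots of the same 512-bit register.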
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

// AVX3 Mask Registers.
reg_def K1   (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());
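
// The alloc_class declarations below group the registers into chunks for
// the register allocator: chunk1 holds the XMM register slots, chunk2 the
// AVX3 mask registers, and chunk3 (after the mask register classes) the
// flags register.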

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );

alloc_class chunk2(K7, K7_H,
                   K6, K6_H,
                   K5, K5_H,
                   K4, K4_H,
                   K3, K3_H,
                   K2, K2_H,
                   K1, K1_H);

reg_class vectmask_reg(K1, K1_H,
                       K2, K2_H,
                       K3, K3_H,
                       K4, K4_H,
                       K5, K5_H,
                       K6, K6_H,
                       K7, K7_H);

reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);
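
// Note that k0 is absent from the mask register definitions and classes
// above: in the EVEX encoding a mask-operand value of k0 means "no
// masking", so k0 is not usable as an allocatable predicate register.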

// flags allocation class should be last.
alloc_class chunk3(RFLAGS);


// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
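
// A reg_class_dynamic selects between its two constituent classes when the
// matcher queries it: the first (EVEX) class when the %{ ... %} predicate
// holds on the current CPU, otherwise the second (legacy) class. The same
// pattern repeats for each vector width below.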
XMM7, XMM7b 913#ifdef _LP64 914 ,XMM8, XMM8b, 915 XMM9, XMM9b, 916 XMM10, XMM10b, 917 XMM11, XMM11b, 918 XMM12, XMM12b, 919 XMM13, XMM13b, 920 XMM14, XMM14b, 921 XMM15, XMM15b 922#endif 923 ); 924 925// Class for all 64bit vector registers 926reg_class vectord_reg_evex(XMM0, XMM0b, 927 XMM1, XMM1b, 928 XMM2, XMM2b, 929 XMM3, XMM3b, 930 XMM4, XMM4b, 931 XMM5, XMM5b, 932 XMM6, XMM6b, 933 XMM7, XMM7b 934#ifdef _LP64 935 ,XMM8, XMM8b, 936 XMM9, XMM9b, 937 XMM10, XMM10b, 938 XMM11, XMM11b, 939 XMM12, XMM12b, 940 XMM13, XMM13b, 941 XMM14, XMM14b, 942 XMM15, XMM15b, 943 XMM16, XMM16b, 944 XMM17, XMM17b, 945 XMM18, XMM18b, 946 XMM19, XMM19b, 947 XMM20, XMM20b, 948 XMM21, XMM21b, 949 XMM22, XMM22b, 950 XMM23, XMM23b, 951 XMM24, XMM24b, 952 XMM25, XMM25b, 953 XMM26, XMM26b, 954 XMM27, XMM27b, 955 XMM28, XMM28b, 956 XMM29, XMM29b, 957 XMM30, XMM30b, 958 XMM31, XMM31b 959#endif 960 ); 961 962reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 963reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 964 965// Class for all 128bit vector registers 966reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 967 XMM1, XMM1b, XMM1c, XMM1d, 968 XMM2, XMM2b, XMM2c, XMM2d, 969 XMM3, XMM3b, XMM3c, XMM3d, 970 XMM4, XMM4b, XMM4c, XMM4d, 971 XMM5, XMM5b, XMM5c, XMM5d, 972 XMM6, XMM6b, XMM6c, XMM6d, 973 XMM7, XMM7b, XMM7c, XMM7d 974#ifdef _LP64 975 ,XMM8, XMM8b, XMM8c, XMM8d, 976 XMM9, XMM9b, XMM9c, XMM9d, 977 XMM10, XMM10b, XMM10c, XMM10d, 978 XMM11, XMM11b, XMM11c, XMM11d, 979 XMM12, XMM12b, XMM12c, XMM12d, 980 XMM13, XMM13b, XMM13c, XMM13d, 981 XMM14, XMM14b, XMM14c, XMM14d, 982 XMM15, XMM15b, XMM15c, XMM15d 983#endif 984 ); 985 986// Class for all 128bit vector registers 987reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 988 XMM1, XMM1b, XMM1c, XMM1d, 989 XMM2, XMM2b, XMM2c, XMM2d, 990 XMM3, XMM3b, XMM3c, XMM3d, 991 XMM4, XMM4b, XMM4c, XMM4d, 992 XMM5, XMM5b, XMM5c, XMM5d, 993 XMM6, XMM6b, XMM6c, XMM6d, 994 XMM7, XMM7b, XMM7c, XMM7d 995#ifdef _LP64 996 ,XMM8, XMM8b, XMM8c, XMM8d, 997 XMM9, XMM9b, XMM9c, XMM9d, 998 XMM10, XMM10b, XMM10c, XMM10d, 999 XMM11, XMM11b, XMM11c, XMM11d, 1000 XMM12, XMM12b, XMM12c, XMM12d, 1001 XMM13, XMM13b, XMM13c, XMM13d, 1002 XMM14, XMM14b, XMM14c, XMM14d, 1003 XMM15, XMM15b, XMM15c, XMM15d, 1004 XMM16, XMM16b, XMM16c, XMM16d, 1005 XMM17, XMM17b, XMM17c, XMM17d, 1006 XMM18, XMM18b, XMM18c, XMM18d, 1007 XMM19, XMM19b, XMM19c, XMM19d, 1008 XMM20, XMM20b, XMM20c, XMM20d, 1009 XMM21, XMM21b, XMM21c, XMM21d, 1010 XMM22, XMM22b, XMM22c, XMM22d, 1011 XMM23, XMM23b, XMM23c, XMM23d, 1012 XMM24, XMM24b, XMM24c, XMM24d, 1013 XMM25, XMM25b, XMM25c, XMM25d, 1014 XMM26, XMM26b, XMM26c, XMM26d, 1015 XMM27, XMM27b, XMM27c, XMM27d, 1016 XMM28, XMM28b, XMM28c, XMM28d, 1017 XMM29, XMM29b, XMM29c, XMM29d, 1018 XMM30, XMM30b, XMM30c, XMM30d, 1019 XMM31, XMM31b, XMM31c, XMM31d 1020#endif 1021 ); 1022 1023reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1024reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1025 1026// Class for all 256bit vector registers 1027reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1028 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1029 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1030 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1031 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, 
                             XMM4f, XMM4g, XMM4h,
                             XMM5,  XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                             XMM6,  XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                             XMM7,  XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                            ,XMM8,  XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                             XMM9,  XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for all 256bit vector registers
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 512bit vector registers
reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                          ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                           );

// Class for restricted 512bit vector registers
reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
#endif
                             );

reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
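
// Note on reg_class_dynamic (editorial): each dynamic class above selects
// between its two underlying classes through the given runtime predicate.
// For example, vectorz_reg resolves to vectorz_reg_evex (XMM0-XMM31) when
// VM_Version::supports_evex() holds, and to vectorz_reg_legacy (XMM0-XMM15)
// otherwise.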

reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

#include "runtime/vm_version.hpp"

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // Number of relocations needed by a call trampoline stub.
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions plus one move for unreachable address.
    return 15+3;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The deopt handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};


inline uint vector_length(const Node* n) {
  const TypeVect* vt = n->bottom_type()->is_vect();
  return vt->length();
}

inline uint vector_length(const MachNode* use, MachOper* opnd) {
  uint def_idx = use->operand_index(opnd);
  Node* def = use->in(def_idx);
  return def->bottom_type()->is_vect()->length();
}

inline uint vector_length_in_bytes(const Node* n) {
  const TypeVect* vt = n->bottom_type()->is_vect();
  return vt->length_in_bytes();
}

inline uint vector_length_in_bytes(const MachNode* use, MachOper* opnd) {
  uint def_idx = use->operand_index(opnd);
  Node* def = use->in(def_idx);
  return def->bottom_type()->is_vect()->length_in_bytes();
}

inline BasicType vector_element_basic_type(const Node *n) {
  return n->bottom_type()->is_vect()->element_basic_type();
}

inline BasicType vector_element_basic_type(const MachNode *use, MachOper* opnd) {
  uint def_idx = use->operand_index(opnd);
  Node* def = use->in(def_idx);
  return def->bottom_type()->is_vect()->element_basic_type();
}

inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch(bytes) {
    case  4: // fall-through
    case  8: // fall-through
    case 16: return Assembler::AVX_128bit;
    case 32: return Assembler::AVX_256bit;
    case 64: return Assembler::AVX_512bit;

    default: {
      ShouldNotReachHere();
      return Assembler::AVX_NoVec;
    }
  }
}

static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
  return vector_length_encoding(vector_length_in_bytes(n));
}

static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
  uint def_idx = use->operand_index(opnd);
  Node* def = use->in(def_idx);
  return vector_length_encoding(def);
}
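
// Illustrative sketch (editorial note, not part of the build): these helpers
// let the emitted encoding be driven by a node's vector type. For example, a
// 32-byte vector yields Assembler::AVX_256bit:
//
//   Assembler::AvxVectorLen vlen_enc = vector_length_encoding(n); // n: some vector node
//   BasicType elem_bt = vector_element_basic_type(n);             // e.g. T_INT
//   // vlen_enc and elem_bt then parameterize the concrete AVX/EVEX encoding.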

static inline bool is_unsigned_booltest_pred(int bt) {
  return ((bt & BoolTest::unsigned_compare) == BoolTest::unsigned_compare);
}

class Node::PD {
public:
  enum NodeFlags {
    Flag_intel_jcc_erratum = Node::_last_flag << 1,
    _last_flag = Flag_intel_jcc_erratum
  };
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"
#include "c2_intelJccErratum_x86.hpp"

void PhaseOutput::pd_perform_mach_node_analysis() {
  if (VM_Version::has_intel_jcc_erratum()) {
    int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
    _buf_sizes._code += extra_padding;
  }
}

int MachNode::pd_alignment_required() const {
  if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
    // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
    return IntelJccErratum::largest_jcc_size() + 1;
  } else {
    return 1;
  }
}

int MachNode::compute_padding(int current_offset) const {
  if (flags() & Node::PD::Flag_intel_jcc_erratum) {
    Compile* C = Compile::current();
    PhaseOutput* output = C->output();
    Block* block = output->block();
    int index = output->index();
    return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
  } else {
    return 0;
  }
}

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  C2_MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0; // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  C2_MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0; // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push a "the_pc" on the stack without destroying any registers
  // as they all may be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
  __ end_a_stub();
  return offset;
}

Assembler::Width widthForType(BasicType bt) {
  if (bt == T_BYTE) {
    return Assembler::B;
  } else if (bt == T_SHORT) {
    return Assembler::W;
  } else if (bt == T_INT) {
    return Assembler::D;
  } else {
    assert(bt == T_LONG, "not a long: %s", type2name(bt));
    return Assembler::Q;
  }
}

//=============================================================================

  // Float masks come from different places depending on platform.
#ifdef _LP64
  static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
  static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
  static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
  static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
  static address float_signmask()  { return (address)float_signmask_pool; }
  static address float_signflip()  { return (address)float_signflip_pool; }
  static address double_signmask() { return (address)double_signmask_pool; }
  static address double_signflip() { return (address)double_signflip_pool; }
#endif
  static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
  static address vector_int_to_byte_mask()   { return StubRoutines::x86::vector_int_to_byte_mask(); }
  static address vector_byte_perm_mask()     { return StubRoutines::x86::vector_byte_perm_mask(); }
  static address vector_long_sign_mask()     { return StubRoutines::x86::vector_long_sign_mask(); }
  static address vector_all_bits_set()       { return StubRoutines::x86::vector_all_bits_set(); }
  static address vector_int_to_short_mask()  { return StubRoutines::x86::vector_int_to_short_mask(); }
  static address vector_byte_shufflemask()   { return StubRoutines::x86::vector_byte_shuffle_mask(); }
  static address vector_short_shufflemask()  { return StubRoutines::x86::vector_short_shuffle_mask(); }
  static address vector_int_shufflemask()    { return StubRoutines::x86::vector_int_shuffle_mask(); }
  static address vector_long_shufflemask()   { return StubRoutines::x86::vector_long_shuffle_mask(); }
  static address vector_32_bit_mask()        { return StubRoutines::x86::vector_32_bit_mask(); }
  static address vector_64_bit_mask()        { return StubRoutines::x86::vector_64_bit_mask(); }
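
// Editorial note (an assumption based on the accessor names, not taken from
// this file): the sign-mask/sign-flip tables hold the usual FP bit-trick
// constants, e.g. 0x7FFFFFFF... for float_signmask() and 0x80000000... for
// float_signflip(). A minimal sketch of the intended use:
//
//   __ andps(dst, ExternalAddress(float_signmask())); // clear sign bit: dst = |dst|
//   __ xorps(dst, ExternalAddress(float_signflip())); // flip sign bit:  dst = -dst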

//=============================================================================
const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode)) {
    return false; // no match rule present
  }
  const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
  switch (opcode) {
    case Op_AbsVL:
    case Op_StoreVectorScatter:
      if (UseAVX < 3) {
        return false;
      }
      break;
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction) {
        return false;
      }
      break;
    case Op_PopCountVI:
      if (!UsePopCountInstruction || !VM_Version::supports_avx512_vpopcntdq()) {
        return false;
      }
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
        return false;
      }
      break;
    case Op_MulVL:
      if (UseSSE < 4) { // only with SSE4_1 or AVX
        return false;
      }
      break;
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false) {
        return false;
      }
      break;
    case Op_AddReductionVL:
      if (UseSSE < 2) { // requires at least SSE2
        return false;
      }
      break;
    case Op_AbsVB:
    case Op_AbsVS:
    case Op_AbsVI:
    case Op_AddReductionVI:
    case Op_AndReductionV:
    case Op_OrReductionV:
    case Op_XorReductionV:
      if (UseSSE < 3) { // requires at least SSSE3
        return false;
      }
      break;
    case Op_VectorLoadShuffle:
    case Op_VectorRearrange:
    case Op_MulReductionVI:
      if (UseSSE < 4) { // requires at least SSE4
        return false;
      }
      break;
    case Op_SqrtVD:
    case Op_SqrtVF:
    case Op_VectorMaskCmp:
    case Op_VectorCastB2X:
    case Op_VectorCastS2X:
    case Op_VectorCastI2X:
    case Op_VectorCastL2X:
    case Op_VectorCastF2X:
    case Op_VectorCastD2X:
      if (UseAVX < 1) { // enabled for AVX only
        return false;
      }
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8()) {
        return false;
      }
      break;
    case Op_CMoveVF:
    case Op_CMoveVD:
      if (UseAVX < 1) { // enabled for AVX only
        return false;
      }
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics) {
        return false;
      }
      break;
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics) {
        return false;
      }
      break;
    case Op_OnSpinWait:
      if (VM_Version::supports_on_spin_wait() == false) {
        return false;
      }
      break;
    case Op_MulVB:
    case Op_LShiftVB:
    case Op_RShiftVB:
    case Op_URShiftVB:
    case Op_VectorInsert:
    case Op_VectorLoadMask:
    case Op_VectorStoreMask:
    case Op_VectorBlend:
      if (UseSSE < 4) {
        return false;
      }
      break;
#ifdef _LP64
    case Op_MaxD:
    case Op_MaxF:
    case Op_MinD:
    case Op_MinF:
      if (UseAVX < 1) { // enabled for AVX only
        return false;
      }
      break;
#endif
    case Op_CacheWB:
    case Op_CacheWBPreSync:
    case Op_CacheWBPostSync:
      if (!VM_Version::supports_data_cache_line_flush()) {
        return false;
      }
      break;
    case Op_ExtractB:
    case Op_ExtractL:
    case Op_ExtractI:
    case Op_RoundDoubleMode:
      if (UseSSE < 4) {
        return false;
      }
      break;
    case Op_RoundDoubleModeV:
      if (VM_Version::supports_avx() == false) {
        return false; // 128bit vroundpd is not available
      }
      break;
    case Op_LoadVectorGather:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_FmaVD:
    case Op_FmaVF:
      if (!UseFMA) {
        return false;
      }
      break;
    case Op_MacroLogicV:
      if (UseAVX < 3 || !UseVectorMacroLogic) {
        return false;
      }
      break;

    case Op_VectorCmpMasked:
    case Op_VectorMaskGen:
    case Op_LoadVectorMasked:
    case Op_StoreVectorMasked:
      if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_VectorMaskFirstTrue:
    case Op_VectorMaskLastTrue:
    case Op_VectorMaskTrueCount:
      if (!is_LP64 || UseAVX < 1) {
        return false;
      }
      break;
#ifndef _LP64
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) { // requires at least SSE
        return false;
      }
      break;
    case Op_MulAddVS2VI:
    case Op_RShiftVL:
    case Op_AbsVD:
    case Op_NegVD:
      if (UseSSE < 2) {
        return false;
      }
      break;
#endif // !LP64
    case Op_SignumF:
      if (UseSSE < 1) {
        return false;
      }
      break;
    case Op_SignumD:
      if (UseSSE < 2) {
        return false;
      }
      break;
  }
  return true; // Match rules are supported by default.
}

//------------------------------------------------------------------------

// Identify extra cases that we might want to provide match rules for vector nodes and
// other intrinsics guarded with vector length (vlen) and element type (bt).
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
  const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
  if (!match_rule_supported(opcode)) {
    return false;
  }
  // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
  //   * SSE2 supports 128bit vectors for all types;
  //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
  //   * AVX2 supports 256bit vectors for all types;
  //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
  //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
  // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
  // And MaxVectorSize is taken into account as well.
  if (!vector_size_supported(bt, vlen)) {
    return false;
  }
  // Special cases which require vector length follow:
  //   * implementation limitations
  //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
  //   * 128bit vroundpd instruction is present only in AVX1
  int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
  switch (opcode) {
    case Op_AbsVF:
    case Op_NegVF:
      if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
        return false; // 512bit vandps and vxorps are not available
      }
      break;
    case Op_AbsVD:
    case Op_NegVD:
    case Op_MulVL:
      if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
        return false; // 512bit vpmullq, vandpd and vxorpd are not available
      }
      break;
    case Op_CMoveVF:
      if (vlen != 8) {
        return false; // implementation limitation (only vcmov8F_reg is present)
      }
      break;
    case Op_RotateRightV:
    case Op_RotateLeftV:
    case Op_MacroLogicV:
      if (!VM_Version::supports_evex() ||
          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
        return false;
      }
      break;
    case Op_ClearArray:
    case Op_VectorMaskGen:
    case Op_VectorCmpMasked:
    case Op_LoadVectorMasked:
    case Op_StoreVectorMasked:
      if (!is_LP64 || !VM_Version::supports_avx512bw()) {
        return false;
      }
      if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_CMoveVD:
      if (vlen != 4) {
        return false; // implementation limitation (only vcmov4D_reg is present)
      }
      break;
    case Op_MaxV:
    case Op_MinV:
      if (UseSSE < 4 && is_integral_type(bt)) {
        return false;
      }
      if ((bt == T_FLOAT || bt == T_DOUBLE)) {
        // Float/Double intrinsics are enabled for AVX family currently.
        if (UseAVX == 0) {
          return false;
        }
        if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
          return false;
        }
      }
      break;
    case Op_CallLeafVector:
      if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
        return false;
      }
      break;
    case Op_AddReductionVI:
      if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
        return false;
      }
      // fallthrough
    case Op_AndReductionV:
    case Op_OrReductionV:
    case Op_XorReductionV:
      if (is_subword_type(bt) && (UseSSE < 4)) {
        return false;
      }
#ifndef _LP64
      if (bt == T_BYTE || bt == T_LONG) {
        return false;
      }
#endif
      break;
#ifndef _LP64
    case Op_VectorInsert:
      if (bt == T_LONG || bt == T_DOUBLE) {
        return false;
      }
      break;
#endif
    case Op_MinReductionV:
    case Op_MaxReductionV:
      if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
        return false;
      } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
        return false;
      }
      // Float/Double intrinsics enabled for AVX family.
      if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
        return false;
      }
      if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
        return false;
      }
#ifndef _LP64
      if (bt == T_BYTE || bt == T_LONG) {
        return false;
      }
#endif
      break;
    case Op_VectorTest:
      if (UseSSE < 4) {
        return false; // Implementation limitation
      } else if (size_in_bits < 32) {
        return false; // Implementation limitation
      } else if (size_in_bits == 512 && (VM_Version::supports_avx512bw() == false)) {
        return false; // Implementation limitation
      }
      break;
    case Op_VectorLoadShuffle:
    case Op_VectorRearrange:
      if (vlen == 2) {
        return false; // Implementation limitation due to how shuffle is loaded
      } else if (size_in_bits == 256 && UseAVX < 2) {
        return false; // Implementation limitation
      } else if (bt == T_BYTE && size_in_bits > 256 && !VM_Version::supports_avx512_vbmi()) {
        return false; // Implementation limitation
      } else if (bt == T_SHORT && size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      break;
    case Op_VectorLoadMask:
      if (size_in_bits == 256 && UseAVX < 2) {
        return false; // Implementation limitation
      }
      // fallthrough
    case Op_VectorStoreMask:
      if (vlen == 2) {
        return false; // Implementation limitation
      }
      break;
    case Op_VectorCastB2X:
      if (size_in_bits == 256 && UseAVX < 2) {
        return false; // Implementation limitation
      }
      break;
    case Op_VectorCastS2X:
      if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
        return false;
      }
      break;
    case Op_VectorCastI2X:
      if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
        return false;
      }
      break;
    case Op_VectorCastL2X:
      if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
        return false;
      } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_VectorCastF2X:
    case Op_VectorCastD2X:
      if (is_integral_type(bt)) {
        // Casts from FP to integral types require special fixup logic not easily
        // implementable with vectors.
        return false; // Implementation limitation
      }
      // fallthrough
    case Op_MulReductionVI:
      if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_StoreVectorScatter:
      if (bt == T_BYTE || bt == T_SHORT) {
        return false;
      } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      // fallthrough
    case Op_LoadVectorGather:
      if (size_in_bits == 64) {
        return false;
      }
      break;
    case Op_VectorMaskCmp:
      if (vlen < 2 || size_in_bits < 32) {
        return false;
      }
      break;
  }
  return true; // Match rules are supported by default.
}

MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
  assert(Matcher::is_generic_vector(generic_opnd), "not generic");
  bool legacy = (generic_opnd->opcode() == LEGVEC);
  if (!VM_Version::supports_avx512vlbwdq() && // KNL
      is_temp && !legacy && (ideal_reg == Op_VecZ)) {
    // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
    return new legVecZOper();
  }
  if (legacy) {
    switch (ideal_reg) {
      case Op_VecS: return new legVecSOper();
      case Op_VecD: return new legVecDOper();
      case Op_VecX: return new legVecXOper();
      case Op_VecY: return new legVecYOper();
      case Op_VecZ: return new legVecZOper();
    }
  } else {
    switch (ideal_reg) {
      case Op_VecS: return new vecSOper();
      case Op_VecD: return new vecDOper();
      case Op_VecX: return new vecXOper();
      case Op_VecY: return new vecYOper();
      case Op_VecZ: return new vecZOper();
    }
  }
  ShouldNotReachHere();
  return NULL;
}

bool Matcher::is_generic_reg2reg_move(MachNode* m) {
  switch (m->rule()) {
    case MoveVec2Leg_rule:
    case MoveLeg2Vec_rule:
      return true;
    default:
      return false;
  }
}

bool Matcher::is_generic_vector(MachOper* opnd) {
  switch (opnd->opcode()) {
    case VEC:
    case LEGVEC:
      return true;
    default:
      return false;
  }
}

//------------------------------------------------------------------------

const RegMask* Matcher::predicate_reg_mask(void) {
  return &_VECTMASK_REG_mask;
}

const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
  return new TypeVectMask(TypeInt::BOOL, length);
}

const int Matcher::float_pressure(int default_pressure_threshold) {
  int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
  if (UseAVX > 2) {
    // Increase pressure threshold on machines with AVX3 which have
    // 2x more XMM registers.
    float_pressure_threshold = default_pressure_threshold * 2;
  }
#endif
  return float_pressure_threshold;
}

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // AVX512/EVEX supports 512bit vectors for all types.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
    size = (VM_Version::supports_avx512bw()) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
    case T_DOUBLE:
    case T_LONG:
      if (size < 16) return 0;
      break;
    case T_FLOAT:
    case T_INT:
      if (size < 8) return 0;
      break;
    case T_BOOLEAN:
      if (size < 4) return 0;
      break;
    case T_CHAR:
      if (size < 4) return 0;
      break;
    case T_BYTE:
      if (size < 4) return 0;
      break;
    case T_SHORT:
      if (size < 4) return 0;
      break;
    default:
      ShouldNotReachHere();
  }
  return size;
}
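
// Worked example (editorial): with UseAVX == 2 the base size is (1 << 2) * 8 = 32
// bytes for all types; with UseAVX == 3 it is (1 << 3) * 8 = 64 bytes, reduced
// back to 32 for BYTE/SHORT/CHAR unless AVX512BW is available, and finally
// clipped to MaxVectorSize. A 64-byte width corresponds to a full ZMM register.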

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  // Support for calling svml double64 vectors
  if (bt == T_DOUBLE) {
    size = 1;
  }
  return MIN2(size,max_size);
}

const int Matcher::scalable_vector_reg_size(const BasicType bt) {
  return -1;
}

// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}

// Check for shift by small constant as well
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
#ifdef _LP64
    // Allow the Matcher to match the rule which bypasses the ConvI2L
    // operation for an array index on LP64 if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else
#endif
      mstack.push(conv, Matcher::Pre_Visit);
    return true;
  }
  return false;
}

// This function identifies sub-graphs in which a 'load' node is
// input to two different nodes, and such that it can be matched
// with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
// refers to the same node.
//
// Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
// This is a temporary solution until we make DAGs expressible in ADL.
template<typename ConType>
class FusedPatternMatcher {
  Node* _op1_node;
  Node* _mop_node;
  int _con_op;

  static int match_next(Node* n, int next_op, int next_op_idx) {
    if (n->in(1) == NULL || n->in(2) == NULL) {
      return -1;
    }

    if (next_op_idx == -1) { // n is commutative, try rotations
      if (n->in(1)->Opcode() == next_op) {
        return 1;
      } else if (n->in(2)->Opcode() == next_op) {
        return 2;
      }
    } else {
      assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
      if (n->in(next_op_idx)->Opcode() == next_op) {
        return next_op_idx;
      }
    }
    return -1;
  }

 public:
  FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
    _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }

  bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
             int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
             typename ConType::NativeType con_value) {
    if (_op1_node->Opcode() != op1) {
      return false;
    }
    if (_mop_node->outcnt() > 2) {
      return false;
    }
    op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
    if (op1_op2_idx == -1) {
      return false;
    }
    // Memory operation must be the other edge
    int op1_mop_idx = (op1_op2_idx & 1) + 1;

    // Check that the mop node is really what we want
    if (_op1_node->in(op1_mop_idx) == _mop_node) {
      Node* op2_node = _op1_node->in(op1_op2_idx);
      if (op2_node->outcnt() > 1) {
        return false;
      }
      assert(op2_node->Opcode() == op2, "Should be");
      op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
      if (op2_con_idx == -1) {
        return false;
      }
      // Memory operation must be the other edge
      int op2_mop_idx = (op2_con_idx & 1) + 1;
      // Check that the memory operation is the same node
      if (op2_node->in(op2_mop_idx) == _mop_node) {
        // Now check the constant
        const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
        if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
          return true;
        }
      }
    }
    return false;
  }
};

static bool is_bmi_pattern(Node* n, Node* m) {
  assert(UseBMI1Instructions, "sanity");
  if (n != NULL && m != NULL) {
    if (m->Opcode() == Op_LoadI) {
      FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
      return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
             bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
             bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
    } else if (m->Opcode() == Op_LoadL) {
      FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
      return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
             bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
             bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
    }
  }
  return false;
}
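
// Editorial decoding of the blsi match() call above:
// bmii.match(Op_AndI, -1, Op_SubI, 1, 0) accepts the sub-graph
//   (AndI (SubI 0 LoadI*) LoadI*)
// where AndI is commutative (-1), SubI must have its constant at input 1, and
// that constant must be 0 -- exactly "-a[i] & a[i]", which lowers to a single
// blsi (extract lowest set bit) with a memory operand.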

// Should the matcher clone input 'm' of node 'n'?
bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
  // If 'n' and 'm' are part of a graph for a BMI instruction, clone the input 'm'.
  if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
    mstack.push(m, Visit);
    return true;
  }
  if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
    mstack.push(m, Visit); // m = ShiftCntV
    return true;
  }
  return false;
}

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    Node *adr = m->in(AddPNode::Address);

    // Intel can handle 2 adds in addressing mode
    // AtomicAdd is not an addressing expression.
    // Cheap to find it by looking for screwy base.
    if (adr->is_AddP() &&
        !adr->in(AddPNode::Base)->is_top() &&
        LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32
        // Are there other uses besides address expressions?
        !is_visited(adr)) {
      address_visited.set(adr->_idx); // Flag as address_visited
      Node *shift = adr->in(AddPNode::Offset);
      if (!clone_shift(shift, this, mstack, address_visited)) {
        mstack.push(shift, Pre_Visit);
      }
      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
    } else {
      mstack.push(adr, Pre_Visit);
    }

    // Clone X+offset as it also folds into most addressing expressions
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}

static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
  switch (bt) {
    case BoolTest::eq:
      return Assembler::eq;
    case BoolTest::ne:
      return Assembler::neq;
    case BoolTest::le:
    case BoolTest::ule:
      return Assembler::le;
    case BoolTest::ge:
    case BoolTest::uge:
      return Assembler::nlt;
    case BoolTest::lt:
    case BoolTest::ult:
      return Assembler::lt;
    case BoolTest::gt:
    case BoolTest::ugt:
      return Assembler::nle;
    default: ShouldNotReachHere(); return Assembler::_false;
  }
}

static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
  switch (bt) {
    case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
    case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
    case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
    case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
    case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
    case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
    default: ShouldNotReachHere(); return Assembler::FALSE_OS;
  }
}
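
// Editorial note: the integer mapping above folds signed and unsigned variants
// onto one predicate (e.g. BoolTest::ge and BoolTest::uge both become
// Assembler::nlt, "not less than"). The signed/unsigned distinction is carried
// separately: is_unsigned_booltest_pred() (see source_hpp above) tests the
// BoolTest::unsigned_compare bit, so e.g. BoolTest::ult still encodes the "lt"
// predicate while selecting an unsigned compare instruction.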

// Helper methods for MachSpillCopyNode::implementation().
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
  assert(ireg == Op_VecS || // 32bit vector
         ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
          (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
         "no non-adjacent vector moves");
  if (cbuf) {
    C2_MacroAssembler _masm(cbuf);
    switch (ireg) {
      case Op_VecS: // copy whole register
      case Op_VecD:
      case Op_VecX:
#ifndef _LP64
        __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
        } else {
          __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
        }
#endif
        break;
      case Op_VecY:
#ifndef _LP64
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
        } else {
          __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
        }
#endif
        break;
      case Op_VecZ:
        __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
        break;
      default:
        ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    switch (ireg) {
      case Op_VecS:
      case Op_VecD:
      case Op_VecX:
        st->print("movdqu %s,%s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,%s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
        break;
      default:
        ShouldNotReachHere();
    }
#endif
  }
}

void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
                      int stack_offset, int reg, uint ireg, outputStream* st) {
  if (cbuf) {
    C2_MacroAssembler _masm(cbuf);
    if (is_load) {
      switch (ireg) {
        case Op_VecS:
          __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
          break;
        case Op_VecD:
          __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
          break;
        case Op_VecX:
#ifndef _LP64
          __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
#else
          if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
            __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
          } else {
            __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
            __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
          }
#endif
          break;
        case Op_VecY:
#ifndef _LP64
          __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
#else
          if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
            __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
          } else {
            __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
            __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
          }
#endif
          break;
        case Op_VecZ:
          __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
          break;
        default:
          ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
        case Op_VecS:
          __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
          break;
        case Op_VecD:
          __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
          break;
        case Op_VecX:
#ifndef _LP64
          __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
#else
          if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
            __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
          } else {
            __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
          }
#endif
          break;
        case Op_VecY:
#ifndef _LP64
          __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
#else
          if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
            __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
          } else {
            __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
          }
#endif
          break;
        case Op_VecZ:
          __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          break;
        default:
          ShouldNotReachHere();
      }
    }
#ifndef PRODUCT
  } else {
    if (is_load) {
      switch (ireg) {
        case Op_VecS:
          st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
          break;
        case Op_VecD:
          st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
          break;
        case Op_VecX:
          st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
          break;
        case Op_VecY:
        case Op_VecZ:
          st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
          break;
        default:
          ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
        case Op_VecS:
          st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
          break;
        case Op_VecD:
          st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
          break;
        case Op_VecX:
          st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
          break;
        case Op_VecY:
        case Op_VecZ:
          st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
          break;
        default:
          ShouldNotReachHere();
      }
    }
#endif
  }
}

static inline jlong replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits
  while (bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}
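
// Worked example (editorial): replicate8_imm(0x8F, 1) masks the constant to its
// 8-bit width first (guarding against sign extension of the int argument) and
// then doubles the pattern until all 64 bits are filled:
//   0x8F -> 0x8F8F -> 0x8F8F8F8F -> 0x8F8F8F8F8F8F8F8F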
2395 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 2396 int bit_width = width * 8; 2397 jlong val = con; 2398 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 2399 while(bit_width < 64) { 2400 val |= (val << bit_width); 2401 bit_width <<= 1; 2402 } 2403 return val; 2404} 2405 2406#ifndef PRODUCT 2407 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2408 st->print("nop \t# %d bytes pad for loops and calls", _count); 2409 } 2410#endif 2411 2412 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 2413 C2_MacroAssembler _masm(&cbuf); 2414 __ nop(_count); 2415 } 2416 2417 uint MachNopNode::size(PhaseRegAlloc*) const { 2418 return _count; 2419 } 2420 2421#ifndef PRODUCT 2422 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2423 st->print("# breakpoint"); 2424 } 2425#endif 2426 2427 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 2428 C2_MacroAssembler _masm(&cbuf); 2429 __ int3(); 2430 } 2431 2432 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2433 return MachNode::size(ra_); 2434 } 2435 2436%} 2437 2438encode %{ 2439 2440 enc_class call_epilog %{ 2441 if (VerifyStackAtCalls) { 2442 // Check that stack depth is unchanged: find majik cookie on stack 2443 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2444 C2_MacroAssembler _masm(&cbuf); 2445 Label L; 2446 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2447 __ jccb(Assembler::equal, L); 2448 // Die if stack mismatch 2449 __ int3(); 2450 __ bind(L); 2451 } 2452 %} 2453 2454%} 2455 2456// Operands for bound floating pointer register arguments 2457operand rxmm0() %{ 2458 constraint(ALLOC_IN_RC(xmm0_reg)); 2459 match(VecX); 2460 format%{%} 2461 interface(REG_INTER); 2462%} 2463 2464//----------OPERANDS----------------------------------------------------------- 2465// Operand definitions must precede instruction definitions for correct parsing 2466// in the ADLC because operands constitute user defined types which are used in 2467// instruction definitions. 2468 2469// Vectors 2470 2471// Dummy generic vector class. Should be used for all vector operands. 2472// Replaced with vec[SDXYZ] during post-selection pass. 2473operand vec() %{ 2474 constraint(ALLOC_IN_RC(dynamic)); 2475 match(VecX); 2476 match(VecY); 2477 match(VecZ); 2478 match(VecS); 2479 match(VecD); 2480 2481 format %{ %} 2482 interface(REG_INTER); 2483%} 2484 2485// Dummy generic legacy vector class. Should be used for all legacy vector operands. 2486// Replaced with legVec[SDXYZ] during post-selection cleanup. 2487// Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2488// runtime code generation via reg_class_dynamic. 2489operand legVec() %{ 2490 constraint(ALLOC_IN_RC(dynamic)); 2491 match(VecX); 2492 match(VecY); 2493 match(VecZ); 2494 match(VecS); 2495 match(VecD); 2496 2497 format %{ %} 2498 interface(REG_INTER); 2499%} 2500 2501// Replaces vec during post-selection cleanup. See above. 2502operand vecS() %{ 2503 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2504 match(VecS); 2505 2506 format %{ %} 2507 interface(REG_INTER); 2508%} 2509 2510// Replaces legVec during post-selection cleanup. See above. 2511operand legVecS() %{ 2512 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2513 match(VecS); 2514 2515 format %{ %} 2516 interface(REG_INTER); 2517%} 2518 2519// Replaces vec during post-selection cleanup. See above. 
2520operand vecD() %{ 2521 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2522 match(VecD); 2523 2524 format %{ %} 2525 interface(REG_INTER); 2526%} 2527 2528// Replaces legVec during post-selection cleanup. See above. 2529operand legVecD() %{ 2530 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2531 match(VecD); 2532 2533 format %{ %} 2534 interface(REG_INTER); 2535%} 2536 2537// Replaces vec during post-selection cleanup. See above. 2538operand vecX() %{ 2539 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2540 match(VecX); 2541 2542 format %{ %} 2543 interface(REG_INTER); 2544%} 2545 2546// Replaces legVec during post-selection cleanup. See above. 2547operand legVecX() %{ 2548 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2549 match(VecX); 2550 2551 format %{ %} 2552 interface(REG_INTER); 2553%} 2554 2555// Replaces vec during post-selection cleanup. See above. 2556operand vecY() %{ 2557 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2558 match(VecY); 2559 2560 format %{ %} 2561 interface(REG_INTER); 2562%} 2563 2564// Replaces legVec during post-selection cleanup. See above. 2565operand legVecY() %{ 2566 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2567 match(VecY); 2568 2569 format %{ %} 2570 interface(REG_INTER); 2571%} 2572 2573// Replaces vec during post-selection cleanup. See above. 2574operand vecZ() %{ 2575 constraint(ALLOC_IN_RC(vectorz_reg)); 2576 match(VecZ); 2577 2578 format %{ %} 2579 interface(REG_INTER); 2580%} 2581 2582// Replaces legVec during post-selection cleanup. See above. 2583operand legVecZ() %{ 2584 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2585 match(VecZ); 2586 2587 format %{ %} 2588 interface(REG_INTER); 2589%} 2590 2591// Comparison Code for FP conditional move 2592operand cmpOp_vcmppd() %{ 2593 match(Bool); 2594 2595 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 2596 n->as_Bool()->_test._test != BoolTest::no_overflow); 2597 format %{ "" %} 2598 interface(COND_INTER) %{ 2599 equal (0x0, "eq"); 2600 less (0x1, "lt"); 2601 less_equal (0x2, "le"); 2602 not_equal (0xC, "ne"); 2603 greater_equal(0xD, "ge"); 2604 greater (0xE, "gt"); 2605 //TODO cannot compile (adlc breaks) without two next lines with error: 2606 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 2607 // equal' for overflow. 2608 overflow (0x20, "o"); // not really supported by the instruction 2609 no_overflow (0x21, "no"); // not really supported by the instruction 2610 %} 2611%} 2612 2613 2614// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2615 2616// ============================================================================ 2617 2618instruct ShouldNotReachHere() %{ 2619 match(Halt); 2620 format %{ "stop\t# ShouldNotReachHere" %} 2621 ins_encode %{ 2622 if (is_reachable()) { 2623 __ stop(_halt_reason); 2624 } 2625 %} 2626 ins_pipe(pipe_slow); 2627%} 2628 2629// =================================EVEX special=============================== 2630// Existing partial implementation for post-loop multi-versioning computes 2631// the mask corresponding to tail loop in K1 opmask register. This may then be 2632// used for predicating instructions in loop body during last post-loop iteration. 2633// TODO: Remove hard-coded K1 usage while fixing existing post-loop 2634// multiversioning support. 
2635instruct setMask(rRegI dst, rRegI src, kReg_K1 mask) %{ 2636 predicate(PostLoopMultiversioning && Matcher::has_predicated_vectors()); 2637 match(Set dst (SetVectMaskI src)); 2638 effect(TEMP dst); 2639 format %{ "setvectmask $dst, $src" %} 2640 ins_encode %{ 2641 __ setvectmask($dst$$Register, $src$$Register, $mask$$KRegister); 2642 %} 2643 ins_pipe(pipe_slow); 2644%} 2645 2646// ============================================================================ 2647 2648instruct addF_reg(regF dst, regF src) %{ 2649 predicate((UseSSE>=1) && (UseAVX == 0)); 2650 match(Set dst (AddF dst src)); 2651 2652 format %{ "addss $dst, $src" %} 2653 ins_cost(150); 2654 ins_encode %{ 2655 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2656 %} 2657 ins_pipe(pipe_slow); 2658%} 2659 2660instruct addF_mem(regF dst, memory src) %{ 2661 predicate((UseSSE>=1) && (UseAVX == 0)); 2662 match(Set dst (AddF dst (LoadF src))); 2663 2664 format %{ "addss $dst, $src" %} 2665 ins_cost(150); 2666 ins_encode %{ 2667 __ addss($dst$$XMMRegister, $src$$Address); 2668 %} 2669 ins_pipe(pipe_slow); 2670%} 2671 2672instruct addF_imm(regF dst, immF con) %{ 2673 predicate((UseSSE>=1) && (UseAVX == 0)); 2674 match(Set dst (AddF dst con)); 2675 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2676 ins_cost(150); 2677 ins_encode %{ 2678 __ addss($dst$$XMMRegister, $constantaddress($con)); 2679 %} 2680 ins_pipe(pipe_slow); 2681%} 2682 2683instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2684 predicate(UseAVX > 0); 2685 match(Set dst (AddF src1 src2)); 2686 2687 format %{ "vaddss $dst, $src1, $src2" %} 2688 ins_cost(150); 2689 ins_encode %{ 2690 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2691 %} 2692 ins_pipe(pipe_slow); 2693%} 2694 2695instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2696 predicate(UseAVX > 0); 2697 match(Set dst (AddF src1 (LoadF src2))); 2698 2699 format %{ "vaddss $dst, $src1, $src2" %} 2700 ins_cost(150); 2701 ins_encode %{ 2702 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2703 %} 2704 ins_pipe(pipe_slow); 2705%} 2706 2707instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2708 predicate(UseAVX > 0); 2709 match(Set dst (AddF src con)); 2710 2711 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2712 ins_cost(150); 2713 ins_encode %{ 2714 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2715 %} 2716 ins_pipe(pipe_slow); 2717%} 2718 2719instruct addD_reg(regD dst, regD src) %{ 2720 predicate((UseSSE>=2) && (UseAVX == 0)); 2721 match(Set dst (AddD dst src)); 2722 2723 format %{ "addsd $dst, $src" %} 2724 ins_cost(150); 2725 ins_encode %{ 2726 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2727 %} 2728 ins_pipe(pipe_slow); 2729%} 2730 2731instruct addD_mem(regD dst, memory src) %{ 2732 predicate((UseSSE>=2) && (UseAVX == 0)); 2733 match(Set dst (AddD dst (LoadD src))); 2734 2735 format %{ "addsd $dst, $src" %} 2736 ins_cost(150); 2737 ins_encode %{ 2738 __ addsd($dst$$XMMRegister, $src$$Address); 2739 %} 2740 ins_pipe(pipe_slow); 2741%} 2742 2743instruct addD_imm(regD dst, immD con) %{ 2744 predicate((UseSSE>=2) && (UseAVX == 0)); 2745 match(Set dst (AddD dst con)); 2746 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2747 ins_cost(150); 2748 ins_encode %{ 2749 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2750 %} 2751 ins_pipe(pipe_slow); 2752%} 2753 2754instruct addD_reg_reg(regD dst, regD src1, regD 
src2) %{ 2755 predicate(UseAVX > 0); 2756 match(Set dst (AddD src1 src2)); 2757 2758 format %{ "vaddsd $dst, $src1, $src2" %} 2759 ins_cost(150); 2760 ins_encode %{ 2761 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2762 %} 2763 ins_pipe(pipe_slow); 2764%} 2765 2766instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2767 predicate(UseAVX > 0); 2768 match(Set dst (AddD src1 (LoadD src2))); 2769 2770 format %{ "vaddsd $dst, $src1, $src2" %} 2771 ins_cost(150); 2772 ins_encode %{ 2773 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2774 %} 2775 ins_pipe(pipe_slow); 2776%} 2777 2778instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2779 predicate(UseAVX > 0); 2780 match(Set dst (AddD src con)); 2781 2782 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2783 ins_cost(150); 2784 ins_encode %{ 2785 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2786 %} 2787 ins_pipe(pipe_slow); 2788%} 2789 2790instruct subF_reg(regF dst, regF src) %{ 2791 predicate((UseSSE>=1) && (UseAVX == 0)); 2792 match(Set dst (SubF dst src)); 2793 2794 format %{ "subss $dst, $src" %} 2795 ins_cost(150); 2796 ins_encode %{ 2797 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2798 %} 2799 ins_pipe(pipe_slow); 2800%} 2801 2802instruct subF_mem(regF dst, memory src) %{ 2803 predicate((UseSSE>=1) && (UseAVX == 0)); 2804 match(Set dst (SubF dst (LoadF src))); 2805 2806 format %{ "subss $dst, $src" %} 2807 ins_cost(150); 2808 ins_encode %{ 2809 __ subss($dst$$XMMRegister, $src$$Address); 2810 %} 2811 ins_pipe(pipe_slow); 2812%} 2813 2814instruct subF_imm(regF dst, immF con) %{ 2815 predicate((UseSSE>=1) && (UseAVX == 0)); 2816 match(Set dst (SubF dst con)); 2817 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2818 ins_cost(150); 2819 ins_encode %{ 2820 __ subss($dst$$XMMRegister, $constantaddress($con)); 2821 %} 2822 ins_pipe(pipe_slow); 2823%} 2824 2825instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2826 predicate(UseAVX > 0); 2827 match(Set dst (SubF src1 src2)); 2828 2829 format %{ "vsubss $dst, $src1, $src2" %} 2830 ins_cost(150); 2831 ins_encode %{ 2832 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2833 %} 2834 ins_pipe(pipe_slow); 2835%} 2836 2837instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2838 predicate(UseAVX > 0); 2839 match(Set dst (SubF src1 (LoadF src2))); 2840 2841 format %{ "vsubss $dst, $src1, $src2" %} 2842 ins_cost(150); 2843 ins_encode %{ 2844 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2845 %} 2846 ins_pipe(pipe_slow); 2847%} 2848 2849instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2850 predicate(UseAVX > 0); 2851 match(Set dst (SubF src con)); 2852 2853 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2854 ins_cost(150); 2855 ins_encode %{ 2856 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2857 %} 2858 ins_pipe(pipe_slow); 2859%} 2860 2861instruct subD_reg(regD dst, regD src) %{ 2862 predicate((UseSSE>=2) && (UseAVX == 0)); 2863 match(Set dst (SubD dst src)); 2864 2865 format %{ "subsd $dst, $src" %} 2866 ins_cost(150); 2867 ins_encode %{ 2868 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2869 %} 2870 ins_pipe(pipe_slow); 2871%} 2872 2873instruct subD_mem(regD dst, memory src) %{ 2874 predicate((UseSSE>=2) && (UseAVX == 0)); 2875 match(Set dst (SubD dst (LoadD src))); 2876 2877 format %{ "subsd $dst, $src" 
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst con));
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst src));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst con));
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst src));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst con));
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst src));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst con));
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// The sqrtss instruction needs its destination register to be pre-initialized for best performance.
// Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
instruct sqrtF_reg(regF dst) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF dst));
  format %{ "sqrtss $dst, $dst" %}
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The sqrtsd instruction needs its destination register to be pre-initialized for best performance.
// Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
instruct sqrtD_reg(regD dst) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD dst));
  format %{ "sqrtsd $dst, $dst" %}
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// ---------------------------------------- VectorReinterpret ------------------------------------

instruct reinterpret(vec dst) %{
  predicate(vector_length_in_bytes(n) == vector_length_in_bytes(n->in(1))); // dst == src
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ "vector_reinterpret $dst\t!" %}
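  // A same-size reinterpret is a pure no-op: the bits are already in the
  // right register, so no code needs to be emitted.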
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret_expand(vec dst, vec src, rRegP scratch) %{
  predicate(UseAVX == 0 &&
            (vector_length_in_bytes(n->in(1)) < vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP dst, TEMP scratch);
  format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %}
  ins_encode %{
    assert(vector_length_in_bytes(this) <= 16, "required");
    assert(vector_length_in_bytes(this, $src) <= 8, "required");

    int src_vlen_in_bytes = vector_length_in_bytes(this, $src);
    if (src_vlen_in_bytes == 4) {
      __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), $scratch$$Register);
    } else {
      assert(src_vlen_in_bytes == 8, "");
      __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), $scratch$$Register);
    }
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreinterpret_expand4(legVec dst, vec src, rRegP scratch) %{
  predicate(UseAVX > 0 &&
            (vector_length_in_bytes(n->in(1)) == 4) && // src
            (vector_length_in_bytes(n->in(1)) < vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP scratch);
  format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %}
  ins_encode %{
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}


instruct vreinterpret_expand(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            (vector_length_in_bytes(n->in(1)) > 4) && // src
            (vector_length_in_bytes(n->in(1)) < vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_expand $dst,$src\t!" %}
  ins_encode %{
    switch (vector_length_in_bytes(this, $src)) {
      case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret_shrink(vec dst, legVec src) %{
  predicate(vector_length_in_bytes(n->in(1)) > vector_length_in_bytes(n)); // src > dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
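  // Shrinking only has to preserve the low dst-sized bytes of $src; the
  // switch below picks the narrowest move that covers them.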
  ins_encode %{
    switch (vector_length_in_bytes(this)) {
      case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
      case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------------------------------------------------------------------------------------

#ifdef _LP64
instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
  match(Set dst (RoundDoubleMode src rmode));
  format %{ "roundsd $dst,$src" %}
  ins_cost(150);
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
  %}
  ins_pipe(pipe_slow);
%}

instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{
  match(Set dst (RoundDoubleMode (LoadD src) rmode));
  format %{ "roundsd $dst,$src" %}
  ins_cost(150);
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ roundsd($dst$$XMMRegister, $src$$Address, $rmode$$constant);
  %}
  ins_pipe(pipe_slow);
%}

instruct roundD_imm(legRegD dst, immD con, immU8 rmode, rRegI scratch_reg) %{
  match(Set dst (RoundDoubleMode con rmode));
  effect(TEMP scratch_reg);
  format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, $scratch_reg$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
  predicate(vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
  predicate(vector_length(n) == 8);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
  predicate(vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
  predicate(vector_length(n) == 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
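  // vrndscalepd is the EVEX-encoded replacement for vroundpd at 512-bit
  // width, hence the UseAVX > 2 assert below.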
  ins_encode %{
    assert(UseAVX > 2, "required");
    __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

instruct onspinwait() %{
  match(OnSpinWait);
  ins_cost(200);

  format %{
    $$template
    $$emit$$"pause\t! membar_onspinwait"
  %}
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}

// a * b + c
instruct fmaD_reg(regD a, regD b, regD c) %{
  predicate(UseFMA);
  match(Set c (FmaD c (Binary a b)));
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct fmaF_reg(regF a, regF b, regF c) %{
  predicate(UseFMA);
  match(Set c (FmaF c (Binary a b)));
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR INSTRUCTIONS=====================================

// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
instruct MoveVec2Leg(legVec dst, vec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct MoveLeg2Vec(vec dst, legVec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// ============================================================================

// Load vectors generic operand pattern
instruct loadV(vec dst, memory mem) %{
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "load_vector $dst,$mem" %}
  ins_encode %{
    switch (vector_length_in_bytes(this)) {
      case  4: __ movdl    ($dst$$XMMRegister, $mem$$Address); break;
      case  8: __ movq     ($dst$$XMMRegister, $mem$$Address); break;
      case 16: __ movdqu   ($dst$$XMMRegister, $mem$$Address); break;
      case 32: __ vmovdqu  ($dst$$XMMRegister, $mem$$Address); break;
      case 64: __ evmovdqul($dst$$XMMRegister, $mem$$Address, Assembler::AVX_512bit); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}

// Store vectors generic operand pattern.
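// As with loadV above, the move instruction is selected purely by size:
// movdl/movq for sub-16-byte vectors, (v)movdqu for 16/32 bytes, and the
// EVEX-encoded evmovdqul for a full 64-byte (512-bit) vector.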
instruct storeV(memory mem, vec src) %{
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "store_vector $mem,$src\n\t" %}
  ins_encode %{
    switch (vector_length_in_bytes(this, $src)) {
      case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
      case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
      case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
      case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
      case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------- Gather ------------------------------------

// Gather INT, LONG, FLOAT, DOUBLE

instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
  predicate(vector_length_in_bytes(n) <= 32);
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP mask);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "sanity");

    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = vector_element_basic_type(this);

    assert(vector_length_in_bytes(this) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    if (vlen_enc == Assembler::AVX_128bit) {
      __ movdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    } else {
      __ vmovdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    }
    __ lea($tmp$$Register, $mem$$Address);
    __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(vector_length_in_bytes(n) == 64);
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = vector_element_basic_type(this);

    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), $tmp$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ====================Scatter=======================================

// Scatter INT, LONG, FLOAT, DOUBLE

instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = vector_element_basic_type(this, $src);

    assert(vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), $tmp$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ====================REPLICATE=======================================

// Replicate byte scalar to be vector
instruct ReplB_reg(vec dst, rRegI src) %{
  match(Set dst (ReplicateB src));
  format %{ "replicateB $dst,$src" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
      assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
      int vlen_enc = vector_length_encoding(this);
      __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
    } else if (VM_Version::supports_avx2()) {
      int vlen_enc = vector_length_encoding(this);
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
      __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      if (vlen >= 16) {
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
        if (vlen >= 32) {
          assert(vlen == 32, "sanity");
          __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
        }
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplB_mem(vec dst, memory mem) %{
  predicate(VM_Version::supports_avx2());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "replicateB $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplB_imm(vec dst, immI con) %{
  match(Set dst (ReplicateB con));
  format %{ "replicateB $dst,$con" %}
  ins_encode %{
    uint vlen = vector_length(this);
    InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 1));
    if (vlen == 4) {
      __ movdl($dst$$XMMRegister, const_addr);
    } else {
      __ movq($dst$$XMMRegister, const_addr);
      if (vlen >= 16) {
        if (VM_Version::supports_avx2()) {
          int vlen_enc = vector_length_encoding(this);
          __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        } else {
          assert(vlen == 16, "sanity");
          __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
        }
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
instruct ReplB_zero(vec dst, immI_0 zero) %{
  match(Set dst (ReplicateB zero));
  format %{ "replicateB $dst,$zero" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen <= 16) {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      // Use vpxor since AVX512F does not have 512bit vxorpd (requires AVX512DQ).
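      // XORing the register with itself zeroes every lane, whatever the
      // vector length encoding.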
      int vlen_enc = vector_length_encoding(this);
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================ReplicateS=======================================

instruct ReplS_reg(vec dst, rRegI src) %{
  match(Set dst (ReplicateS src));
  format %{ "replicateS $dst,$src" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
      assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
      int vlen_enc = vector_length_encoding(this);
      __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
    } else if (VM_Version::supports_avx2()) {
      int vlen_enc = vector_length_encoding(this);
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      if (vlen >= 8) {
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
        if (vlen >= 16) {
          assert(vlen == 16, "sanity");
          __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
        }
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplS_mem(vec dst, memory mem) %{
  predicate(VM_Version::supports_avx2());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "replicateS $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplS_imm(vec dst, immI con) %{
  match(Set dst (ReplicateS con));
  format %{ "replicateS $dst,$con" %}
  ins_encode %{
    uint vlen = vector_length(this);
    InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 2));
    if (vlen == 2) {
      __ movdl($dst$$XMMRegister, const_addr);
    } else {
      __ movq($dst$$XMMRegister, const_addr);
      if (vlen >= 8) {
        if (VM_Version::supports_avx2()) {
          int vlen_enc = vector_length_encoding(this);
          __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        } else {
          assert(vlen == 8, "sanity");
          __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
        }
      }
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct ReplS_zero(vec dst, immI_0 zero) %{
  match(Set dst (ReplicateS zero));
  format %{ "replicateS $dst,$zero" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen <= 8) {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================ReplicateI=======================================

instruct ReplI_reg(vec dst, rRegI src) %{
  match(Set dst (ReplicateI src));
  format %{ "replicateI $dst,$src" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      int vlen_enc = vector_length_encoding(this);
      __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
    } else if (VM_Version::supports_avx2()) {
      int vlen_enc = vector_length_encoding(this);
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      if (vlen >= 8) {
        assert(vlen == 8, "sanity");
        __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplI_mem(vec dst, memory mem) %{
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "replicateI $dst,$mem" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen <= 4) {
      __ movdl($dst$$XMMRegister, $mem$$Address);
      __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    } else {
      assert(VM_Version::supports_avx2(), "sanity");
      int vlen_enc = vector_length_encoding(this);
      __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplI_imm(vec dst, immI con) %{
  match(Set dst (ReplicateI con));
  format %{ "replicateI $dst,$con" %}
  ins_encode %{
    uint vlen = vector_length(this);
    InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 4));
    if (vlen <= 4) {
      __ movq($dst$$XMMRegister, const_addr);
      if (vlen == 4) {
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    } else {
      assert(VM_Version::supports_avx2(), "sanity");
      int vlen_enc = vector_length_encoding(this);
      __ movq($dst$$XMMRegister, const_addr);
      __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct ReplI_zero(vec dst, immI_0 zero) %{
  match(Set dst (ReplicateI zero));
  format %{ "replicateI $dst,$zero" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen <= 4) {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct ReplI_M1(vec dst, immI_M1 con) %{
  predicate(UseAVX > 0);
  match(Set dst (ReplicateB con));
  match(Set dst (ReplicateS con));
  match(Set dst (ReplicateI con));
  effect(TEMP dst);
  format %{ "vallones $dst" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ====================ReplicateL=======================================

#ifdef _LP64
// Replicate long (8 byte) scalar to be vector
instruct ReplL_reg(vec dst, rRegL src) %{
  match(Set dst (ReplicateL src));
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen == 2) {
      __ movdq($dst$$XMMRegister, $src$$Register);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    } else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      int vlen_enc = vector_length_encoding(this);
      __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
    } else if (VM_Version::supports_avx2()) {
      assert(vlen == 4, "sanity");
      int vlen_enc = vector_length_encoding(this);
      __ movdq($dst$$XMMRegister, $src$$Register);
      __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      assert(vlen == 4, "sanity");
      __ movdq($dst$$XMMRegister, $src$$Register);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
// Replicate long (8 byte) scalar to be vector
instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{
  predicate(vector_length(n) <= 4);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen == 2) {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
      __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      int vlen_enc = Assembler::AVX_256bit;
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
      __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
      __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
      __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{
  predicate(vector_length(n) == 8);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    if (VM_Version::supports_avx512vl()) {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
      __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
      __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
    } else {
      int vlen_enc = Assembler::AVX_512bit;
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
      __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
      __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

instruct ReplL_mem(vec dst, memory mem) %{
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "replicateL $dst,$mem" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen == 2) {
      __ movq($dst$$XMMRegister, $mem$$Address);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      assert(VM_Version::supports_avx2(), "sanity");
      int vlen_enc = vector_length_encoding(this);
      __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
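// Unlike the narrower Repl*_imm rules, no replicate8_imm packing is needed
// here: the 8-byte constant already fills a whole quadword.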
instruct ReplL_imm(vec dst, immL con) %{
  match(Set dst (ReplicateL con));
  format %{ "replicateL $dst,$con" %}
  ins_encode %{
    uint vlen = vector_length(this);
    InternalAddress const_addr = $constantaddress($con);
    if (vlen == 2) {
      __ movq($dst$$XMMRegister, const_addr);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      assert(VM_Version::supports_avx2(), "sanity");
      int vlen_enc = vector_length_encoding(this);
      __ movq($dst$$XMMRegister, const_addr);
      __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplL_zero(vec dst, immL0 zero) %{
  match(Set dst (ReplicateL zero));
  format %{ "replicateL $dst,$zero" %}
  ins_encode %{
    int vlen = vector_length(this);
    if (vlen == 2) {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct ReplL_M1(vec dst, immL_M1 con) %{
  predicate(UseAVX > 0);
  match(Set dst (ReplicateL con));
  effect(TEMP dst);
  format %{ "vallones $dst" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ====================ReplicateF=======================================

instruct ReplF_reg(vec dst, vlRegF src) %{
  match(Set dst (ReplicateF src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen <= 4) {
      __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    } else if (VM_Version::supports_avx2()) {
      int vlen_enc = vector_length_encoding(this);
      __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
    } else {
      assert(vlen == 8, "sanity");
      __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
      __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplF_mem(vec dst, memory mem) %{
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "replicateF $dst,$mem" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen <= 4) {
      __ movdl($dst$$XMMRegister, $mem$$Address);
      __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    } else {
      assert(VM_Version::supports_avx(), "sanity");
      int vlen_enc = vector_length_encoding(this);
      __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplF_zero(vec dst, immF0 zero) %{
  match(Set dst (ReplicateF zero));
  format %{ "replicateF $dst,$zero" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen <= 4) {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================ReplicateD=======================================

// Replicate double (8 bytes) scalar to be vector
instruct ReplD_reg(vec dst, vlRegD src) %{
  match(Set dst (ReplicateD src));
  format %{ "replicateD $dst,$src" %}
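  // Shuffle control 0x44 below selects dwords {0,1,0,1}, i.e. it copies the
  // low 64-bit element into both halves of the XMM register.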
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen == 2) {
      __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    } else if (VM_Version::supports_avx2()) {
      int vlen_enc = vector_length_encoding(this);
      __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
    } else {
      assert(vlen == 4, "sanity");
      __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
      __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplD_mem(vec dst, memory mem) %{
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "replicateD $dst,$mem" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen == 2) {
      __ movq($dst$$XMMRegister, $mem$$Address);
      __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x44);
    } else {
      assert(VM_Version::supports_avx(), "sanity");
      int vlen_enc = vector_length_encoding(this);
      __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplD_zero(vec dst, immD0 zero) %{
  match(Set dst (ReplicateD zero));
  format %{ "replicateD $dst,$zero" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen == 2) {
      __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================VECTOR INSERT=======================================

instruct insert(vec dst, rRegI val, immU8 idx) %{
  predicate(vector_length_in_bytes(n) < 32);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(vector_length_in_bytes(this) >= 8, "required");

    BasicType elem_bt = vector_element_basic_type(this);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)vector_length(this), "out of bounds");

    __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
  predicate(vector_length_in_bytes(n) == 32);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_256bit;
    BasicType elem_bt = vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "sanity");
    assert($idx$$constant < (int)vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
  predicate(vector_length_in_bytes(n) == 64);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    BasicType elem_bt = vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct insert2L(vec dst, rRegL val, immU8 idx) %{
  predicate(vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)vector_length(this), "out of bounds");

    __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
  predicate(vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
  predicate(vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(vector_element_basic_type(this) == T_LONG, "sanity");
    assert($idx$$constant < (int)vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
#endif

instruct insertF(vec dst, regF val, immU8 idx) %{
  predicate(vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)vector_length(this), "out of bounds");

    __ insertps($dst$$XMMRegister, $val$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
  predicate(vector_length(n) >= 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)vector_length(this), "out of bounds");

    int vlen = vector_length(this);
    uint x_idx = $idx$$constant & right_n_bits(2);
    if (vlen == 8) {
      uint y_idx = ($idx$$constant >> 2) & 1;
      int vlen_enc = Assembler::AVX_256bit;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      assert(vlen == 16, "sanity");
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)vector_length(this), "out of bounds");

    __ movq($tmp$$Register, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  predicate(vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    int vlen_enc = Assembler::AVX_256bit;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
  predicate(vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// ====================REDUCTION ARITHMETIC=======================================

// =======================Int Reduction==========================================

instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src2);
    __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Long Reduction==========================================

#ifdef _LP64
instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// =======================Float Reduction==========================================

instruct reductionF128(regF dst, vec src, vec vtmp) %{
  predicate(vector_length(n->in(2)) <= 4); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));

// =======================Float Reduction==========================================

instruct reductionF128(regF dst, vec src, vec vtmp) %{
  predicate(vector_length(n->in(2)) <= 4); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(vector_length(n->in(2)) == 16); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Double Reduction==========================================

instruct reduction2D(regD dst, vec src, vec vtmp) %{
  predicate(vector_length(n->in(2)) == 2); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(vector_length(n->in(2)) == 4); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
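
// Unlike the integer reductions above, the float/double reduction rules
// thread the scalar accumulator through $dst (the match rules use "dst src",
// with TEMP dst): the incoming scalar rides in the low element of dst and
// the result is produced there as well, so no general-purpose register is
// involved.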

// =======================Byte Reduction==========================================

#ifdef _LP64
instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// =======================Short Reduction==========================================

instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(vector_element_basic_type(n->in(2)) == T_SHORT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src2);
    __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Mul Reduction==========================================

instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(vector_element_basic_type(n->in(2)) == T_BYTE &&
            vector_length(n->in(2)) <= 32); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(vector_element_basic_type(n->in(2)) == T_BYTE &&
            vector_length(n->in(2)) == 64); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp,
                            legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
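
// The immF/immD src1 variants above only fire when the scalar input is the
// identity of the operation (+Inf for min, -Inf for max; see the predicate
// check on n->in(1)->bottom_type()), so it can be ignored and the whole
// reduction stays in vector registers. Any other scalar input falls through
// to the "_av" accumulator variants below, which fold $dst into the result.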

instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp,
                               legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(vector_element_basic_type(n->in(2)) == T_FLOAT &&
            vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp,
                              legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(vector_element_basic_type(n->in(2)) == T_FLOAT &&
            vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//--------------------Min/Max Double Reduction --------------------
instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2,
                            legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs
                            rFlagsReg cr) %{
  predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionD(legRegD dst, immD src1, legVec src2,
                           legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs
                           rFlagsReg cr) %{
  predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2D_av(legRegD dst, legVec src,
                               legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs
                               rFlagsReg cr) %{
  predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionD_av(legRegD dst, legVec src,
                              legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs
                              rFlagsReg cr) %{
  predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================
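
// The arithmetic rules below follow a common three-flavor pattern:
//   *      (UseAVX == 0)  two-operand SSE form, destructive on dst;
//   *_reg  (UseAVX > 0)   three-operand VEX form, dst = src1 op src2;
//   *_mem  (UseAVX > 0)   same, with the second input folded from memory
//                         (only for vectors wider than 8 bytes).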

// --------------------------------- ADD --------------------------------------

// Bytes vector add
instruct vaddB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packedB" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddB_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector add
instruct vaddS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packedS" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
instruct vaddI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packedI" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vaddL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packedL" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddL_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vaddF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packedF" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vaddD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packedD" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsubB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packedB" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubB_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsubS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packedS" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsubI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packedI" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsubL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packedL" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubL_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsubF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packedF" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsubD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packedD" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

// Byte vector mul
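// x86 has no byte-wise multiply (there is no pmullb), so the rules below
// widen each byte to a word (pmovsxbw), multiply with pmullw, mask every
// result back to its low byte (vector_short_to_byte_mask) and re-pack with
// packuswb. Only the low 8 bits of each 16-bit product are kept, so the
// extension mode does not affect the result: e.g. (-2) * 3 = 0xFFFA in
// 16 bits, whose low byte 0xFA is exactly (byte)-6.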
instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{
  predicate(vector_length(n) == 4 ||
            vector_length(n) == 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{ "vector_mulB $dst,$src1,$src2" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{
  predicate(vector_length(n) == 16 && UseAVX <= 1);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
  format %{ "vector_mulB $dst,$src1,$src2" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    __ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE);
    __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE);
    __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
    __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{
  predicate(vector_length(n) == 16 && UseAVX > 1);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{ "vector_mulB $dst,$src1,$src2" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_256bit;
    __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{
  predicate(vector_length(n) == 32);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
  format %{ "vector_mulB $dst,$src1,$src2" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister);
    __ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister);
    __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
    __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul64B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{
  predicate(vector_length(n) == 64);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
  format %{ "vector_mulB $dst,$src1,$src2" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = Assembler::AVX_512bit;
    __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister);
    __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister);
    __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
    __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, $scratch$$Register);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector mul
instruct vmulS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packedS" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul
instruct vmulI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packedI" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector mul
instruct vmulL_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulL_mem(vec dst, vec src, memory mem) %{
  predicate(VM_Version::supports_avx512dq() &&
            (vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
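
// There is no vpmullq below AVX-512DQ, so a 64x64->64 bit multiply is
// composed from 32-bit pieces. Writing a = 2^32*aH + aL and b = 2^32*bH + bL:
//   a*b mod 2^64 = 2^32*((aH*bL + aL*bH) mod 2^32) + aL*bL
// In the rules below, pshufd swaps the 32-bit halves of one operand,
// pmulld/phaddd then produce the cross-term sum aH*bL + aL*bH per lane,
// psllq moves it into the upper 32 bits, pmuludq computes the full 64-bit
// aL*bL, and paddq combines the two.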

instruct mul2L_reg(vec dst, vec src2, legVec tmp) %{
  predicate(vector_length(n) == 2 && !VM_Version::supports_avx512dq());
  match(Set dst (MulVL dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "pshufd $tmp,$src2, 177\n\t"
            "pmulld $tmp,$dst\n\t"
            "phaddd $tmp,$tmp\n\t"
            "pmovzxdq $tmp,$tmp\n\t"
            "psllq $tmp, 32\n\t"
            "pmuludq $dst,$src2\n\t"
            "paddq $dst,$tmp\n\t! mul packed2L" %}

  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
    int vlen_enc = Assembler::AVX_128bit;
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177);
    __ pmulld($tmp$$XMMRegister, $dst$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ pmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ psllq($tmp$$XMMRegister, 32);
    __ pmuludq($dst$$XMMRegister, $src2$$XMMRegister);
    __ paddq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg_avx(vec dst, vec src1, vec src2, legVec tmp, legVec tmp1) %{
  predicate(vector_length(n) == 4 && !VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  effect(TEMP tmp1, TEMP tmp);
  format %{ "vpshufd $tmp,$src2\n\t"
            "vpmulld $tmp,$src1,$tmp\n\t"
            "vphaddd $tmp,$tmp,$tmp\n\t"
            "vpmovzxdq $tmp,$tmp\n\t"
            "vpsllq $tmp,$tmp\n\t"
            "vpmuludq $tmp1,$src1,$src2\n\t"
            "vpaddq $dst,$tmp,$tmp1\t! mul packed4L" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_256bit;
    __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vlen_enc);
    __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vextracti128_high($tmp1$$XMMRegister, $tmp$$XMMRegister);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
    __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vlen_enc);
    __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmulF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packedF" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmulD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packedD" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
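
// Vector conditional move, branch-free: the AVX compare fills each lane of
// $dst with all-ones or all-zeros according to the condition $copnd, and the
// variable blend then picks every lane from one of the two sources based on
// that mask.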

instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(vector_length(n) == 8);
  match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "vcmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
            "vblendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
         %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    int vlen_enc = Assembler::AVX_256bit;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc);
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(vector_length(n) == 4);
  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "vcmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
            "vblendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
         %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    int vlen_enc = Assembler::AVX_256bit;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc);
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdivF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packedF" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdivD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packedD" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ MinMax ---------------------------------------
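
// Lowering overview for the Min/Max rules below (informal): byte/short/int
// lanes map directly onto the SSE4.1/AVX pmins*/pmaxs* families via the
// pminmax/vpminmax helpers; long lanes have no such instruction before
// AVX-512 (vpminsq/vpmaxsq), so the pre-EVEX variants are expected to fall
// back to a compare-and-blend sequence inside those helpers; float/double
// use dedicated sequences (vminmax_fp/evminmax_fp) because Java min/max
// semantics for NaN and -0.0 do not match a bare minps/maxps.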
" %} 5686 ins_encode %{ 5687 assert(UseSSE >= 4, "required"); 5688 5689 int opcode = this->ideal_Opcode(); 5690 BasicType elem_bt = vector_element_basic_type(this); 5691 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 5692 %} 5693 ins_pipe( pipe_slow ); 5694%} 5695 5696instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 5697 predicate(is_integral_type(vector_element_basic_type(n)) && vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 5698 UseAVX > 0); 5699 match(Set dst (MinV src1 src2)); 5700 match(Set dst (MaxV src1 src2)); 5701 format %{ "vector_minmax $dst,$src1,$src2\t! " %} 5702 ins_encode %{ 5703 int opcode = this->ideal_Opcode(); 5704 int vlen_enc = vector_length_encoding(this); 5705 BasicType elem_bt = vector_element_basic_type(this); 5706 5707 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5708 %} 5709 ins_pipe( pipe_slow ); 5710%} 5711 5712// Long vector Min/Max 5713instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 5714 predicate(vector_length_in_bytes(n) == 16 && vector_element_basic_type(n) == T_LONG && 5715 UseAVX == 0); 5716 match(Set dst (MinV dst src)); 5717 match(Set dst (MaxV src dst)); 5718 effect(TEMP dst, TEMP tmp); 5719 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 5720 ins_encode %{ 5721 assert(UseSSE >= 4, "required"); 5722 5723 int opcode = this->ideal_Opcode(); 5724 BasicType elem_bt = vector_element_basic_type(this); 5725 assert(elem_bt == T_LONG, "sanity"); 5726 5727 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 5728 %} 5729 ins_pipe( pipe_slow ); 5730%} 5731 5732instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 5733 predicate(vector_length_in_bytes(n) <= 32 && vector_element_basic_type(n) == T_LONG && 5734 UseAVX > 0 && !VM_Version::supports_avx512vl()); 5735 match(Set dst (MinV src1 src2)); 5736 match(Set dst (MaxV src1 src2)); 5737 effect(TEMP dst); 5738 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %} 5739 ins_encode %{ 5740 int vlen_enc = vector_length_encoding(this); 5741 int opcode = this->ideal_Opcode(); 5742 BasicType elem_bt = vector_element_basic_type(this); 5743 assert(elem_bt == T_LONG, "sanity"); 5744 5745 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5746 %} 5747 ins_pipe( pipe_slow ); 5748%} 5749 5750instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 5751 predicate((vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 5752 vector_element_basic_type(n) == T_LONG); 5753 match(Set dst (MinV src1 src2)); 5754 match(Set dst (MaxV src1 src2)); 5755 format %{ "vector_minmaxL $dst,$src1,src2\t! 
" %} 5756 ins_encode %{ 5757 assert(UseAVX > 2, "required"); 5758 5759 int vlen_enc = vector_length_encoding(this); 5760 int opcode = this->ideal_Opcode(); 5761 BasicType elem_bt = vector_element_basic_type(this); 5762 assert(elem_bt == T_LONG, "sanity"); 5763 5764 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5765 %} 5766 ins_pipe( pipe_slow ); 5767%} 5768 5769// Float/Double vector Min/Max 5770instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 5771 predicate(vector_length_in_bytes(n) <= 32 && 5772 is_floating_point_type(vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 5773 UseAVX > 0); 5774 match(Set dst (MinV a b)); 5775 match(Set dst (MaxV a b)); 5776 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 5777 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 5778 ins_encode %{ 5779 assert(UseAVX > 0, "required"); 5780 5781 int opcode = this->ideal_Opcode(); 5782 int vlen_enc = vector_length_encoding(this); 5783 BasicType elem_bt = vector_element_basic_type(this); 5784 5785 __ vminmax_fp(opcode, elem_bt, 5786 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 5787 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 5788 %} 5789 ins_pipe( pipe_slow ); 5790%} 5791 5792instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 5793 predicate(vector_length_in_bytes(n) == 64 && 5794 is_floating_point_type(vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 5795 match(Set dst (MinV a b)); 5796 match(Set dst (MaxV a b)); 5797 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 5798 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 5799 ins_encode %{ 5800 assert(UseAVX > 2, "required"); 5801 5802 int opcode = this->ideal_Opcode(); 5803 int vlen_enc = vector_length_encoding(this); 5804 BasicType elem_bt = vector_element_basic_type(this); 5805 5806 __ evminmax_fp(opcode, elem_bt, 5807 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 5808 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 5809 %} 5810 ins_pipe( pipe_slow ); 5811%} 5812 5813// --------------------------------- Signum --------------------------- 5814 5815instruct signumF_reg(regF dst, regF zero, regF one, rRegP scratch, rFlagsReg cr) %{ 5816 match(Set dst (SignumF dst (Binary zero one))); 5817 effect(TEMP scratch, KILL cr); 5818 format %{ "signumF $dst, $dst\t! using $scratch as TEMP" %} 5819 ins_encode %{ 5820 int opcode = this->ideal_Opcode(); 5821 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, $scratch$$Register); 5822 %} 5823 ins_pipe( pipe_slow ); 5824%} 5825 5826instruct signumD_reg(regD dst, regD zero, regD one, rRegP scratch, rFlagsReg cr) %{ 5827 match(Set dst (SignumD dst (Binary zero one))); 5828 effect(TEMP scratch, KILL cr); 5829 format %{ "signumD $dst, $dst\t! using $scratch as TEMP" %} 5830 ins_encode %{ 5831 int opcode = this->ideal_Opcode(); 5832 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, $scratch$$Register); 5833 %} 5834 ins_pipe( pipe_slow ); 5835%} 5836 5837// --------------------------------- Sqrt -------------------------------------- 5838 5839instruct vsqrtF_reg(vec dst, vec src) %{ 5840 match(Set dst (SqrtVF src)); 5841 format %{ "vsqrtps $dst,$src\t! 
sqrt packedF" %} 5842 ins_encode %{ 5843 assert(UseAVX > 0, "required"); 5844 int vlen_enc = vector_length_encoding(this); 5845 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 5846 %} 5847 ins_pipe( pipe_slow ); 5848%} 5849 5850instruct vsqrtF_mem(vec dst, memory mem) %{ 5851 predicate(vector_length_in_bytes(n->in(1)) > 8); 5852 match(Set dst (SqrtVF (LoadVector mem))); 5853 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 5854 ins_encode %{ 5855 assert(UseAVX > 0, "required"); 5856 int vlen_enc = vector_length_encoding(this); 5857 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); 5858 %} 5859 ins_pipe( pipe_slow ); 5860%} 5861 5862// Floating point vector sqrt 5863instruct vsqrtD_reg(vec dst, vec src) %{ 5864 match(Set dst (SqrtVD src)); 5865 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %} 5866 ins_encode %{ 5867 assert(UseAVX > 0, "required"); 5868 int vlen_enc = vector_length_encoding(this); 5869 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 5870 %} 5871 ins_pipe( pipe_slow ); 5872%} 5873 5874instruct vsqrtD_mem(vec dst, memory mem) %{ 5875 predicate(vector_length_in_bytes(n->in(1)) > 8); 5876 match(Set dst (SqrtVD (LoadVector mem))); 5877 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 5878 ins_encode %{ 5879 assert(UseAVX > 0, "required"); 5880 int vlen_enc = vector_length_encoding(this); 5881 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 5882 %} 5883 ins_pipe( pipe_slow ); 5884%} 5885 5886// ------------------------------ Shift --------------------------------------- 5887 5888// Left and right shift count vectors are the same on x86 5889// (only lowest bits of xmm reg are used for count). 5890instruct vshiftcnt(vec dst, rRegI cnt) %{ 5891 match(Set dst (LShiftCntV cnt)); 5892 match(Set dst (RShiftCntV cnt)); 5893 format %{ "movdl $dst,$cnt\t! 
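// SSE/AVX provide no byte-granularity shifts (no psllb/psrlb), so the rules
// below shift byte vectors by extending each byte to a word (vextendbw),
// doing a 16-bit shift (vshiftw), masking each result back to its low byte
// and re-packing. Sign-extension is used except for logical right shift
// (Op_URShiftVB), where zero-extension ensures zeros shift in from the high
// bits; for left shifts the extension mode is irrelevant because of the mask.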
load shift count" %} 5894 ins_encode %{ 5895 __ movdl($dst$$XMMRegister, $cnt$$Register); 5896 %} 5897 ins_pipe( pipe_slow ); 5898%} 5899 5900// Byte vector shift 5901instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 5902 predicate(vector_length(n) <= 8 && VectorNode::is_vshift_cnt(n->in(2))); 5903 match(Set dst ( LShiftVB src shift)); 5904 match(Set dst ( RShiftVB src shift)); 5905 match(Set dst (URShiftVB src shift)); 5906 effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch); 5907 format %{"vector_byte_shift $dst,$src,$shift" %} 5908 ins_encode %{ 5909 assert(UseSSE > 3, "required"); 5910 int opcode = this->ideal_Opcode(); 5911 bool sign = (opcode != Op_URShiftVB); 5912 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 5913 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 5914 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5915 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 5916 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 5917 %} 5918 ins_pipe( pipe_slow ); 5919%} 5920 5921instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{ 5922 predicate(vector_length(n) == 16 && VectorNode::is_vshift_cnt(n->in(2)) && 5923 UseAVX <= 1); 5924 match(Set dst ( LShiftVB src shift)); 5925 match(Set dst ( RShiftVB src shift)); 5926 match(Set dst (URShiftVB src shift)); 5927 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch); 5928 format %{"vector_byte_shift $dst,$src,$shift" %} 5929 ins_encode %{ 5930 assert(UseSSE > 3, "required"); 5931 int opcode = this->ideal_Opcode(); 5932 bool sign = (opcode != Op_URShiftVB); 5933 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 5934 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 5935 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 5936 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 5937 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 5938 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5939 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 5940 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 5941 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 5942 %} 5943 ins_pipe( pipe_slow ); 5944%} 5945 5946instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 5947 predicate(vector_length(n) == 16 && VectorNode::is_vshift_cnt(n->in(2)) && 5948 UseAVX > 1); 5949 match(Set dst ( LShiftVB src shift)); 5950 match(Set dst ( RShiftVB src shift)); 5951 match(Set dst (URShiftVB src shift)); 5952 effect(TEMP dst, TEMP tmp, TEMP scratch); 5953 format %{"vector_byte_shift $dst,$src,$shift" %} 5954 ins_encode %{ 5955 int opcode = this->ideal_Opcode(); 5956 bool sign = (opcode != Op_URShiftVB); 5957 int vlen_enc = Assembler::AVX_256bit; 5958 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 5959 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 5960 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); 5961 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 5962 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 5963 %} 5964 ins_pipe( pipe_slow ); 5965%} 5966 5967instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 5968 predicate(vector_length(n) == 32 && VectorNode::is_vshift_cnt(n->in(2))); 5969 match(Set dst ( LShiftVB src 

instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
  predicate(vector_length(n) == 32 && VectorNode::is_vshift_cnt(n->in(2)));
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
    __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{
  predicate(vector_length(n) == 64 && VectorNode::is_vshift_cnt(n->in(2)));
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_512bit;
    __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
    __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, $scratch$$Register);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
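
// Note on the 256-bit rule above: vpackuswb packs within each 128-bit lane,
// so after packing the shifted low and high halves, the byte groups sit in
// quadword order 0,2,1,3. The trailing vpermq with imm8 0xD8 (selectors
// 0,2,1,3) restores natural order; the 512-bit rule uses a permutation table
// (vector_byte_perm_mask) for the same cross-lane fixup. Decoding of the
// imm8 selectors as a small C++ sketch (illustrative only):
//
//   // imm8 0xD8 == 0b11011000 -> quadword selectors {0, 2, 1, 3}
//   void decode_vpermq_imm(unsigned char imm, int sel[4]) {
//     for (int i = 0; i < 4; i++) sel[i] = (imm >> (2 * i)) & 3;
//   }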
shift packedS" %} 6032 ins_encode %{ 6033 int opcode = this->ideal_Opcode(); 6034 if (UseAVX > 0) { 6035 int vlen_enc = vector_length_encoding(this); 6036 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6037 } else { 6038 int vlen = vector_length(this); 6039 if (vlen == 2) { 6040 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6041 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6042 } else if (vlen == 4) { 6043 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6044 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6045 } else { 6046 assert (vlen == 8, "sanity"); 6047 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6048 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6049 } 6050 } 6051 %} 6052 ins_pipe( pipe_slow ); 6053%} 6054 6055// Integers vector left shift 6056instruct vshiftI(vec dst, vec src, vec shift) %{ 6057 predicate(VectorNode::is_vshift_cnt(n->in(2))); 6058 match(Set dst ( LShiftVI src shift)); 6059 match(Set dst ( RShiftVI src shift)); 6060 match(Set dst (URShiftVI src shift)); 6061 effect(TEMP dst, USE src, USE shift); 6062 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %} 6063 ins_encode %{ 6064 int opcode = this->ideal_Opcode(); 6065 if (UseAVX > 0) { 6066 int vlen_enc = vector_length_encoding(this); 6067 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6068 } else { 6069 int vlen = vector_length(this); 6070 if (vlen == 2) { 6071 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6072 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6073 } else { 6074 assert(vlen == 4, "sanity"); 6075 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6076 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6077 } 6078 } 6079 %} 6080 ins_pipe( pipe_slow ); 6081%} 6082 6083// Integers vector left constant shift 6084instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{ 6085 match(Set dst (LShiftVI src (LShiftCntV shift))); 6086 match(Set dst (RShiftVI src (RShiftCntV shift))); 6087 match(Set dst (URShiftVI src (RShiftCntV shift))); 6088 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %} 6089 ins_encode %{ 6090 int opcode = this->ideal_Opcode(); 6091 if (UseAVX > 0) { 6092 int vector_len = vector_length_encoding(this); 6093 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6094 } else { 6095 int vlen = vector_length(this); 6096 if (vlen == 2) { 6097 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6098 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6099 } else { 6100 assert(vlen == 4, "sanity"); 6101 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6102 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6103 } 6104 } 6105 %} 6106 ins_pipe( pipe_slow ); 6107%} 6108 6109// Longs vector shift 6110instruct vshiftL(vec dst, vec src, vec shift) %{ 6111 predicate(VectorNode::is_vshift_cnt(n->in(2))); 6112 match(Set dst ( LShiftVL src shift)); 6113 match(Set dst (URShiftVL src shift)); 6114 effect(TEMP dst, USE src, USE shift); 6115 format %{ "vshiftq $dst,$src,$shift\t! 
shift packedL" %} 6116 ins_encode %{ 6117 int opcode = this->ideal_Opcode(); 6118 if (UseAVX > 0) { 6119 int vlen_enc = vector_length_encoding(this); 6120 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6121 } else { 6122 assert(vector_length(this) == 2, ""); 6123 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6124 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6125 } 6126 %} 6127 ins_pipe( pipe_slow ); 6128%} 6129 6130// Longs vector constant shift 6131instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 6132 match(Set dst (LShiftVL src (LShiftCntV shift))); 6133 match(Set dst (URShiftVL src (RShiftCntV shift))); 6134 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 6135 ins_encode %{ 6136 int opcode = this->ideal_Opcode(); 6137 if (UseAVX > 0) { 6138 int vector_len = vector_length_encoding(this); 6139 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6140 } else { 6141 assert(vector_length(this) == 2, ""); 6142 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6143 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6144 } 6145 %} 6146 ins_pipe( pipe_slow ); 6147%} 6148 6149// -------------------ArithmeticRightShift ----------------------------------- 6150// Long vector arithmetic right shift 6151instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 6152 predicate(VectorNode::is_vshift_cnt(n->in(2)) && UseAVX <= 2); 6153 match(Set dst (RShiftVL src shift)); 6154 effect(TEMP dst, TEMP tmp, TEMP scratch); 6155 format %{ "vshiftq $dst,$src,$shift" %} 6156 ins_encode %{ 6157 uint vlen = vector_length(this); 6158 if (vlen == 2) { 6159 assert(UseSSE >= 2, "required"); 6160 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6161 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 6162 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); 6163 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 6164 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 6165 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 6166 } else { 6167 assert(vlen == 4, "sanity"); 6168 assert(UseAVX > 1, "required"); 6169 int vlen_enc = Assembler::AVX_256bit; 6170 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6171 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); 6172 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6173 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6174 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6175 } 6176 %} 6177 ins_pipe( pipe_slow ); 6178%} 6179 6180instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 6181 predicate(VectorNode::is_vshift_cnt(n->in(2)) && UseAVX > 2); 6182 match(Set dst (RShiftVL src shift)); 6183 format %{ "vshiftq $dst,$src,$shift" %} 6184 ins_encode %{ 6185 int vlen_enc = vector_length_encoding(this); 6186 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6187 %} 6188 ins_pipe( pipe_slow ); 6189%} 6190 6191// ------------------- Variable Shift ----------------------------- 6192// Byte variable shift 6193instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ 6194 predicate(vector_length(n) <= 8 && 6195 !VectorNode::is_vshift_cnt(n->in(2)) && 6196 !VM_Version::supports_avx512bw()); 6197 match(Set dst ( LShiftVB src shift)); 6198 match(Set dst ( RShiftVB src shift)); 6199 

// Longs vector shift
instruct vshiftL(vec dst, vec src, vec shift) %{
  predicate(VectorNode::is_vshift_cnt(n->in(2)));
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      assert(vector_length(this) == 2, "sanity");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector constant shift
instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vector_len = vector_length_encoding(this);
      __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
    } else {
      assert(vector_length(this) == 2, "sanity");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}
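
// The SSE fallbacks above (and in the neighboring rules) start with a movdqu
// copy because the legacy two-operand shift encodings shift their first
// operand in place, while the AVX paths use the non-destructive three-operand
// VEX forms. Scalar C++ analogy of the two-operand discipline (illustrative):
//
//   #include <cstdint>
//   void model_psllq(uint64_t dst[2], const uint64_t src[2], int cnt) {
//     dst[0] = src[0]; dst[1] = src[1];  // movdqu dst, src
//     dst[0] <<= cnt;  dst[1] <<= cnt;   // psllq dst, cnt shifts dst in place
//   }
//
// Note also that RShiftVL (arithmetic) is intentionally absent here; it is
// handled by the ArithmeticRightShift rules below, since psraq only exists
// on AVX-512.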

// -------------------ArithmeticRightShift -----------------------------------
// Long vector arithmetic right shift
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
  predicate(VectorNode::is_vshift_cnt(n->in(2)) && UseAVX <= 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen == 2) {
      assert(UseSSE >= 2, "required");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
      __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
      __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
      __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
      __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
    } else {
      assert(vlen == 4, "sanity");
      assert(UseAVX > 1, "required");
      int vlen_enc = Assembler::AVX_256bit;
      __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
      __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
      __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
      __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
  predicate(VectorNode::is_vshift_cnt(n->in(2)) && UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
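
// The pre-AVX-512 rule above emulates the missing 64-bit arithmetic right
// shift with the identity (x >> n) == (((x >>> n) ^ m) - m), where
// m = sign_mask >>> n. A scalar C++ check of the identity (illustrative only):
//
//   #include <cstdint>
//   int64_t sar64(int64_t x, int n) {                // 0 <= n < 64
//     uint64_t m = 0x8000000000000000ULL >> n;       // psrlq on the sign mask
//     uint64_t t = ((uint64_t)x >> n) ^ m;           // psrlq + pxor
//     return (int64_t)(t - m);                       // psubq
//   }
//
// For non-negative x the xor/sub pair cancels out; for negative x it
// propagates the shifted-in sign bits.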

// ------------------- Variable Shift -----------------------------
// Byte variable shift
instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{
  predicate(vector_length(n) <= 8 &&
            !VectorNode::is_vshift_cnt(n->in(2)) &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp, TEMP scratch);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp, $scratch as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister, $scratch$$Register);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{
  predicate(vector_length(n) == 16 &&
            !VectorNode::is_vshift_cnt(n->in(2)) &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 and $scratch as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    // Shift lower half and get word result in dst
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register);

    // Shift upper half and get word result in vtmp1
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register);

    // Merge and down convert the two word results to byte in dst
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}
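
// Unlike the is_vshift_cnt rules earlier in this section, which broadcast a
// single scalar count to every lane, these variable-shift rules take a whole
// vector of counts, one per lane. Conceptually, varshiftbw widens data and
// counts and leans on the AVX2 dword variable shifts (vpsllvd family), since
// no byte or word variable shifts exist before AVX512BW. The per-lane
// contract as a scalar C++ model (illustrative only):
//
//   #include <cstdint>
//   void var_lshift_bytes(uint8_t* dst, const uint8_t* src,
//                         const uint8_t* cnt, int n) {
//     for (int i = 0; i < n; i++) {
//       dst[i] = (uint8_t)(src[i] << cnt[i]);  // each lane uses its own count
//     }
//   }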

instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4, rRegP scratch) %{
  predicate(vector_length(n) == 32 &&
            !VectorNode::is_vshift_cnt(n->in(2)) &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP scratch);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 and $scratch as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    // Process lower 128 bits and get result in dst
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register);
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Process higher 128 bits and get result in vtmp3
    __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
    __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister, $scratch$$Register);
    __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register);
    __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Merge the two results in dst
    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{
  predicate(vector_length(n) <= 32 &&
            !VectorNode::is_vshift_cnt(n->in(2)) &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp, TEMP scratch);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp, $scratch as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{
  predicate(vector_length(n) == 64 &&
            !VectorNode::is_vshift_cnt(n->in(2)) &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 and $scratch as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_256bit;
    __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register);
    __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
    __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
    __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
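
// With AVX512BW the hardware gains word-granularity variable shifts
// (vpsllvw/vpsrlvw/vpsravw), so the evarshiftb-based rules above need only
// one widen/shift/narrow round per half instead of the AVX2 dword detour.
// Intrinsics analogy for the underlying word shift (illustrative only):
//
//   #include <immintrin.h>
//   __m512i var_shl_words(__m512i v, __m512i cnt) {
//     return _mm512_sllv_epi16(v, cnt);  // AVX512BW variable 16-bit shift
//   }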

// Short variable shift
instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{
  predicate(vector_length(n) <= 8 &&
            !VectorNode::is_vshift_cnt(n->in(2)) &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp, TEMP scratch);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register);
    __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{
  predicate(vector_length(n) == 16 &&
            !VectorNode::is_vshift_cnt(n->in(2)) &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
    __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register);

    // Shift upper half, with result in dst using vtmp1 as TEMP
    __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
    __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register);

    // Merge lower and upper half result into dst
    __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
  predicate(!VectorNode::is_vshift_cnt(n->in(2)) &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Integer variable shift
instruct vshiftI_var(vec dst, vec src, vec shift) %{
  predicate(!VectorNode::is_vshift_cnt(n->in(2)));
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Long variable shift
instruct vshiftL_var(vec dst, vec src, vec shift) %{
  predicate(!VectorNode::is_vshift_cnt(n->in(2)));
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Long variable arithmetic right shift
instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(vector_length(n) <= 4 &&
            !VectorNode::is_vshift_cnt(n->in(2)) &&
            UseAVX == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
                 $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
  predicate(!VectorNode::is_vshift_cnt(n->in(2)) &&
            UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
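
// For the variable arithmetic long shift, AVX-512 has vpsravq directly, which
// is why the evex rule above needs no TEMPs, while the AVX2 rule goes through
// varshiftq with an extra vector TEMP to emulate the operation. Intrinsics
// analogy for the AVX-512 path (illustrative only):
//
//   #include <immintrin.h>
//   __m256i sar_var_epi64(__m256i v, __m256i cnt) {
//     return _mm256_srav_epi64(v, cnt);  // vpsravq, AVX512F + AVX512VL
//   }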
xor vectors" %} 6562 ins_encode %{ 6563 int vlen_enc = vector_length_encoding(this); 6564 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6565 %} 6566 ins_pipe( pipe_slow ); 6567%} 6568 6569// --------------------------------- VectorCast -------------------------------------- 6570 6571instruct vcastBtoX(vec dst, vec src) %{ 6572 match(Set dst (VectorCastB2X src)); 6573 format %{ "vector_cast_b2x $dst,$src\t!" %} 6574 ins_encode %{ 6575 assert(UseAVX > 0, "required"); 6576 6577 BasicType to_elem_bt = vector_element_basic_type(this); 6578 int vlen_enc = vector_length_encoding(this); 6579 switch (to_elem_bt) { 6580 case T_SHORT: 6581 __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6582 break; 6583 case T_INT: 6584 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6585 break; 6586 case T_FLOAT: 6587 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6588 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6589 break; 6590 case T_LONG: 6591 __ vpmovsxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6592 break; 6593 case T_DOUBLE: 6594 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6595 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6596 break; 6597 6598 default: assert(false, "%s", type2name(to_elem_bt)); 6599 } 6600 %} 6601 ins_pipe( pipe_slow ); 6602%} 6603 6604instruct castStoX(vec dst, vec src, rRegP scratch) %{ 6605 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 6606 vector_length(n->in(1)) <= 8 && // src 6607 vector_element_basic_type(n) == T_BYTE); 6608 effect(TEMP scratch); 6609 match(Set dst (VectorCastS2X src)); 6610 format %{ "vector_cast_s2x $dst,$src\t! using $scratch as TEMP" %} 6611 ins_encode %{ 6612 assert(UseAVX > 0, "required"); 6613 6614 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, $scratch$$Register); 6615 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 6616 %} 6617 ins_pipe( pipe_slow ); 6618%} 6619 6620instruct vcastStoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ 6621 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 6622 vector_length(n->in(1)) == 16 && // src 6623 vector_element_basic_type(n) == T_BYTE); 6624 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6625 match(Set dst (VectorCastS2X src)); 6626 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp, $scratch as TEMP" %} 6627 ins_encode %{ 6628 assert(UseAVX > 0, "required"); 6629 6630 int vlen_enc = vector_length_encoding(vector_length_in_bytes(this, $src)); 6631 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); 6632 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 6633 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 6634 %} 6635 ins_pipe( pipe_slow ); 6636%} 6637 6638instruct vcastStoX_evex(vec dst, vec src) %{ 6639 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 6640 (vector_length_in_bytes(n) >= vector_length_in_bytes(n->in(1)))); // dst >= src 6641 match(Set dst (VectorCastS2X src)); 6642 format %{ "vector_cast_s2x $dst,$src\t!" 

// --------------------------------- VectorCast --------------------------------------

instruct vcastBtoX(vec dst, vec src) %{
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType to_elem_bt = vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    switch (to_elem_bt) {
      case T_SHORT:
        __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_INT:
        __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_FLOAT:
        __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      case T_LONG:
        __ vpmovsxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_DOUBLE:
        __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;

      default: assert(false, "%s", type2name(to_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct castStoX(vec dst, vec src, rRegP scratch) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            vector_length(n->in(1)) <= 8 && // src
            vector_element_basic_type(n) == T_BYTE);
  effect(TEMP scratch);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t! using $scratch as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, $scratch$$Register);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastStoX(vec dst, vec src, vec vtmp, rRegP scratch) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            vector_length(n->in(1)) == 16 && // src
            vector_element_basic_type(n) == T_BYTE);
  effect(TEMP dst, TEMP vtmp, TEMP scratch);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t! using $vtmp, $scratch as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    int vlen_enc = vector_length_encoding(vector_length_in_bytes(this, $src));
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register);
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastStoX_evex(vec dst, vec src) %{
  predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
            (vector_length_in_bytes(n) >= vector_length_in_bytes(n->in(1)))); // dst >= src
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = vector_element_basic_type(this);
    int src_vlen_enc = vector_length_encoding(this, $src);
    int vlen_enc = vector_length_encoding(this);
    switch (to_elem_bt) {
      case T_BYTE:
        if (!VM_Version::supports_avx512vl()) {
          vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_INT:
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_FLOAT:
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      case T_LONG:
        __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_DOUBLE:
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct castItoX(vec dst, vec src, rRegP scratch) %{
  predicate(UseAVX <= 2 &&
            (vector_length_in_bytes(n->in(1)) <= 16) &&
            (vector_length_in_bytes(n) < vector_length_in_bytes(n->in(1)))); // dst < src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src\t! using $scratch as TEMP" %}
  effect(TEMP scratch);
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType to_elem_bt = vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this, $src);

    if (to_elem_bt == T_BYTE) {
      __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, $scratch$$Register);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
      __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastItoX(vec dst, vec src, vec vtmp, rRegP scratch) %{
  predicate(UseAVX <= 2 &&
            (vector_length_in_bytes(n->in(1)) == 32) &&
            (vector_length_in_bytes(n) < vector_length_in_bytes(n->in(1)))); // dst < src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src\t!
using $vtmp and $scratch as TEMP" %} 6707 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6708 ins_encode %{ 6709 assert(UseAVX > 0, "required"); 6710 6711 BasicType to_elem_bt = vector_element_basic_type(this); 6712 int vlen_enc = vector_length_encoding(this, $src); 6713 6714 if (to_elem_bt == T_BYTE) { 6715 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, $scratch$$Register); 6716 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 6717 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6718 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 6719 } else { 6720 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 6721 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); 6722 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 6723 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6724 } 6725 %} 6726 ins_pipe( pipe_slow ); 6727%} 6728 6729instruct vcastItoX_evex(vec dst, vec src) %{ 6730 predicate(UseAVX > 2 || 6731 (vector_length_in_bytes(n) >= vector_length_in_bytes(n->in(1)))); // dst >= src 6732 match(Set dst (VectorCastI2X src)); 6733 format %{ "vector_cast_i2x $dst,$src\t!" %} 6734 ins_encode %{ 6735 assert(UseAVX > 0, "required"); 6736 6737 BasicType dst_elem_bt = vector_element_basic_type(this); 6738 int src_vlen_enc = vector_length_encoding(this, $src); 6739 int dst_vlen_enc = vector_length_encoding(this); 6740 switch (dst_elem_bt) { 6741 case T_BYTE: 6742 if (!VM_Version::supports_avx512vl()) { 6743 src_vlen_enc = Assembler::AVX_512bit; 6744 } 6745 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 6746 break; 6747 case T_SHORT: 6748 if (!VM_Version::supports_avx512vl()) { 6749 src_vlen_enc = Assembler::AVX_512bit; 6750 } 6751 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 6752 break; 6753 case T_FLOAT: 6754 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 6755 break; 6756 case T_LONG: 6757 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 6758 break; 6759 case T_DOUBLE: 6760 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 6761 break; 6762 default: 6763 ShouldNotReachHere(); 6764 } 6765 %} 6766 ins_pipe( pipe_slow ); 6767%} 6768 6769instruct vcastLtoBS(vec dst, vec src, rRegP scratch) %{ 6770 predicate((vector_element_basic_type(n) == T_BYTE || vector_element_basic_type(n) == T_SHORT) && 6771 UseAVX <= 2); 6772 match(Set dst (VectorCastL2X src)); 6773 effect(TEMP scratch); 6774 format %{ "vector_cast_l2x $dst,$src\t! using $scratch as TEMP" %} 6775 ins_encode %{ 6776 assert(UseAVX > 0, "required"); 6777 6778 int vlen = vector_length_in_bytes(this, $src); 6779 BasicType to_elem_bt = vector_element_basic_type(this); 6780 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? 
ExternalAddress(vector_int_to_byte_mask()) 6781 : ExternalAddress(vector_int_to_short_mask()); 6782 if (vlen <= 16) { 6783 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 6784 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, $scratch$$Register); 6785 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 6786 } else { 6787 assert(vlen <= 32, "required"); 6788 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 6789 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 6790 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, $scratch$$Register); 6791 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 6792 } 6793 if (to_elem_bt == T_BYTE) { 6794 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 6795 } 6796 %} 6797 ins_pipe( pipe_slow ); 6798%} 6799 6800instruct vcastLtoX_evex(vec dst, vec src) %{ 6801 predicate(UseAVX > 2 || 6802 (vector_element_basic_type(n) == T_INT || 6803 vector_element_basic_type(n) == T_FLOAT || 6804 vector_element_basic_type(n) == T_DOUBLE)); 6805 match(Set dst (VectorCastL2X src)); 6806 format %{ "vector_cast_l2x $dst,$src\t!" %} 6807 ins_encode %{ 6808 BasicType to_elem_bt = vector_element_basic_type(this); 6809 int vlen = vector_length_in_bytes(this, $src); 6810 int vlen_enc = vector_length_encoding(this, $src); 6811 switch (to_elem_bt) { 6812 case T_BYTE: 6813 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 6814 vlen_enc = Assembler::AVX_512bit; 6815 } 6816 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6817 break; 6818 case T_SHORT: 6819 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 6820 vlen_enc = Assembler::AVX_512bit; 6821 } 6822 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6823 break; 6824 case T_INT: 6825 if (vlen == 8) { 6826 if ($dst$$XMMRegister != $src$$XMMRegister) { 6827 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6828 } 6829 } else if (vlen == 16) { 6830 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 6831 } else if (vlen == 32) { 6832 if (UseAVX > 2) { 6833 if (!VM_Version::supports_avx512vl()) { 6834 vlen_enc = Assembler::AVX_512bit; 6835 } 6836 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6837 } else { 6838 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 6839 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 6840 } 6841 } else { // vlen == 64 6842 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6843 } 6844 break; 6845 case T_FLOAT: 6846 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 6847 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6848 break; 6849 case T_DOUBLE: 6850 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 6851 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6852 break; 6853 6854 default: assert(false, "%s", type2name(to_elem_bt)); 6855 } 6856 %} 6857 ins_pipe( pipe_slow ); 6858%} 6859 6860instruct vcastFtoD_reg(vec dst, vec src) %{ 6861 predicate(vector_element_basic_type(n) == T_DOUBLE); 6862 match(Set dst (VectorCastF2X src)); 6863 format %{ "vector_cast_f2x $dst,$src\t!" 
%} 6864 ins_encode %{ 6865 int vlen_enc = vector_length_encoding(this); 6866 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6867 %} 6868 ins_pipe( pipe_slow ); 6869%} 6870 6871instruct vcastDtoF_reg(vec dst, vec src) %{ 6872 predicate(vector_element_basic_type(n) == T_FLOAT); 6873 match(Set dst (VectorCastD2X src)); 6874 format %{ "vector_cast_d2x $dst,$src\t!" %} 6875 ins_encode %{ 6876 int vlen_enc = vector_length_encoding(this, $src); 6877 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6878 %} 6879 ins_pipe( pipe_slow ); 6880%} 6881 6882// --------------------------------- VectorMaskCmp -------------------------------------- 6883 6884instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 6885 predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 6886 vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 6887 is_floating_point_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 6888 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 6889 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %} 6890 ins_encode %{ 6891 int vlen_enc = vector_length_encoding(this, $src1); 6892 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 6893 if (vector_element_basic_type(this, $src1) == T_FLOAT) { 6894 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 6895 } else { 6896 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 6897 } 6898 %} 6899 ins_pipe( pipe_slow ); 6900%} 6901 6902instruct evcmpFD(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{ 6903 predicate(vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 6904 is_floating_point_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 6905 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 6906 effect(TEMP scratch, TEMP ktmp); 6907 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} 6908 ins_encode %{ 6909 int vlen_enc = Assembler::AVX_512bit; 6910 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 6911 KRegister mask = k0; // The comparison itself is not being masked. 6912 if (vector_element_basic_type(this, $src1) == T_FLOAT) { 6913 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 6914 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register); 6915 } else { 6916 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 6917 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register); 6918 } 6919 %} 6920 ins_pipe( pipe_slow ); 6921%} 6922 6923instruct vcmp(legVec dst, legVec src1, legVec src2, immI8 cond, rRegP scratch) %{ 6924 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vl()) && 6925 !is_unsigned_booltest_pred(n->in(2)->get_int()) && 6926 vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 6927 vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 6928 is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 6929 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 6930 effect(TEMP scratch); 6931 format %{ "vector_compare $dst,$src1,$src2,$cond\t! 
using $scratch as TEMP" %} 6932 ins_encode %{ 6933 int vlen_enc = vector_length_encoding(this, $src1); 6934 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 6935 Assembler::Width ww = widthForType(vector_element_basic_type(this, $src1)); 6936 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, ww, vlen_enc, $scratch$$Register); 6937 %} 6938 ins_pipe( pipe_slow ); 6939%} 6940 6941instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec vtmp1, legVec vtmp2, rRegP scratch) %{ 6942 predicate((UseAVX == 2 || !VM_Version::supports_avx512vl()) && 6943 is_unsigned_booltest_pred(n->in(2)->get_int()) && 6944 vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 6945 vector_length_in_bytes(n->in(1)->in(1)) <= 16 && // src1 6946 is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 6947 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 6948 effect(TEMP vtmp1, TEMP vtmp2, TEMP scratch); 6949 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} 6950 ins_encode %{ 6951 int vlen = vector_length_in_bytes(this, $src1); 6952 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 6953 BasicType bt = vector_element_basic_type(this, $src1); 6954 __ vpcmpu(bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen, $vtmp1$$XMMRegister, 6955 $vtmp2$$XMMRegister, $scratch$$Register); 6956 %} 6957 ins_pipe( pipe_slow ); 6958%} 6959 6960instruct vcmpu32(legVec dst, legVec src1, legVec src2, immI8 cond, legVec vtmp1, legVec vtmp2, legVec vtmp3, rRegP scratch) %{ 6961 predicate((UseAVX == 2 || !VM_Version::supports_avx512vl()) && 6962 is_unsigned_booltest_pred(n->in(2)->get_int()) && 6963 vector_length_in_bytes(n->in(1)->in(1)) == 32 && // src1 6964 is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 6965 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 6966 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP scratch); 6967 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} 6968 ins_encode %{ 6969 int vlen = vector_length_in_bytes(this, $src1); 6970 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 6971 BasicType bt = vector_element_basic_type(this, $src1); 6972 __ vpcmpu32(bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen, $vtmp1$$XMMRegister, 6973 $vtmp2$$XMMRegister, $vtmp3$$XMMRegister, $scratch$$Register); 6974 %} 6975 ins_pipe( pipe_slow ); 6976%} 6977 6978instruct evcmp(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{ 6979 predicate(UseAVX > 2 && 6980 (VM_Version::supports_avx512vl() || 6981 vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1 6982 is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 6983 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 6984 effect(TEMP scratch, TEMP ktmp); 6985 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} 6986 ins_encode %{ 6987 assert(UseAVX > 2, "required"); 6988 6989 int vlen_enc = vector_length_encoding(this, $src1); 6990 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 6991 bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); 6992 KRegister mask = k0; // The comparison itself is not being masked. 
6993 bool merge = false; 6994 BasicType src1_elem_bt = vector_element_basic_type(this, $src1); 6995 6996 switch (src1_elem_bt) { 6997 case T_BYTE: { 6998 __ evpcmpb($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 6999 __ evmovdqub($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); 7000 break; 7001 } 7002 case T_SHORT: { 7003 __ evpcmpw($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7004 __ evmovdquw($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); 7005 break; 7006 } 7007 case T_INT: { 7008 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7009 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); 7010 break; 7011 } 7012 case T_LONG: { 7013 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7014 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); 7015 break; 7016 } 7017 default: assert(false, "%s", type2name(src1_elem_bt)); 7018 } 7019 %} 7020 ins_pipe( pipe_slow ); 7021%} 7022 7023// Extract 7024 7025instruct extractI(rRegI dst, legVec src, immU8 idx) %{ 7026 predicate(vector_length_in_bytes(n->in(1)) <= 16); // src 7027 match(Set dst (ExtractI src idx)); 7028 match(Set dst (ExtractS src idx)); 7029#ifdef _LP64 7030 match(Set dst (ExtractB src idx)); 7031#endif 7032 format %{ "extractI $dst,$src,$idx\t!" %} 7033 ins_encode %{ 7034 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 7035 7036 BasicType elem_bt = vector_element_basic_type(this, $src); 7037 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant); 7038 %} 7039 ins_pipe( pipe_slow ); 7040%} 7041 7042instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{ 7043 predicate(vector_length_in_bytes(n->in(1)) == 32 || // src 7044 vector_length_in_bytes(n->in(1)) == 64); // src 7045 match(Set dst (ExtractI src idx)); 7046 match(Set dst (ExtractS src idx)); 7047#ifdef _LP64 7048 match(Set dst (ExtractB src idx)); 7049#endif 7050 effect(TEMP vtmp); 7051 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %} 7052 ins_encode %{ 7053 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 7054 7055 BasicType elem_bt = vector_element_basic_type(this, $src); 7056 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7057 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant); 7058 %} 7059 ins_pipe( pipe_slow ); 7060%} 7061 7062#ifdef _LP64 7063instruct extractL(rRegL dst, legVec src, immU8 idx) %{ 7064 predicate(vector_length(n->in(1)) <= 2); // src 7065 match(Set dst (ExtractL src idx)); 7066 format %{ "extractL $dst,$src,$idx\t!" 
%} 7067 ins_encode %{ 7068 assert(UseSSE >= 4, "required"); 7069 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 7070 7071 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant); 7072 %} 7073 ins_pipe( pipe_slow ); 7074%} 7075 7076instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{ 7077 predicate(vector_length(n->in(1)) == 4 || // src 7078 vector_length(n->in(1)) == 8); // src 7079 match(Set dst (ExtractL src idx)); 7080 effect(TEMP vtmp); 7081 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %} 7082 ins_encode %{ 7083 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 7084 7085 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7086 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 7087 %} 7088 ins_pipe( pipe_slow ); 7089%} 7090#endif 7091 7092instruct extractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{ 7093 predicate(vector_length(n->in(1)) <= 4); 7094 match(Set dst (ExtractF src idx)); 7095 effect(TEMP dst, TEMP tmp, TEMP vtmp); 7096 format %{ "extractF $dst,$src,$idx\t! using $tmp, $vtmp as TEMP" %} 7097 ins_encode %{ 7098 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 7099 7100 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $tmp$$Register, $vtmp$$XMMRegister); 7101 %} 7102 ins_pipe( pipe_slow ); 7103%} 7104 7105instruct vextractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{ 7106 predicate(vector_length(n->in(1)/*src*/) == 8 || 7107 vector_length(n->in(1)/*src*/) == 16); 7108 match(Set dst (ExtractF src idx)); 7109 effect(TEMP tmp, TEMP vtmp); 7110 format %{ "vextractF $dst,$src,$idx\t! using $tmp, $vtmp as TEMP" %} 7111 ins_encode %{ 7112 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 7113 7114 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7115 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant, $tmp$$Register); 7116 %} 7117 ins_pipe( pipe_slow ); 7118%} 7119 7120instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 7121 predicate(vector_length(n->in(1)) == 2); // src 7122 match(Set dst (ExtractD src idx)); 7123 format %{ "extractD $dst,$src,$idx\t!" %} 7124 ins_encode %{ 7125 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 7126 7127 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7128 %} 7129 ins_pipe( pipe_slow ); 7130%} 7131 7132instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 7133 predicate(vector_length(n->in(1)) == 4 || // src 7134 vector_length(n->in(1)) == 8); // src 7135 match(Set dst (ExtractD src idx)); 7136 effect(TEMP vtmp); 7137 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %} 7138 ins_encode %{ 7139 assert($idx$$constant < (int)vector_length(this, $src), "out of bounds"); 7140 7141 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7142 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 7143 %} 7144 ins_pipe( pipe_slow ); 7145%} 7146 7147// --------------------------------- Vector Blend -------------------------------------- 7148 7149instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 7150 predicate(UseAVX == 0); 7151 match(Set dst (VectorBlend (Binary dst src) mask)); 7152 format %{ "vector_blend $dst,$src,$mask\t! 
using $tmp as TEMP" %} 7153 effect(TEMP tmp); 7154 ins_encode %{ 7155 assert(UseSSE >= 4, "required"); 7156 7157 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 7158 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 7159 } 7160 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 7161 %} 7162 ins_pipe( pipe_slow ); 7163%} 7164 7165instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 7166 predicate(UseAVX > 0 && 7167 vector_length_in_bytes(n) <= 32 && 7168 is_integral_type(vector_element_basic_type(n))); 7169 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7170 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 7171 ins_encode %{ 7172 int vlen_enc = vector_length_encoding(this); 7173 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 7174 %} 7175 ins_pipe( pipe_slow ); 7176%} 7177 7178instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 7179 predicate(UseAVX > 0 && 7180 vector_length_in_bytes(n) <= 32 && 7181 !is_integral_type(vector_element_basic_type(n))); 7182 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7183 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 7184 ins_encode %{ 7185 int vlen_enc = vector_length_encoding(this); 7186 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 7187 %} 7188 ins_pipe( pipe_slow ); 7189%} 7190 7191instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, rRegP scratch, kReg ktmp) %{ 7192 predicate(vector_length_in_bytes(n) == 64); 7193 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7194 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $scratch and k2 as TEMP" %} 7195 effect(TEMP scratch, TEMP ktmp); 7196 ins_encode %{ 7197 int vlen_enc = Assembler::AVX_512bit; 7198 BasicType elem_bt = vector_element_basic_type(this); 7199 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, $scratch$$Register); 7200 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 7201 %} 7202 ins_pipe( pipe_slow ); 7203%} 7204 7205// --------------------------------- ABS -------------------------------------- 7206// a = |a| 7207instruct vabsB_reg(vec dst, vec src) %{ 7208 match(Set dst (AbsVB src)); 7209 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 7210 ins_encode %{ 7211 uint vlen = vector_length(this); 7212 if (vlen <= 16) { 7213 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 7214 } else { 7215 int vlen_enc = vector_length_encoding(this); 7216 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7217 } 7218 %} 7219 ins_pipe( pipe_slow ); 7220%} 7221 7222instruct vabsS_reg(vec dst, vec src) %{ 7223 match(Set dst (AbsVS src)); 7224 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 7225 ins_encode %{ 7226 uint vlen = vector_length(this); 7227 if (vlen <= 8) { 7228 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 7229 } else { 7230 int vlen_enc = vector_length_encoding(this); 7231 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7232 } 7233 %} 7234 ins_pipe( pipe_slow ); 7235%} 7236 7237instruct vabsI_reg(vec dst, vec src) %{ 7238 match(Set dst (AbsVI src)); 7239 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 7240 ins_encode %{ 7241 uint vlen = vector_length(this); 7242 if (vlen <= 4) { 7243 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 7244 } else { 7245 int vlen_enc = vector_length_encoding(this); 
7246 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7247 } 7248 %} 7249 ins_pipe( pipe_slow ); 7250%} 7251 7252instruct vabsL_reg(vec dst, vec src) %{ 7253 match(Set dst (AbsVL src)); 7254 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 7255 ins_encode %{ 7256 assert(UseAVX > 2, "required"); 7257 int vlen_enc = vector_length_encoding(this); 7258 if (!VM_Version::supports_avx512vl()) { 7259 vlen_enc = Assembler::AVX_512bit; 7260 } 7261 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7262 %} 7263 ins_pipe( pipe_slow ); 7264%} 7265 7266// --------------------------------- ABSNEG -------------------------------------- 7267 7268instruct vabsnegF(vec dst, vec src, rRegI scratch) %{ 7269 predicate(vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 7270 match(Set dst (AbsVF src)); 7271 match(Set dst (NegVF src)); 7272 effect(TEMP scratch); 7273 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 7274 ins_cost(150); 7275 ins_encode %{ 7276 int opcode = this->ideal_Opcode(); 7277 int vlen = vector_length(this); 7278 if (vlen == 2) { 7279 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); 7280 } else { 7281 assert(vlen == 8 || vlen == 16, "required"); 7282 int vlen_enc = vector_length_encoding(this); 7283 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register); 7284 } 7285 %} 7286 ins_pipe( pipe_slow ); 7287%} 7288 7289instruct vabsneg4F(vec dst, rRegI scratch) %{ 7290 predicate(vector_length(n) == 4); 7291 match(Set dst (AbsVF dst)); 7292 match(Set dst (NegVF dst)); 7293 effect(TEMP scratch); 7294 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 7295 ins_cost(150); 7296 ins_encode %{ 7297 int opcode = this->ideal_Opcode(); 7298 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $scratch$$Register); 7299 %} 7300 ins_pipe( pipe_slow ); 7301%} 7302 7303instruct vabsnegD(vec dst, vec src, rRegI scratch) %{ 7304 match(Set dst (AbsVD src)); 7305 match(Set dst (NegVD src)); 7306 effect(TEMP scratch); 7307 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 7308 ins_encode %{ 7309 int opcode = this->ideal_Opcode(); 7310 uint vlen = vector_length(this); 7311 if (vlen == 2) { 7312 assert(UseSSE >= 2, "required"); 7313 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); 7314 } else { 7315 int vlen_enc = vector_length_encoding(this); 7316 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register); 7317 } 7318 %} 7319 ins_pipe( pipe_slow ); 7320%} 7321 7322//------------------------------------- VectorTest -------------------------------------------- 7323 7324#ifdef _LP64 7325instruct vptest_alltrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp1, legVec vtmp2, rFlagsReg cr) %{ 7326 predicate(vector_length_in_bytes(n->in(1)) >= 4 && 7327 vector_length_in_bytes(n->in(1)) < 16 && 7328 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 7329 match(Set dst (VectorTest src1 src2 )); 7330 effect(TEMP vtmp1, TEMP vtmp2, KILL cr); 7331 format %{ "vector_test $dst,$src1, $src2\t! 
// --------------------------------- ABSNEG --------------------------------------

instruct vabsnegF(vec dst, vec src, rRegI scratch) %{
  predicate(vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  effect(TEMP scratch);
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this);
    if (vlen == 2) {
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register);
    } else {
      assert(vlen == 8 || vlen == 16, "required");
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg4F(vec dst, rRegI scratch) %{
  predicate(vector_length(n) == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  effect(TEMP scratch);
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsnegD(vec dst, vec src, rRegI scratch) %{
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  effect(TEMP scratch);
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    uint vlen = vector_length(this);
    if (vlen == 2) {
      assert(UseSSE >= 2, "required");
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}
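
// Abs and Neg on FP vectors are both sign-bit manipulations, which is why a
// single vabsnegf/vabsnegd helper (dispatching on the ideal opcode) covers
// both matches. Schematically, on the raw bits of each float lane:
//
//   dst[i] = src[i] & 0x7FFFFFFF;   // AbsVF: clear the sign bit
//   dst[i] = src[i] ^ 0x80000000;   // NegVF: flip the sign bit
//
// The bit masks live in constant memory, hence the $scratch register for
// materializing their address.
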
using $cr as TEMP" %} 7395 ins_encode %{ 7396 int vlen = vector_length_in_bytes(this, $src1); 7397 __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg); 7398 __ setb(Assembler::notZero, $dst$$Register); 7399 __ movzbl($dst$$Register, $dst$$Register); 7400 %} 7401 ins_pipe( pipe_slow ); 7402%} 7403 7404instruct vptest_anytrue_evex(rRegI dst, legVec src1, legVec src2, kReg ktmp, rFlagsReg cr) %{ 7405 predicate(vector_length_in_bytes(n->in(1)) == 64 && 7406 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 7407 match(Set dst (VectorTest src1 src2 )); 7408 effect(KILL cr, TEMP ktmp); 7409 format %{ "vector_test_any_true $dst,$src1,$src2\t! using $cr as TEMP" %} 7410 ins_encode %{ 7411 int vlen = vector_length_in_bytes(this, $src1); 7412 __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, $ktmp$$KRegister); 7413 __ setb(Assembler::notZero, $dst$$Register); 7414 __ movzbl($dst$$Register, $dst$$Register); 7415 %} 7416 ins_pipe( pipe_slow ); 7417%} 7418 7419instruct cmpvptest_anytrue_lt16(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero, legVec vtmp) %{ 7420 predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 4 && 7421 vector_length_in_bytes(n->in(1)->in(1)) < 16 && 7422 static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne); 7423 match(Set cr (CmpI (VectorTest src1 src2) zero)); 7424 effect(TEMP vtmp); 7425 format %{ "cmp_vector_test_any_true $src1,$src2\t! using $vtmp as TEMP" %} 7426 ins_encode %{ 7427 int vlen = vector_length_in_bytes(this, $src1); 7428 __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 7429 %} 7430 ins_pipe( pipe_slow ); 7431%} 7432 7433instruct cmpvptest_anytrue(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero) %{ 7434 predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 16 && 7435 vector_length_in_bytes(n->in(1)->in(1)) < 64 && 7436 static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne); 7437 match(Set cr (CmpI (VectorTest src1 src2) zero)); 7438 format %{ "cmp_vector_test_any_true $src1,$src2\t!" %} 7439 ins_encode %{ 7440 int vlen = vector_length_in_bytes(this, $src1); 7441 __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg); 7442 %} 7443 ins_pipe( pipe_slow ); 7444%} 7445 7446instruct cmpvptest_anytrue_evex(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero, kReg ktmp) %{ 7447 predicate(vector_length_in_bytes(n->in(1)->in(1)) == 64 && 7448 static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne); 7449 match(Set cr (CmpI (VectorTest src1 src2) zero)); 7450 effect(TEMP ktmp); 7451 format %{ "cmp_vector_test_any_true $src1,$src2\t!" 
//------------------------------------- LoadMask --------------------------------------------

instruct loadMask(legVec dst, legVec src) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst,$src\t!" %}
  ins_encode %{
    int vlen_in_bytes = vector_length_in_bytes(this);
    BasicType elem_bt = vector_element_basic_type(this);

    __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadMask_evex(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst,$src\t!" %}
  ins_encode %{
    int vlen_in_bytes = vector_length_in_bytes(this);
    BasicType elem_bt = vector_element_basic_type(this);

    __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, false);
  %}
  ins_pipe( pipe_slow );
%}
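
// VectorLoadMask widens a vector of boolean bytes (0 or 1 per element) into
// an element-sized mask of all-zeros / all-ones lanes, so the result can
// feed blends and masked operations directly. Scalar sketch:
//
//   for (int i = 0; i < vlen; i++) {
//     dst[i] = (src_bytes[i] != 0) ? -1 : 0;   // per destination lane
//   }
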
//------------------------------------- StoreMask --------------------------------------------

instruct storeMask1B(vec dst, vec src, immI_1 size) %{
  predicate(vector_length(n) < 64 || VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst,$src\t!" %}
  ins_encode %{
    assert(UseSSE >= 3, "required");
    if (vector_length_in_bytes(this) <= 16) {
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(UseAVX >= 2, "required");
      int src_vlen_enc = vector_length_encoding(this, $src);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct storeMask2B(vec dst, vec src, immI_2 size) %{
  predicate(vector_length(n) <= 8);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst,$src\t!" %}
  ins_encode %{
    assert(UseSSE >= 3, "required");
    __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
    __ packsswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask2B(vec dst, vec src, immI_2 size) %{
  predicate(vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorStoreMask src size));
  effect(TEMP dst);
  format %{ "vector_store_mask $dst,$src\t!" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
    __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask2B_evex(vec dst, vec src, immI_2 size) %{
  predicate(VM_Version::supports_avx512bw());
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst,$src\t!" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeMask4B(vec dst, vec src, immI_4 size) %{
  predicate(vector_length(n) <= 4 && UseAVX <= 2);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst,$src\t!" %}
  ins_encode %{
    assert(UseSSE >= 3, "required");
    __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
    __ packssdw($dst$$XMMRegister, $dst$$XMMRegister);
    __ packsswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask4B(vec dst, vec src, immI_4 size) %{
  predicate(vector_length(n) == 8 && UseAVX <= 2);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst,$src\t!" %}
  effect(TEMP dst);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
    __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask4B_evex(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst,$src\t!" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

using $vtmp as TEMP" %} 7610 effect(TEMP dst, TEMP vtmp); 7611 ins_encode %{ 7612 int vlen_enc = Assembler::AVX_128bit; 7613 __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); 7614 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 7615 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); 7616 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7617 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7618 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7619 %} 7620 ins_pipe( pipe_slow ); 7621%} 7622 7623instruct vstoreMask8B_evex(vec dst, vec src, immI_8 size) %{ 7624 predicate(UseAVX > 2); 7625 match(Set dst (VectorStoreMask src size)); 7626 format %{ "vector_store_mask $dst,$src\t!" %} 7627 ins_encode %{ 7628 int src_vlen_enc = vector_length_encoding(this, $src); 7629 int dst_vlen_enc = vector_length_encoding(this); 7630 if (!VM_Version::supports_avx512vl()) { 7631 src_vlen_enc = Assembler::AVX_512bit; 7632 } 7633 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7634 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 7635 %} 7636 ins_pipe( pipe_slow ); 7637%} 7638 7639instruct vmaskcast(vec dst) %{ 7640 predicate((vector_length(n) == vector_length(n->in(1))) && 7641 (vector_length_in_bytes(n) == vector_length_in_bytes(n->in(1)))); 7642 match(Set dst (VectorMaskCast dst)); 7643 ins_cost(0); 7644 format %{ "vector_mask_cast $dst" %} 7645 ins_encode %{ 7646 // empty 7647 %} 7648 ins_pipe(empty); 7649%} 7650 7651//-------------------------------- Load Iota Indices ---------------------------------- 7652 7653instruct loadIotaIndices(vec dst, immI_0 src, rRegP scratch) %{ 7654 predicate(vector_element_basic_type(n) == T_BYTE); 7655 match(Set dst (VectorLoadConst src)); 7656 effect(TEMP scratch); 7657 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %} 7658 ins_encode %{ 7659 int vlen_in_bytes = vector_length_in_bytes(this); 7660 __ load_iota_indices($dst$$XMMRegister, $scratch$$Register, vlen_in_bytes); 7661 %} 7662 ins_pipe( pipe_slow ); 7663%} 7664 7665//-------------------------------- Rearrange ---------------------------------- 7666 7667// LoadShuffle/Rearrange for Byte 7668 7669instruct loadShuffleB(vec dst) %{ 7670 predicate(vector_element_basic_type(n) == T_BYTE); 7671 match(Set dst (VectorLoadShuffle dst)); 7672 format %{ "vector_load_shuffle $dst, $dst" %} 7673 ins_encode %{ 7674 // empty 7675 %} 7676 ins_pipe( pipe_slow ); 7677%} 7678 7679instruct rearrangeB(vec dst, vec shuffle) %{ 7680 predicate(vector_element_basic_type(n) == T_BYTE && 7681 vector_length(n) < 32); 7682 match(Set dst (VectorRearrange dst shuffle)); 7683 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 7684 ins_encode %{ 7685 assert(UseSSE >= 4, "required"); 7686 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 7687 %} 7688 ins_pipe( pipe_slow ); 7689%} 7690 7691instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2, rRegP scratch) %{ 7692 predicate(vector_element_basic_type(n) == T_BYTE && 7693 vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); 7694 match(Set dst (VectorRearrange src shuffle)); 7695 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); 7696 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
//-------------------------------- Load Iota Indices ----------------------------------

instruct loadIotaIndices(vec dst, immI_0 src, rRegP scratch) %{
  predicate(vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorLoadConst src));
  effect(TEMP scratch);
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    int vlen_in_bytes = vector_length_in_bytes(this);
    __ load_iota_indices($dst$$XMMRegister, $scratch$$Register, vlen_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
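
// load_iota_indices fills the vector from a constant table {0, 1, 2, ...},
// i.e. dst[i] = (byte)i for each of the vlen_in_bytes lanes; $scratch only
// holds the table address.
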
//-------------------------------- Rearrange ----------------------------------

// LoadShuffle/Rearrange for Byte

instruct loadShuffleB(vec dst) %{
  predicate(vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorLoadShuffle dst));
  format %{ "vector_load_shuffle $dst, $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(vector_element_basic_type(n) == T_BYTE &&
            vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2, rRegP scratch) %{
  predicate(vector_element_basic_type(n) == T_BYTE &&
            vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2, $scratch as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from the other 128-bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from its own 128-bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from the other lane in the shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, $scratch$$Register);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_evex(vec dst, vec src, vec shuffle) %{
  predicate(vector_element_basic_type(n) == T_BYTE &&
            vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
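
// Every VectorRearrange is a gather-by-index within the vector:
//
//   for (int i = 0; i < vlen; i++) {
//     dst[i] = src[shuffle[i]];   // indices assumed in range
//   }
//
// The AVX2 byte rule above needs the extra dance because vpshufb only
// shuffles within 128-bit lanes: both lane orders are shuffled separately
// and then blended, with the blend mask derived from which indices reach
// across lanes.
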
// LoadShuffle/Rearrange for Short

instruct loadShuffleS(vec dst, vec src, vec vtmp, rRegP scratch) %{
  predicate(vector_element_basic_type(n) == T_SHORT &&
            vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp, TEMP scratch);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp and $scratch as TEMP" %}
  ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask;
    // only a byte shuffle instruction is available on these platforms
    int vlen_in_bytes = vector_length_in_bytes(this);
    if (UseAVX == 0) {
      assert(vlen_in_bytes <= 16, "required");
      // Multiply each shuffle by two to get byte index
      __ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister);
      __ psllw($vtmp$$XMMRegister, 1);

      // Duplicate to create 2 copies of byte index
      __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
      __ psllw($dst$$XMMRegister, 8);
      __ por($dst$$XMMRegister, $vtmp$$XMMRegister);

      // Add one to get alternate byte index
      __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), $scratch$$Register);
      __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
    } else {
      assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
      int vlen_enc = vector_length_encoding(this);
      // Multiply each shuffle by two to get byte index
      __ vpmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
      __ vpsllw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc);

      // Duplicate to create 2 copies of byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
      __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

      // Add one to get alternate byte index
      __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, $scratch$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(vector_element_basic_type(n) == T_SHORT &&
            vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2, rRegP scratch) %{
  predicate(vector_element_basic_type(n) == T_SHORT &&
            vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2, $scratch as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from the other 128-bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from its own 128-bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from the other lane in the shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, $scratch$$Register);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadShuffleS_evex(vec dst, vec src) %{
  predicate(vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
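
// The index expansion performed by loadShuffleS, as a scalar sketch: each
// short index s becomes the byte-index pair (2*s, 2*s + 1), so a byte
// shuffle moves both halves of each 16-bit element (the constant table
// vector_short_shufflemask supplies the +1 on alternate bytes):
//
//   for (int i = 0; i < vlen; i++) {
//     byte_shuffle[2*i]     = (byte)(2 * shuffle[i]);       // low byte
//     byte_shuffle[2*i + 1] = (byte)(2 * shuffle[i] + 1);   // high byte
//   }
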
// LoadShuffle/Rearrange for Integer and Float

instruct loadShuffleI(vec dst, vec src, vec vtmp, rRegP scratch) %{
  predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) &&
            vector_length(n) == 4 && UseAVX < 2);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp, TEMP scratch);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp and $scratch as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // Create a byte shuffle mask from the int shuffle mask;
    // only a byte shuffle instruction is available on these platforms

    // Duplicate and multiply each shuffle by 4
    __ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister);
    __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ psllw($vtmp$$XMMRegister, 2);

    // Duplicate again to create 4 copies of byte index
    __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ por($vtmp$$XMMRegister, $dst$$XMMRegister);

    // Add 3,2,1,0 to get alternate byte index
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), $scratch$$Register);
    __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) &&
            vector_length(n) == 4 && UseAVX < 2);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadShuffleI_avx(vec dst, vec src) %{
  predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX >= 2);
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX >= 2);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (vlen_enc == Assembler::AVX_128bit) {
      vlen_enc = Assembler::AVX_256bit;
    }
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
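
// Same expansion idea for 32-bit elements: int index s becomes the four
// byte indices 4*s + {0,1,2,3}. For example, the int shuffle {2,0,3,1}
// expands to the byte shuffle {8,9,10,11, 0,1,2,3, 12,13,14,15, 4,5,6,7}.
// The _avx rule widens vlen_enc because vpermd is only encodable at 256-bit
// width or wider.
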
// LoadShuffle/Rearrange for Long and Double

instruct loadShuffleL(vec dst, vec src, vec vtmp, rRegP scratch) %{
  predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp, TEMP scratch);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp and $scratch as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask;
    // only a double word shuffle instruction is available on these platforms

    // Multiply each shuffle by two to get double word index
    __ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc);

    // Duplicate each double word shuffle
    __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
    __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

    // Add one to get alternate double word index
    __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadShuffleL_evex(vec dst, vec src) %{
  predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    if (vlen_enc == Assembler::AVX_128bit) {
      vlen_enc = Assembler::AVX_256bit;
    }
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------
// a * b + c

instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaF_mem(vec a, memory b, vec c) %{
  predicate(vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_mem(vec a, memory b, vec c) %{
  predicate(vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
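
// FmaVF/FmaVD compute c = a * b + c per lane with a single rounding step,
// matching the semantics of Math.fma; an ordinary multiply followed by an
// add is not a valid substitute because it would round twice:
//
//   for (int i = 0; i < vlen; i++) {
//     c[i] = fma(a[i], b[i], c[i]);   // round(a*b + c), not round(round(a*b) + c)
//   }
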
// --------------------------------- Vector Multiply Add --------------------------------------

instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
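
// MulAddVS2VI maps directly onto pmaddwd: each pair of adjacent 16-bit
// products is summed into one 32-bit lane. Scalar sketch:
//
//   for (int i = 0; i < int_vlen; i++) {
//     dst[i] = (int)src1[2*i]     * src2[2*i]
//            + (int)src1[2*i + 1] * src2[2*i + 1];
//   }
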
vector ternary logic" %} 8089 ins_encode %{ 8090 int vector_len = vector_length_encoding(this); 8091 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len); 8092 %} 8093 ins_pipe( pipe_slow ); 8094%} 8095 8096instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ 8097 predicate(vector_length_in_bytes(n->in(1)->in(1)) > 8); 8098 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func))); 8099 effect(TEMP dst); 8100 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 8101 ins_encode %{ 8102 int vector_len = vector_length_encoding(this); 8103 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len); 8104 %} 8105 ins_pipe( pipe_slow ); 8106%} 8107 8108// --------------------------------- Rotation Operations ---------------------------------- 8109instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{ 8110 match(Set dst (RotateLeftV src shift)); 8111 match(Set dst (RotateRightV src shift)); 8112 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %} 8113 ins_encode %{ 8114 int opcode = this->ideal_Opcode(); 8115 int vector_len = vector_length_encoding(this); 8116 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 8117 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 8118 %} 8119 ins_pipe( pipe_slow ); 8120%} 8121 8122instruct vprorate(vec dst, vec src, vec shift) %{ 8123 match(Set dst (RotateLeftV src shift)); 8124 match(Set dst (RotateRightV src shift)); 8125 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %} 8126 ins_encode %{ 8127 int opcode = this->ideal_Opcode(); 8128 int vector_len = vector_length_encoding(this); 8129 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 8130 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8131 %} 8132 ins_pipe( pipe_slow ); 8133%} 8134 8135#ifdef _LP64 8136// ---------------------------------- Masked Operations ------------------------------------ 8137 8138instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 8139 match(Set dst (VectorCmpMasked src1 (Binary src2 mask))); 8140 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr); 8141 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %} 8142 ins_encode %{ 8143 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); 8144 assert(vector_element_basic_type(this, $src1) == vector_element_basic_type(this, $src2), "mismatch"); 8145 8146 Label DONE; 8147 int vlen_enc = vector_length_encoding(this, $src1); 8148 BasicType elem_bt = vector_element_basic_type(this, $src1); 8149 8150 __ knotql($ktmp2$$KRegister, $mask$$KRegister); 8151 __ mov64($dst$$Register, -1L); 8152 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc); 8153 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); 8154 __ jccb(Assembler::carrySet, DONE); 8155 __ kmovql($dst$$Register, $ktmp1$$KRegister); 8156 __ notq($dst$$Register); 8157 __ tzcntq($dst$$Register, $dst$$Register); 8158 __ bind(DONE); 8159 %} 8160 ins_pipe( pipe_slow ); 8161%} 8162 8163 8164instruct vmasked_load64(vec dst, memory mem, kReg mask) %{ 8165 match(Set dst (LoadVectorMasked mem mask)); 8166 format %{ "vector_masked_load $dst, $mem, $mask \t! 
vector masked copy" %} 8167 ins_encode %{ 8168 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 8169 int vector_len = vector_length_encoding(this); 8170 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, vector_len); 8171 %} 8172 ins_pipe( pipe_slow ); 8173%} 8174 8175instruct vmask_gen(kReg dst, rRegL len, rRegL temp) %{ 8176 match(Set dst (VectorMaskGen len)); 8177 effect(TEMP temp); 8178 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %} 8179 ins_encode %{ 8180 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register); 8181 %} 8182 ins_pipe( pipe_slow ); 8183%} 8184 8185instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{ 8186 match(Set dst (VectorMaskGen len)); 8187 format %{ "vector_mask_gen $len \t! vector mask generator" %} 8188 effect(TEMP temp); 8189 ins_encode %{ 8190 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); 8191 __ kmovql($dst$$KRegister, $temp$$Register); 8192 %} 8193 ins_pipe( pipe_slow ); 8194%} 8195 8196instruct vmasked_store64(memory mem, vec src, kReg mask) %{ 8197 match(Set mem (StoreVectorMasked mem (Binary src mask))); 8198 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 8199 ins_encode %{ 8200 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 8201 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 8202 int vector_len = vector_length_encoding(src_node); 8203 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, vector_len); 8204 %} 8205 ins_pipe( pipe_slow ); 8206%} 8207 8208instruct vmask_truecount_evex(rRegI dst, vec mask, rRegL tmp, kReg ktmp, vec xtmp) %{ 8209 predicate(VM_Version::supports_avx512vlbw()); 8210 match(Set dst (VectorMaskTrueCount mask)); 8211 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp, TEMP xtmp); 8212 format %{ "vector_truecount_evex $mask \t! vector mask true count" %} 8213 ins_encode %{ 8214 int opcode = this->ideal_Opcode(); 8215 int vlen_enc = vector_length_encoding(this, $mask); 8216 int mask_len = vector_length(this, $mask); 8217 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 8218 $tmp$$Register, $ktmp$$KRegister, mask_len, vlen_enc); 8219 %} 8220 ins_pipe( pipe_slow ); 8221%} 8222 8223instruct vmask_first_or_last_true_evex(rRegI dst, vec mask, rRegL tmp, kReg ktmp, vec xtmp, rFlagsReg cr) %{ 8224 predicate(VM_Version::supports_avx512vlbw()); 8225 match(Set dst (VectorMaskFirstTrue mask)); 8226 match(Set dst (VectorMaskLastTrue mask)); 8227 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp, TEMP xtmp, KILL cr); 8228 format %{ "vector_mask_first_or_last_true_evex $mask \t! vector first/last true location" %} 8229 ins_encode %{ 8230 int opcode = this->ideal_Opcode(); 8231 int vlen_enc = vector_length_encoding(this, $mask); 8232 int mask_len = vector_length(this, $mask); 8233 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 8234 $tmp$$Register, $ktmp$$KRegister, mask_len, vlen_enc); 8235 %} 8236 ins_pipe( pipe_slow ); 8237%} 8238 8239instruct vmask_truecount_avx(rRegI dst, vec mask, rRegL tmp, vec xtmp, vec xtmp1) %{ 8240 predicate(!VM_Version::supports_avx512vlbw()); 8241 match(Set dst (VectorMaskTrueCount mask)); 8242 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, TEMP xtmp1); 8243 format %{ "vector_truecount_avx $mask \t! 
vector mask true count" %} 8244 ins_encode %{ 8245 int opcode = this->ideal_Opcode(); 8246 int vlen_enc = vector_length_encoding(this, $mask); 8247 int mask_len = vector_length(this, $mask); 8248 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 8249 $xtmp1$$XMMRegister, $tmp$$Register, mask_len, vlen_enc); 8250 %} 8251 ins_pipe( pipe_slow ); 8252%} 8253 8254instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, rRegL tmp, vec xtmp, vec xtmp1, rFlagsReg cr) %{ 8255 predicate(!VM_Version::supports_avx512vlbw()); 8256 match(Set dst (VectorMaskFirstTrue mask)); 8257 match(Set dst (VectorMaskLastTrue mask)); 8258 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, TEMP xtmp1, KILL cr); 8259 format %{ "vector_mask_first_or_last_true_avx $mask \t! vector first/last true location" %} 8260 ins_encode %{ 8261 int opcode = this->ideal_Opcode(); 8262 int vlen_enc = vector_length_encoding(this, $mask); 8263 int mask_len = vector_length(this, $mask); 8264 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 8265 $xtmp1$$XMMRegister, $tmp$$Register, mask_len, vlen_enc); 8266 %} 8267 ins_pipe( pipe_slow ); 8268%} 8269#endif // _LP64 8270 8271instruct castVV(vec dst) 8272%{ 8273 match(Set dst (CastVV dst)); 8274 8275 size(0); 8276 format %{ "# castVV of $dst" %} 8277 ins_encode(/* empty encoding */); 8278 ins_cost(0); 8279 ins_pipe(empty); 8280%} 8281 8282instruct castVVLeg(legVec dst) 8283%{ 8284 match(Set dst (CastVV dst)); 8285 8286 size(0); 8287 format %{ "# castVV of $dst" %} 8288 ins_encode(/* empty encoding */); 8289 ins_cost(0); 8290 ins_pipe(empty); 8291%} 8292