1// 2// Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved. 3// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4// 5// This code is free software; you can redistribute it and/or modify it 6// under the terms of the GNU General Public License version 2 only, as 7// published by the Free Software Foundation. 8// 9// This code is distributed in the hope that it will be useful, but WITHOUT 10// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12// version 2 for more details (a copy is included in the LICENSE file that 13// accompanied this code). 14// 15// You should have received a copy of the GNU General Public License version 16// 2 along with this work; if not, write to the Free Software Foundation, 17// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18// 19// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20// or visit www.oracle.com if you need additional information or have any 21// questions. 22// 23// 24 25// X86 Common Architecture Description File 26 27//----------REGISTER DEFINITION BLOCK------------------------------------------ 28// This information is used by the matcher and the register allocator to 29// describe individual registers and classes of registers within the target 30// archtecture. 31 32register %{ 33//----------Architecture Description Register Definitions---------------------- 34// General Registers 35// "reg_def" name ( register save type, C convention save type, 36// ideal register type, encoding ); 37// Register Save Types: 38// 39// NS = No-Save: The register allocator assumes that these registers 40// can be used without saving upon entry to the method, & 41// that they do not need to be saved at call sites. 42// 43// SOC = Save-On-Call: The register allocator assumes that these registers 44// can be used without saving upon entry to the method, 45// but that they must be saved at call sites. 46// 47// SOE = Save-On-Entry: The register allocator assumes that these registers 48// must be saved before using them upon entry to the 49// method, but they do not need to be saved at call 50// sites. 51// 52// AS = Always-Save: The register allocator assumes that these registers 53// must be saved before using them upon entry to the 54// method, & that they must be saved at call sites. 55// 56// Ideal Register Type is used to determine how to save & restore a 57// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get 58// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. 59// 60// The encoding number is the actual bit-pattern placed into the opcodes. 61 62// XMM registers. 512-bit registers or 8 words each, labeled (a)-p. 63// Word a in each register holds a Float, words ab hold a Double. 64// The whole registers are used in SSE4.2 version intrinsics, 65// array copy stubs and superword operations (see UseSSE42Intrinsics, 66// UseXMMForArrayCopy and UseSuperword flags). 67// For pre EVEX enabled architectures: 68// XMM8-XMM15 must be encoded with REX (VEX for UseAVX) 69// For EVEX enabled architectures: 70// XMM8-XMM31 must be encoded with REX (EVEX for UseAVX). 71// 72// Linux ABI: No register preserved across function calls 73// XMM0-XMM7 might hold parameters 74// Windows ABI: XMM6-XMM31 preserved across function calls 75// XMM0-XMM3 might hold parameters 76 77reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 78reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 79reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 80reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 81reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 82reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 83reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 84reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 85reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); 86reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); 87reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); 88reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); 89reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); 90reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); 91reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); 92reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); 93 94reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 95reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 96reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 97reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 98reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 99reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 100reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 101reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 102reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); 103reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); 104reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10)); 105reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); 106reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); 107reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); 108reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); 109reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); 110 111reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 112reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 113reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 114reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 115reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 116reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 117reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 118reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 119reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); 120reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); 121reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); 122reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); 123reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); 124reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); 125reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); 126reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); 127 128reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()); 129reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213#ifdef _LP64 214 215reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 216reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 217reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 218reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 219reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 220reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 221reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 222reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 223reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 224reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 225reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 226reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 227reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 228reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); 229reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 230reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 231 232reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 233reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 234reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 235reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 236reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 237reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 238reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 239reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 240reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 241reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 242reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 243reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 244reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 245reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 246reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 247reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 248 249reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 250reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 251reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 252reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 253reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 254reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 255reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 256reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 257reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 258reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 259reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 260reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 261reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 262reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 263reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 264reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 265 266reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 267reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 268reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 269reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 270reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 271reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 272reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 273reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 274reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 275reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 276reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 277reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 278reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 279reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 280reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 281reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 282 283reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 284reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 285reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 286reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 287reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 288reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 289reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 290reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 291reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 292reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 293reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 294reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 295reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 296reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 297reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 298reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 299 300reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 301reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 302reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 303reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 304reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 305reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 306reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 307reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 308reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 309reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 310reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 311reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 312reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 313reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 314reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 315reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 316 317reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 318reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 319reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 320reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 321reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 322reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 323reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 324reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 325reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 326reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 327reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 328reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 329reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 330reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 331reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 332reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 333 334reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 335reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 336reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 337reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 338reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 339reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 340reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 341reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 342reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 343reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 344reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); 345reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 346reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 347reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 348reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 349reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 350 351reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 352reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 353reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 354reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 355reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 356reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 357reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 358reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 359reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 360reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 361reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 362reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 363reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 364reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 365reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 366reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 367 368reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 369reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 370reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 371reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 372reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 373reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 374reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 375reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 376reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); 377reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 378reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 379reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 380reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 381reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 382reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 383reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 384 385reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 386reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 387reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 388reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 389reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 390reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 391reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 392reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 393reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 394reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 395reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 396reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11)); 397reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 398reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 399reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 400reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 401 402reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 403reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 404reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 405reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 406reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 407reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 408reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 409reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 410reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 411reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 412reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 413reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 414reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 415reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 416reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 417reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 418 419reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 420reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 421reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 422reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 423reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 424reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 425reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 426reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 427reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 428reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 429reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 430reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 431reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 432reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 433reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 434reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 435 436reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 437reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 438reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 439reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 440reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 441reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 442reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 443reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 444reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 445reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 446reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 447reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 448reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12)); 449reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 450reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 451reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 452 453reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 454reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 455reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 456reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 457reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 458reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 459reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 460reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 461reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 462reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 463reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 464reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); 465reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 466reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 467reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 468reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 469 470reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 471reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 472reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 473reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 474reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 475reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 476reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 477reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 478reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 479reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 480reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 481reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 482reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 483reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 484reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 485reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 486 487reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 488reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 489reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 490reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 491reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 492reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 493reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 494reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 495reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 496reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 497reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 498reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 499reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 500reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13)); 501reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 502reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 503 504reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 505reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 506reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 507reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 508reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 509reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 510reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 511reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 512reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 513reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 514reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 515reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 516reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 517reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 518reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 519reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 520 521reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 522reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 523reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 524reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 525reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 526reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 527reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 528reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 529reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 530reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 531reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 532reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 533reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 534reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 535reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 536reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 537 538reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 539reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 540reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 541reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 542reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 543reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 544reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 545reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 546reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 547reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 548reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 549reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 550reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 551reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 552reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14)); 553reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 554 555reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 556reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 557reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 558reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 559reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 560reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 561reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 562reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 563reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 564reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 565reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 566reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 567reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 568reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 569reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 570reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 571 572reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 573reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 574reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 575reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 576reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 577reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 578reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 579reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 580reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 581reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 582reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 583reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 584reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 585reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 586reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 587reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 588 589reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 590reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 591reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 592reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 593reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 594reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 595reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 596reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 597reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 598reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 599reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 600reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 601reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 602reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 603reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 604reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15)); 605 606reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 607reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 608reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 609reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 610reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 611reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 612reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 613reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 614reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 615reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 616reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 617reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 618reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 619reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 620reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 621reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 622 623#endif // _LP64 624 625#ifdef _LP64 626reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 627#else 628reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 629#endif // _LP64 630 631alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 632 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 633 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 634 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 635 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 636 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 637 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 638 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 639#ifdef _LP64 640 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 641 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 642 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 643 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 644 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 645 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 646 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 647 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 648 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 649 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 650 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 651 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 652 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 653 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 654 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 655 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 656 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 657 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 658 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 659 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 660 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 661 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 662 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 663 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 664#endif 665 ); 666 667// flags allocation class should be last. 668alloc_class chunk2(RFLAGS); 669 670// Singleton class for condition codes 671reg_class int_flags(RFLAGS); 672 673// Class for pre evex float registers 674reg_class float_reg_legacy(XMM0, 675 XMM1, 676 XMM2, 677 XMM3, 678 XMM4, 679 XMM5, 680 XMM6, 681 XMM7 682#ifdef _LP64 683 ,XMM8, 684 XMM9, 685 XMM10, 686 XMM11, 687 XMM12, 688 XMM13, 689 XMM14, 690 XMM15 691#endif 692 ); 693 694// Class for evex float registers 695reg_class float_reg_evex(XMM0, 696 XMM1, 697 XMM2, 698 XMM3, 699 XMM4, 700 XMM5, 701 XMM6, 702 XMM7 703#ifdef _LP64 704 ,XMM8, 705 XMM9, 706 XMM10, 707 XMM11, 708 XMM12, 709 XMM13, 710 XMM14, 711 XMM15, 712 XMM16, 713 XMM17, 714 XMM18, 715 XMM19, 716 XMM20, 717 XMM21, 718 XMM22, 719 XMM23, 720 XMM24, 721 XMM25, 722 XMM26, 723 XMM27, 724 XMM28, 725 XMM29, 726 XMM30, 727 XMM31 728#endif 729 ); 730 731reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 732reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 733 734// Class for pre evex double registers 735reg_class double_reg_legacy(XMM0, XMM0b, 736 XMM1, XMM1b, 737 XMM2, XMM2b, 738 XMM3, XMM3b, 739 XMM4, XMM4b, 740 XMM5, XMM5b, 741 XMM6, XMM6b, 742 XMM7, XMM7b 743#ifdef _LP64 744 ,XMM8, XMM8b, 745 XMM9, XMM9b, 746 XMM10, XMM10b, 747 XMM11, XMM11b, 748 XMM12, XMM12b, 749 XMM13, XMM13b, 750 XMM14, XMM14b, 751 XMM15, XMM15b 752#endif 753 ); 754 755// Class for evex double registers 756reg_class double_reg_evex(XMM0, XMM0b, 757 XMM1, XMM1b, 758 XMM2, XMM2b, 759 XMM3, XMM3b, 760 XMM4, XMM4b, 761 XMM5, XMM5b, 762 XMM6, XMM6b, 763 XMM7, XMM7b 764#ifdef _LP64 765 ,XMM8, XMM8b, 766 XMM9, XMM9b, 767 XMM10, XMM10b, 768 XMM11, XMM11b, 769 XMM12, XMM12b, 770 XMM13, XMM13b, 771 XMM14, XMM14b, 772 XMM15, XMM15b, 773 XMM16, XMM16b, 774 XMM17, XMM17b, 775 XMM18, XMM18b, 776 XMM19, XMM19b, 777 XMM20, XMM20b, 778 XMM21, XMM21b, 779 XMM22, XMM22b, 780 XMM23, XMM23b, 781 XMM24, XMM24b, 782 XMM25, XMM25b, 783 XMM26, XMM26b, 784 XMM27, XMM27b, 785 XMM28, XMM28b, 786 XMM29, XMM29b, 787 XMM30, XMM30b, 788 XMM31, XMM31b 789#endif 790 ); 791 792reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 793reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 794 795// Class for pre evex 32bit vector registers 796reg_class vectors_reg_legacy(XMM0, 797 XMM1, 798 XMM2, 799 XMM3, 800 XMM4, 801 XMM5, 802 XMM6, 803 XMM7 804#ifdef _LP64 805 ,XMM8, 806 XMM9, 807 XMM10, 808 XMM11, 809 XMM12, 810 XMM13, 811 XMM14, 812 XMM15 813#endif 814 ); 815 816// Class for evex 32bit vector registers 817reg_class vectors_reg_evex(XMM0, 818 XMM1, 819 XMM2, 820 XMM3, 821 XMM4, 822 XMM5, 823 XMM6, 824 XMM7 825#ifdef _LP64 826 ,XMM8, 827 XMM9, 828 XMM10, 829 XMM11, 830 XMM12, 831 XMM13, 832 XMM14, 833 XMM15, 834 XMM16, 835 XMM17, 836 XMM18, 837 XMM19, 838 XMM20, 839 XMM21, 840 XMM22, 841 XMM23, 842 XMM24, 843 XMM25, 844 XMM26, 845 XMM27, 846 XMM28, 847 XMM29, 848 XMM30, 849 XMM31 850#endif 851 ); 852 853reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 854reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 855 856// Class for all 64bit vector registers 857reg_class vectord_reg_legacy(XMM0, XMM0b, 858 XMM1, XMM1b, 859 XMM2, XMM2b, 860 XMM3, XMM3b, 861 XMM4, XMM4b, 862 XMM5, XMM5b, 863 XMM6, XMM6b, 864 XMM7, XMM7b 865#ifdef _LP64 866 ,XMM8, XMM8b, 867 XMM9, XMM9b, 868 XMM10, XMM10b, 869 XMM11, XMM11b, 870 XMM12, XMM12b, 871 XMM13, XMM13b, 872 XMM14, XMM14b, 873 XMM15, XMM15b 874#endif 875 ); 876 877// Class for all 64bit vector registers 878reg_class vectord_reg_evex(XMM0, XMM0b, 879 XMM1, XMM1b, 880 XMM2, XMM2b, 881 XMM3, XMM3b, 882 XMM4, XMM4b, 883 XMM5, XMM5b, 884 XMM6, XMM6b, 885 XMM7, XMM7b 886#ifdef _LP64 887 ,XMM8, XMM8b, 888 XMM9, XMM9b, 889 XMM10, XMM10b, 890 XMM11, XMM11b, 891 XMM12, XMM12b, 892 XMM13, XMM13b, 893 XMM14, XMM14b, 894 XMM15, XMM15b, 895 XMM16, XMM16b, 896 XMM17, XMM17b, 897 XMM18, XMM18b, 898 XMM19, XMM19b, 899 XMM20, XMM20b, 900 XMM21, XMM21b, 901 XMM22, XMM22b, 902 XMM23, XMM23b, 903 XMM24, XMM24b, 904 XMM25, XMM25b, 905 XMM26, XMM26b, 906 XMM27, XMM27b, 907 XMM28, XMM28b, 908 XMM29, XMM29b, 909 XMM30, XMM30b, 910 XMM31, XMM31b 911#endif 912 ); 913 914reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 915reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 916 917// Class for all 128bit vector registers 918reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 919 XMM1, XMM1b, XMM1c, XMM1d, 920 XMM2, XMM2b, XMM2c, XMM2d, 921 XMM3, XMM3b, XMM3c, XMM3d, 922 XMM4, XMM4b, XMM4c, XMM4d, 923 XMM5, XMM5b, XMM5c, XMM5d, 924 XMM6, XMM6b, XMM6c, XMM6d, 925 XMM7, XMM7b, XMM7c, XMM7d 926#ifdef _LP64 927 ,XMM8, XMM8b, XMM8c, XMM8d, 928 XMM9, XMM9b, XMM9c, XMM9d, 929 XMM10, XMM10b, XMM10c, XMM10d, 930 XMM11, XMM11b, XMM11c, XMM11d, 931 XMM12, XMM12b, XMM12c, XMM12d, 932 XMM13, XMM13b, XMM13c, XMM13d, 933 XMM14, XMM14b, XMM14c, XMM14d, 934 XMM15, XMM15b, XMM15c, XMM15d 935#endif 936 ); 937 938// Class for all 128bit vector registers 939reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 940 XMM1, XMM1b, XMM1c, XMM1d, 941 XMM2, XMM2b, XMM2c, XMM2d, 942 XMM3, XMM3b, XMM3c, XMM3d, 943 XMM4, XMM4b, XMM4c, XMM4d, 944 XMM5, XMM5b, XMM5c, XMM5d, 945 XMM6, XMM6b, XMM6c, XMM6d, 946 XMM7, XMM7b, XMM7c, XMM7d 947#ifdef _LP64 948 ,XMM8, XMM8b, XMM8c, XMM8d, 949 XMM9, XMM9b, XMM9c, XMM9d, 950 XMM10, XMM10b, XMM10c, XMM10d, 951 XMM11, XMM11b, XMM11c, XMM11d, 952 XMM12, XMM12b, XMM12c, XMM12d, 953 XMM13, XMM13b, XMM13c, XMM13d, 954 XMM14, XMM14b, XMM14c, XMM14d, 955 XMM15, XMM15b, XMM15c, XMM15d, 956 XMM16, XMM16b, XMM16c, XMM16d, 957 XMM17, XMM17b, XMM17c, XMM17d, 958 XMM18, XMM18b, XMM18c, XMM18d, 959 XMM19, XMM19b, XMM19c, XMM19d, 960 XMM20, XMM20b, XMM20c, XMM20d, 961 XMM21, XMM21b, XMM21c, XMM21d, 962 XMM22, XMM22b, XMM22c, XMM22d, 963 XMM23, XMM23b, XMM23c, XMM23d, 964 XMM24, XMM24b, XMM24c, XMM24d, 965 XMM25, XMM25b, XMM25c, XMM25d, 966 XMM26, XMM26b, XMM26c, XMM26d, 967 XMM27, XMM27b, XMM27c, XMM27d, 968 XMM28, XMM28b, XMM28c, XMM28d, 969 XMM29, XMM29b, XMM29c, XMM29d, 970 XMM30, XMM30b, XMM30c, XMM30d, 971 XMM31, XMM31b, XMM31c, XMM31d 972#endif 973 ); 974 975reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 976reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 977 978// Class for all 256bit vector registers 979reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 980 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 981 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 982 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 983 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 984 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 985 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 986 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 987#ifdef _LP64 988 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 989 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 990 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 991 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 992 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 993 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 994 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 995 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 996#endif 997 ); 998 999// Class for all 256bit vector registers 1000reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1001 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1002 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1003 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1004 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1005 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1006 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1007 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1008#ifdef _LP64 1009 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1010 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1011 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1012 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1013 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1014 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1015 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1016 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1017 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1018 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1019 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1020 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1021 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1022 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1023 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1024 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1025 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1026 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1027 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1028 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1029 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1030 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1031 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1032 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1033#endif 1034 ); 1035 1036reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1037reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1038 1039// Class for all 512bit vector registers 1040reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1041 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1042 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1043 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1044 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1045 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1046 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1047 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1048#ifdef _LP64 1049 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1050 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1051 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1052 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1053 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1054 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1055 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1056 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1057 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1058 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1059 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1060 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1061 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1062 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1063 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1064 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1065 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1066 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1067 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1068 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1069 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1070 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1071 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1072 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1073#endif 1074 ); 1075 1076// Class for restricted 512bit vector registers 1077reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1078 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1079 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1080 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1081 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1082 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1083 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1084 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1085#ifdef _LP64 1086 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1087 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1088 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1089 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1090 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1091 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1092 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1093 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1094#endif 1095 ); 1096 1097reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1098reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1099 1100%} 1101 1102 1103//----------SOURCE BLOCK------------------------------------------------------- 1104// This is a block of C++ code which provides values, functions, and 1105// definitions necessary in the rest of the architecture description 1106 1107source_hpp %{ 1108// Header information of the source block. 1109// Method declarations/definitions which are used outside 1110// the ad-scope can conveniently be defined here. 1111// 1112// To keep related declarations/definitions/uses close together, 1113// we switch between source %{ }% and source_hpp %{ }% freely as needed. 1114 1115class NativeJump; 1116 1117class CallStubImpl { 1118 1119 //-------------------------------------------------------------- 1120 //---< Used for optimization in Compile::shorten_branches >--- 1121 //-------------------------------------------------------------- 1122 1123 public: 1124 // Size of call trampoline stub. 1125 static uint size_call_trampoline() { 1126 return 0; // no call trampolines on this platform 1127 } 1128 1129 // number of relocations needed by a call trampoline stub 1130 static uint reloc_call_trampoline() { 1131 return 0; // no call trampolines on this platform 1132 } 1133}; 1134 1135class HandlerImpl { 1136 1137 public: 1138 1139 static int emit_exception_handler(CodeBuffer &cbuf); 1140 static int emit_deopt_handler(CodeBuffer& cbuf); 1141 1142 static uint size_exception_handler() { 1143 // NativeCall instruction size is the same as NativeJump. 1144 // exception handler starts out as jump and can be patched to 1145 // a call be deoptimization. (4932387) 1146 // Note that this value is also credited (in output.cpp) to 1147 // the size of the code section. 1148 return NativeJump::instruction_size; 1149 } 1150 1151#ifdef _LP64 1152 static uint size_deopt_handler() { 1153 // three 5 byte instructions plus one move for unreachable address. 1154 return 15+3; 1155 } 1156#else 1157 static uint size_deopt_handler() { 1158 // NativeCall instruction size is the same as NativeJump. 1159 // exception handler starts out as jump and can be patched to 1160 // a call be deoptimization. (4932387) 1161 // Note that this value is also credited (in output.cpp) to 1162 // the size of the code section. 1163 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1164 } 1165#endif 1166}; 1167 1168%} // end source_hpp 1169 1170source %{ 1171 1172#include "opto/addnode.hpp" 1173 1174// Emit exception handler code. 1175// Stuff framesize into a register and call a VM stub routine. 1176int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1177 1178 // Note that the code buffer's insts_mark is always relative to insts. 1179 // That's why we must use the macroassembler to generate a handler. 1180 MacroAssembler _masm(&cbuf); 1181 address base = __ start_a_stub(size_exception_handler()); 1182 if (base == NULL) { 1183 ciEnv::current()->record_failure("CodeCache is full"); 1184 return 0; // CodeBuffer::expand failed 1185 } 1186 int offset = __ offset(); 1187 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1188 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1189 __ end_a_stub(); 1190 return offset; 1191} 1192 1193// Emit deopt handler code. 1194int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1195 1196 // Note that the code buffer's insts_mark is always relative to insts. 1197 // That's why we must use the macroassembler to generate a handler. 1198 MacroAssembler _masm(&cbuf); 1199 address base = __ start_a_stub(size_deopt_handler()); 1200 if (base == NULL) { 1201 ciEnv::current()->record_failure("CodeCache is full"); 1202 return 0; // CodeBuffer::expand failed 1203 } 1204 int offset = __ offset(); 1205 1206#ifdef _LP64 1207 address the_pc = (address) __ pc(); 1208 Label next; 1209 // push a "the_pc" on the stack without destroying any registers 1210 // as they all may be live. 1211 1212 // push address of "next" 1213 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1214 __ bind(next); 1215 // adjust it so it matches "the_pc" 1216 __ subptr(Address(rsp, 0), __ offset() - offset); 1217#else 1218 InternalAddress here(__ pc()); 1219 __ pushptr(here.addr()); 1220#endif 1221 1222 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1223 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1224 __ end_a_stub(); 1225 return offset; 1226} 1227 1228 1229//============================================================================= 1230 1231 // Float masks come from different places depending on platform. 1232#ifdef _LP64 1233 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1234 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1235 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1236 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1237#else 1238 static address float_signmask() { return (address)float_signmask_pool; } 1239 static address float_signflip() { return (address)float_signflip_pool; } 1240 static address double_signmask() { return (address)double_signmask_pool; } 1241 static address double_signflip() { return (address)double_signflip_pool; } 1242#endif 1243 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1244 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1245 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1246 1247//============================================================================= 1248const bool Matcher::match_rule_supported(int opcode) { 1249 if (!has_match_rule(opcode)) { 1250 return false; // no match rule present 1251 } 1252 switch (opcode) { 1253 case Op_AbsVL: 1254 if (UseAVX < 3) { 1255 return false; 1256 } 1257 break; 1258 case Op_PopCountI: 1259 case Op_PopCountL: 1260 if (!UsePopCountInstruction) { 1261 return false; 1262 } 1263 break; 1264 case Op_PopCountVI: 1265 if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq()) { 1266 return false; 1267 } 1268 break; 1269 case Op_MulVI: 1270 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1271 return false; 1272 } 1273 break; 1274 case Op_MulVL: 1275 case Op_MulReductionVL: 1276 if (VM_Version::supports_avx512dq() == false) { 1277 return false; 1278 } 1279 break; 1280 case Op_AddReductionVL: 1281 if (UseAVX < 3) { // only EVEX : vector connectivity becomes an issue here 1282 return false; 1283 } 1284 break; 1285 case Op_AbsVB: 1286 case Op_AbsVS: 1287 case Op_AbsVI: 1288 case Op_AddReductionVI: 1289 if (UseSSE < 3 || !VM_Version::supports_ssse3()) { // requires at least SSSE3 1290 return false; 1291 } 1292 break; 1293 case Op_MulReductionVI: 1294 if (UseSSE < 4) { // requires at least SSE4 1295 return false; 1296 } 1297 break; 1298 case Op_AddReductionVF: 1299 case Op_AddReductionVD: 1300 case Op_MulReductionVF: 1301 case Op_MulReductionVD: 1302 if (UseSSE < 1) { // requires at least SSE 1303 return false; 1304 } 1305 break; 1306 case Op_SqrtVD: 1307 case Op_SqrtVF: 1308 if (UseAVX < 1) { // enabled for AVX only 1309 return false; 1310 } 1311 break; 1312 case Op_CompareAndSwapL: 1313#ifdef _LP64 1314 case Op_CompareAndSwapP: 1315#endif 1316 if (!VM_Version::supports_cx8()) { 1317 return false; 1318 } 1319 break; 1320 case Op_CMoveVF: 1321 case Op_CMoveVD: 1322 if (UseAVX < 1 || UseAVX > 2) { 1323 return false; 1324 } 1325 break; 1326 case Op_StrIndexOf: 1327 if (!UseSSE42Intrinsics) { 1328 return false; 1329 } 1330 break; 1331 case Op_StrIndexOfChar: 1332 if (!UseSSE42Intrinsics) { 1333 return false; 1334 } 1335 break; 1336 case Op_OnSpinWait: 1337 if (VM_Version::supports_on_spin_wait() == false) { 1338 return false; 1339 } 1340 break; 1341 case Op_MulAddVS2VI: 1342 case Op_RShiftVL: 1343 case Op_AbsVD: 1344 case Op_NegVD: 1345 if (UseSSE < 2) { 1346 return false; 1347 } 1348 break; 1349 case Op_MulVB: 1350 case Op_LShiftVB: 1351 case Op_RShiftVB: 1352 case Op_URShiftVB: 1353 if (UseSSE < 4) { 1354 return false; 1355 } 1356 break; 1357#ifdef _LP64 1358 case Op_MaxD: 1359 case Op_MaxF: 1360 case Op_MinD: 1361 case Op_MinF: 1362 if (UseAVX < 1) { // enabled for AVX only 1363 return false; 1364 } 1365 break; 1366#endif 1367 case Op_CacheWB: 1368 case Op_CacheWBPreSync: 1369 case Op_CacheWBPostSync: 1370 if (!VM_Version::supports_data_cache_line_flush()) { 1371 return false; 1372 } 1373 break; 1374 case Op_RoundDoubleMode: 1375 if (UseSSE < 4) { 1376 return false; 1377 } 1378 break; 1379 case Op_RoundDoubleModeV: 1380 if (VM_Version::supports_avx() == false) { 1381 return false; // 128bit vroundpd is not available 1382 } 1383 break; 1384 } 1385 return true; // Match rules are supported by default. 1386} 1387 1388//------------------------------------------------------------------------ 1389 1390// Identify extra cases that we might want to provide match rules for vector nodes and 1391// other intrinsics guarded with vector length (vlen) and element type (bt). 1392const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1393 if (!match_rule_supported(opcode)) { 1394 return false; 1395 } 1396 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1397 // * SSE2 supports 128bit vectors for all types; 1398 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1399 // * AVX2 supports 256bit vectors for all types; 1400 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1401 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1402 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1403 // And MaxVectorSize is taken into account as well. 1404 if (!vector_size_supported(bt, vlen)) { 1405 return false; 1406 } 1407 // Special cases which require vector length follow: 1408 // * implementation limitations 1409 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1410 // * 128bit vroundpd instruction is present only in AVX1 1411 switch (opcode) { 1412 case Op_AbsVF: 1413 case Op_NegVF: 1414 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1415 return false; // 512bit vandps and vxorps are not available 1416 } 1417 break; 1418 case Op_AbsVD: 1419 case Op_NegVD: 1420 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1421 return false; // 512bit vandpd and vxorpd are not available 1422 } 1423 break; 1424 case Op_CMoveVF: 1425 if (vlen != 8) { 1426 return false; // implementation limitation (only vcmov8F_reg is present) 1427 } 1428 break; 1429 case Op_CMoveVD: 1430 if (vlen != 4) { 1431 return false; // implementation limitation (only vcmov4D_reg is present) 1432 } 1433 break; 1434 } 1435 return true; // Per default match rules are supported. 1436} 1437 1438// x86 supports generic vector operands: vec and legVec. 1439const bool Matcher::supports_generic_vector_operands = true; 1440 1441MachOper* Matcher::specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 1442 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 1443 bool legacy = (generic_opnd->opcode() == LEGVEC); 1444 if (!VM_Version::supports_avx512vlbwdq() && // KNL 1445 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 1446 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 1447 return new legVecZOper(); 1448 } 1449 if (legacy) { 1450 switch (ideal_reg) { 1451 case Op_VecS: return new legVecSOper(); 1452 case Op_VecD: return new legVecDOper(); 1453 case Op_VecX: return new legVecXOper(); 1454 case Op_VecY: return new legVecYOper(); 1455 case Op_VecZ: return new legVecZOper(); 1456 } 1457 } else { 1458 switch (ideal_reg) { 1459 case Op_VecS: return new vecSOper(); 1460 case Op_VecD: return new vecDOper(); 1461 case Op_VecX: return new vecXOper(); 1462 case Op_VecY: return new vecYOper(); 1463 case Op_VecZ: return new vecZOper(); 1464 } 1465 } 1466 ShouldNotReachHere(); 1467 return NULL; 1468} 1469 1470bool Matcher::is_generic_reg2reg_move(MachNode* m) { 1471 switch (m->rule()) { 1472 case MoveVec2Leg_rule: 1473 case MoveLeg2Vec_rule: 1474 return true; 1475 default: 1476 return false; 1477 } 1478} 1479 1480bool Matcher::is_generic_vector(MachOper* opnd) { 1481 switch (opnd->opcode()) { 1482 case VEC: 1483 case LEGVEC: 1484 return true; 1485 default: 1486 return false; 1487 } 1488} 1489 1490//------------------------------------------------------------------------ 1491 1492const bool Matcher::has_predicated_vectors(void) { 1493 bool ret_value = false; 1494 if (UseAVX > 2) { 1495 ret_value = VM_Version::supports_avx512vl(); 1496 } 1497 1498 return ret_value; 1499} 1500 1501const int Matcher::float_pressure(int default_pressure_threshold) { 1502 int float_pressure_threshold = default_pressure_threshold; 1503#ifdef _LP64 1504 if (UseAVX > 2) { 1505 // Increase pressure threshold on machines with AVX3 which have 1506 // 2x more XMM registers. 1507 float_pressure_threshold = default_pressure_threshold * 2; 1508 } 1509#endif 1510 return float_pressure_threshold; 1511} 1512 1513// Max vector size in bytes. 0 if not supported. 1514const int Matcher::vector_width_in_bytes(BasicType bt) { 1515 assert(is_java_primitive(bt), "only primitive type vectors"); 1516 if (UseSSE < 2) return 0; 1517 // SSE2 supports 128bit vectors for all types. 1518 // AVX2 supports 256bit vectors for all types. 1519 // AVX2/EVEX supports 512bit vectors for all types. 1520 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 1521 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 1522 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 1523 size = (UseAVX > 2) ? 64 : 32; 1524 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 1525 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 1526 // Use flag to limit vector size. 1527 size = MIN2(size,(int)MaxVectorSize); 1528 // Minimum 2 values in vector (or 4 for bytes). 1529 switch (bt) { 1530 case T_DOUBLE: 1531 case T_LONG: 1532 if (size < 16) return 0; 1533 break; 1534 case T_FLOAT: 1535 case T_INT: 1536 if (size < 8) return 0; 1537 break; 1538 case T_BOOLEAN: 1539 if (size < 4) return 0; 1540 break; 1541 case T_CHAR: 1542 if (size < 4) return 0; 1543 break; 1544 case T_BYTE: 1545 if (size < 4) return 0; 1546 break; 1547 case T_SHORT: 1548 if (size < 4) return 0; 1549 break; 1550 default: 1551 ShouldNotReachHere(); 1552 } 1553 return size; 1554} 1555 1556// Limits on vector size (number of elements) loaded into vector. 1557const int Matcher::max_vector_size(const BasicType bt) { 1558 return vector_width_in_bytes(bt)/type2aelembytes(bt); 1559} 1560const int Matcher::min_vector_size(const BasicType bt) { 1561 int max_size = max_vector_size(bt); 1562 // Min size which can be loaded into vector is 4 bytes. 1563 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 1564 return MIN2(size,max_size); 1565} 1566 1567// Vector ideal reg corresponding to specified size in bytes 1568const uint Matcher::vector_ideal_reg(int size) { 1569 assert(MaxVectorSize >= size, ""); 1570 switch(size) { 1571 case 4: return Op_VecS; 1572 case 8: return Op_VecD; 1573 case 16: return Op_VecX; 1574 case 32: return Op_VecY; 1575 case 64: return Op_VecZ; 1576 } 1577 ShouldNotReachHere(); 1578 return 0; 1579} 1580 1581// Only lowest bits of xmm reg are used for vector shift count. 1582const uint Matcher::vector_shift_count_ideal_reg(int size) { 1583 return Op_VecS; 1584} 1585 1586// x86 supports misaligned vectors store/load. 1587const bool Matcher::misaligned_vectors_ok() { 1588 return true; 1589} 1590 1591// x86 AES instructions are compatible with SunJCE expanded 1592// keys, hence we do not need to pass the original key to stubs 1593const bool Matcher::pass_original_key_for_aes() { 1594 return false; 1595} 1596 1597 1598const bool Matcher::convi2l_type_required = true; 1599 1600// Check for shift by small constant as well 1601static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 1602 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 1603 shift->in(2)->get_int() <= 3 && 1604 // Are there other uses besides address expressions? 1605 !matcher->is_visited(shift)) { 1606 address_visited.set(shift->_idx); // Flag as address_visited 1607 mstack.push(shift->in(2), Matcher::Visit); 1608 Node *conv = shift->in(1); 1609#ifdef _LP64 1610 // Allow Matcher to match the rule which bypass 1611 // ConvI2L operation for an array index on LP64 1612 // if the index value is positive. 1613 if (conv->Opcode() == Op_ConvI2L && 1614 conv->as_Type()->type()->is_long()->_lo >= 0 && 1615 // Are there other uses besides address expressions? 1616 !matcher->is_visited(conv)) { 1617 address_visited.set(conv->_idx); // Flag as address_visited 1618 mstack.push(conv->in(1), Matcher::Pre_Visit); 1619 } else 1620#endif 1621 mstack.push(conv, Matcher::Pre_Visit); 1622 return true; 1623 } 1624 return false; 1625} 1626 1627// Should the Matcher clone shifts on addressing modes, expecting them 1628// to be subsumed into complex addressing expressions or compute them 1629// into registers? 1630bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 1631 Node *off = m->in(AddPNode::Offset); 1632 if (off->is_Con()) { 1633 address_visited.test_set(m->_idx); // Flag as address_visited 1634 Node *adr = m->in(AddPNode::Address); 1635 1636 // Intel can handle 2 adds in addressing mode 1637 // AtomicAdd is not an addressing expression. 1638 // Cheap to find it by looking for screwy base. 1639 if (adr->is_AddP() && 1640 !adr->in(AddPNode::Base)->is_top() && 1641 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 1642 // Are there other uses besides address expressions? 1643 !is_visited(adr)) { 1644 address_visited.set(adr->_idx); // Flag as address_visited 1645 Node *shift = adr->in(AddPNode::Offset); 1646 if (!clone_shift(shift, this, mstack, address_visited)) { 1647 mstack.push(shift, Pre_Visit); 1648 } 1649 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 1650 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 1651 } else { 1652 mstack.push(adr, Pre_Visit); 1653 } 1654 1655 // Clone X+offset as it also folds into most addressing expressions 1656 mstack.push(off, Visit); 1657 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1658 return true; 1659 } else if (clone_shift(off, this, mstack, address_visited)) { 1660 address_visited.test_set(m->_idx); // Flag as address_visited 1661 mstack.push(m->in(AddPNode::Address), Pre_Visit); 1662 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1663 return true; 1664 } 1665 return false; 1666} 1667 1668void Compile::reshape_address(AddPNode* addp) { 1669} 1670 1671static inline uint vector_length(const MachNode* n) { 1672 const TypeVect* vt = n->bottom_type()->is_vect(); 1673 return vt->length(); 1674} 1675 1676static inline uint vector_length_in_bytes(const MachNode* n) { 1677 const TypeVect* vt = n->bottom_type()->is_vect(); 1678 return vt->length_in_bytes(); 1679} 1680 1681static inline uint vector_length_in_bytes(const MachNode* use, MachOper* opnd) { 1682 uint def_idx = use->operand_index(opnd); 1683 Node* def = use->in(def_idx); 1684 return def->bottom_type()->is_vect()->length_in_bytes(); 1685} 1686 1687static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* n) { 1688 switch(vector_length_in_bytes(n)) { 1689 case 4: // fall-through 1690 case 8: // fall-through 1691 case 16: return Assembler::AVX_128bit; 1692 case 32: return Assembler::AVX_256bit; 1693 case 64: return Assembler::AVX_512bit; 1694 1695 default: { 1696 ShouldNotReachHere(); 1697 return Assembler::AVX_NoVec; 1698 } 1699 } 1700} 1701 1702// Helper methods for MachSpillCopyNode::implementation(). 1703static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1704 int src_hi, int dst_hi, uint ireg, outputStream* st) { 1705 // In 64-bit VM size calculation is very complex. Emitting instructions 1706 // into scratch buffer is used to get size in 64-bit VM. 1707 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1708 assert(ireg == Op_VecS || // 32bit vector 1709 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 1710 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 1711 "no non-adjacent vector moves" ); 1712 if (cbuf) { 1713 MacroAssembler _masm(cbuf); 1714 int offset = __ offset(); 1715 switch (ireg) { 1716 case Op_VecS: // copy whole register 1717 case Op_VecD: 1718 case Op_VecX: 1719#ifndef _LP64 1720 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1721#else 1722 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1723 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1724 } else { 1725 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 1726 } 1727#endif 1728 break; 1729 case Op_VecY: 1730#ifndef _LP64 1731 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1732#else 1733 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1734 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1735 } else { 1736 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 1737 } 1738#endif 1739 break; 1740 case Op_VecZ: 1741 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 1742 break; 1743 default: 1744 ShouldNotReachHere(); 1745 } 1746 int size = __ offset() - offset; 1747#ifdef ASSERT 1748 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1749 assert(!do_size || size == 4, "incorrect size calculattion"); 1750#endif 1751 return size; 1752#ifndef PRODUCT 1753 } else if (!do_size) { 1754 switch (ireg) { 1755 case Op_VecS: 1756 case Op_VecD: 1757 case Op_VecX: 1758 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1759 break; 1760 case Op_VecY: 1761 case Op_VecZ: 1762 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1763 break; 1764 default: 1765 ShouldNotReachHere(); 1766 } 1767#endif 1768 } 1769 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 1770 return (UseAVX > 2) ? 6 : 4; 1771} 1772 1773int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 1774 int stack_offset, int reg, uint ireg, outputStream* st) { 1775 // In 64-bit VM size calculation is very complex. Emitting instructions 1776 // into scratch buffer is used to get size in 64-bit VM. 1777 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1778 if (cbuf) { 1779 MacroAssembler _masm(cbuf); 1780 int offset = __ offset(); 1781 if (is_load) { 1782 switch (ireg) { 1783 case Op_VecS: 1784 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1785 break; 1786 case Op_VecD: 1787 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1788 break; 1789 case Op_VecX: 1790#ifndef _LP64 1791 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1792#else 1793 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1794 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1795 } else { 1796 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1797 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 1798 } 1799#endif 1800 break; 1801 case Op_VecY: 1802#ifndef _LP64 1803 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1804#else 1805 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1806 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1807 } else { 1808 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1809 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 1810 } 1811#endif 1812 break; 1813 case Op_VecZ: 1814 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 1815 break; 1816 default: 1817 ShouldNotReachHere(); 1818 } 1819 } else { // store 1820 switch (ireg) { 1821 case Op_VecS: 1822 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1823 break; 1824 case Op_VecD: 1825 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1826 break; 1827 case Op_VecX: 1828#ifndef _LP64 1829 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1830#else 1831 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1832 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1833 } 1834 else { 1835 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 1836 } 1837#endif 1838 break; 1839 case Op_VecY: 1840#ifndef _LP64 1841 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1842#else 1843 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1844 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1845 } 1846 else { 1847 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 1848 } 1849#endif 1850 break; 1851 case Op_VecZ: 1852 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1853 break; 1854 default: 1855 ShouldNotReachHere(); 1856 } 1857 } 1858 int size = __ offset() - offset; 1859#ifdef ASSERT 1860 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 1861 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1862 assert(!do_size || size == (5+offset_size), "incorrect size calculattion"); 1863#endif 1864 return size; 1865#ifndef PRODUCT 1866 } else if (!do_size) { 1867 if (is_load) { 1868 switch (ireg) { 1869 case Op_VecS: 1870 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1871 break; 1872 case Op_VecD: 1873 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1874 break; 1875 case Op_VecX: 1876 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1877 break; 1878 case Op_VecY: 1879 case Op_VecZ: 1880 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1881 break; 1882 default: 1883 ShouldNotReachHere(); 1884 } 1885 } else { // store 1886 switch (ireg) { 1887 case Op_VecS: 1888 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1889 break; 1890 case Op_VecD: 1891 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1892 break; 1893 case Op_VecX: 1894 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1895 break; 1896 case Op_VecY: 1897 case Op_VecZ: 1898 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1899 break; 1900 default: 1901 ShouldNotReachHere(); 1902 } 1903 } 1904#endif 1905 } 1906 bool is_single_byte = false; 1907 int vec_len = 0; 1908 if ((UseAVX > 2) && (stack_offset != 0)) { 1909 int tuple_type = Assembler::EVEX_FVM; 1910 int input_size = Assembler::EVEX_32bit; 1911 switch (ireg) { 1912 case Op_VecS: 1913 tuple_type = Assembler::EVEX_T1S; 1914 break; 1915 case Op_VecD: 1916 tuple_type = Assembler::EVEX_T1S; 1917 input_size = Assembler::EVEX_64bit; 1918 break; 1919 case Op_VecX: 1920 break; 1921 case Op_VecY: 1922 vec_len = 1; 1923 break; 1924 case Op_VecZ: 1925 vec_len = 2; 1926 break; 1927 } 1928 is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); 1929 } 1930 int offset_size = 0; 1931 int size = 5; 1932 if (UseAVX > 2 ) { 1933 if (VM_Version::supports_avx512novl() && (vec_len == 2)) { 1934 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 1935 size += 2; // Need an additional two bytes for EVEX encoding 1936 } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { 1937 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 1938 } else { 1939 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 1940 size += 2; // Need an additional two bytes for EVEX encodding 1941 } 1942 } else { 1943 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 1944 } 1945 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1946 return size+offset_size; 1947} 1948 1949static inline jint replicate4_imm(int con, int width) { 1950 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 1951 assert(width == 1 || width == 2, "only byte or short types here"); 1952 int bit_width = width * 8; 1953 jint val = con; 1954 val &= (1 << bit_width) - 1; // mask off sign bits 1955 while(bit_width < 32) { 1956 val |= (val << bit_width); 1957 bit_width <<= 1; 1958 } 1959 return val; 1960} 1961 1962static inline jlong replicate8_imm(int con, int width) { 1963 // Load a constant of "width" (in bytes) and replicate it to fill 64bit. 1964 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 1965 int bit_width = width * 8; 1966 jlong val = con; 1967 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 1968 while(bit_width < 64) { 1969 val |= (val << bit_width); 1970 bit_width <<= 1; 1971 } 1972 return val; 1973} 1974 1975#ifndef PRODUCT 1976 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 1977 st->print("nop \t# %d bytes pad for loops and calls", _count); 1978 } 1979#endif 1980 1981 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 1982 MacroAssembler _masm(&cbuf); 1983 __ nop(_count); 1984 } 1985 1986 uint MachNopNode::size(PhaseRegAlloc*) const { 1987 return _count; 1988 } 1989 1990#ifndef PRODUCT 1991 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 1992 st->print("# breakpoint"); 1993 } 1994#endif 1995 1996 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 1997 MacroAssembler _masm(&cbuf); 1998 __ int3(); 1999 } 2000 2001 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2002 return MachNode::size(ra_); 2003 } 2004 2005%} 2006 2007encode %{ 2008 2009 enc_class call_epilog %{ 2010 if (VerifyStackAtCalls) { 2011 // Check that stack depth is unchanged: find majik cookie on stack 2012 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2013 MacroAssembler _masm(&cbuf); 2014 Label L; 2015 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2016 __ jccb(Assembler::equal, L); 2017 // Die if stack mismatch 2018 __ int3(); 2019 __ bind(L); 2020 } 2021 %} 2022 2023%} 2024 2025 2026//----------OPERANDS----------------------------------------------------------- 2027// Operand definitions must precede instruction definitions for correct parsing 2028// in the ADLC because operands constitute user defined types which are used in 2029// instruction definitions. 2030 2031// Vectors 2032 2033// Dummy generic vector class. Should be used for all vector operands. 2034// Replaced with vec[SDXYZ] during post-selection pass. 2035operand vec() %{ 2036 constraint(ALLOC_IN_RC(dynamic)); 2037 match(VecX); 2038 match(VecY); 2039 match(VecZ); 2040 match(VecS); 2041 match(VecD); 2042 2043 format %{ %} 2044 interface(REG_INTER); 2045%} 2046 2047// Dummy generic legacy vector class. Should be used for all legacy vector operands. 2048// Replaced with legVec[SDXYZ] during post-selection cleanup. 2049// Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2050// runtime code generation via reg_class_dynamic. 2051operand legVec() %{ 2052 constraint(ALLOC_IN_RC(dynamic)); 2053 match(VecX); 2054 match(VecY); 2055 match(VecZ); 2056 match(VecS); 2057 match(VecD); 2058 2059 format %{ %} 2060 interface(REG_INTER); 2061%} 2062 2063// Replaces vec during post-selection cleanup. See above. 2064operand vecS() %{ 2065 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2066 match(VecS); 2067 2068 format %{ %} 2069 interface(REG_INTER); 2070%} 2071 2072// Replaces legVec during post-selection cleanup. See above. 2073operand legVecS() %{ 2074 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2075 match(VecS); 2076 2077 format %{ %} 2078 interface(REG_INTER); 2079%} 2080 2081// Replaces vec during post-selection cleanup. See above. 2082operand vecD() %{ 2083 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2084 match(VecD); 2085 2086 format %{ %} 2087 interface(REG_INTER); 2088%} 2089 2090// Replaces legVec during post-selection cleanup. See above. 2091operand legVecD() %{ 2092 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2093 match(VecD); 2094 2095 format %{ %} 2096 interface(REG_INTER); 2097%} 2098 2099// Replaces vec during post-selection cleanup. See above. 2100operand vecX() %{ 2101 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2102 match(VecX); 2103 2104 format %{ %} 2105 interface(REG_INTER); 2106%} 2107 2108// Replaces legVec during post-selection cleanup. See above. 2109operand legVecX() %{ 2110 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2111 match(VecX); 2112 2113 format %{ %} 2114 interface(REG_INTER); 2115%} 2116 2117// Replaces vec during post-selection cleanup. See above. 2118operand vecY() %{ 2119 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2120 match(VecY); 2121 2122 format %{ %} 2123 interface(REG_INTER); 2124%} 2125 2126// Replaces legVec during post-selection cleanup. See above. 2127operand legVecY() %{ 2128 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2129 match(VecY); 2130 2131 format %{ %} 2132 interface(REG_INTER); 2133%} 2134 2135// Replaces vec during post-selection cleanup. See above. 2136operand vecZ() %{ 2137 constraint(ALLOC_IN_RC(vectorz_reg)); 2138 match(VecZ); 2139 2140 format %{ %} 2141 interface(REG_INTER); 2142%} 2143 2144// Replaces legVec during post-selection cleanup. See above. 2145operand legVecZ() %{ 2146 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2147 match(VecZ); 2148 2149 format %{ %} 2150 interface(REG_INTER); 2151%} 2152 2153// Comparison Code for FP conditional move 2154operand cmpOp_vcmppd() %{ 2155 match(Bool); 2156 2157 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 2158 n->as_Bool()->_test._test != BoolTest::no_overflow); 2159 format %{ "" %} 2160 interface(COND_INTER) %{ 2161 equal (0x0, "eq"); 2162 less (0x1, "lt"); 2163 less_equal (0x2, "le"); 2164 not_equal (0xC, "ne"); 2165 greater_equal(0xD, "ge"); 2166 greater (0xE, "gt"); 2167 //TODO cannot compile (adlc breaks) without two next lines with error: 2168 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 2169 // equal' for overflow. 2170 overflow (0x20, "o"); // not really supported by the instruction 2171 no_overflow (0x21, "no"); // not really supported by the instruction 2172 %} 2173%} 2174 2175 2176// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2177 2178// ============================================================================ 2179 2180instruct ShouldNotReachHere() %{ 2181 match(Halt); 2182 format %{ "ud2\t# ShouldNotReachHere" %} 2183 ins_encode %{ 2184 __ stop(_halt_reason); 2185 %} 2186 ins_pipe(pipe_slow); 2187%} 2188 2189// =================================EVEX special=============================== 2190 2191instruct setMask(rRegI dst, rRegI src) %{ 2192 predicate(Matcher::has_predicated_vectors()); 2193 match(Set dst (SetVectMaskI src)); 2194 effect(TEMP dst); 2195 format %{ "setvectmask $dst, $src" %} 2196 ins_encode %{ 2197 __ setvectmask($dst$$Register, $src$$Register); 2198 %} 2199 ins_pipe(pipe_slow); 2200%} 2201 2202// ============================================================================ 2203 2204instruct addF_reg(regF dst, regF src) %{ 2205 predicate((UseSSE>=1) && (UseAVX == 0)); 2206 match(Set dst (AddF dst src)); 2207 2208 format %{ "addss $dst, $src" %} 2209 ins_cost(150); 2210 ins_encode %{ 2211 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2212 %} 2213 ins_pipe(pipe_slow); 2214%} 2215 2216instruct addF_mem(regF dst, memory src) %{ 2217 predicate((UseSSE>=1) && (UseAVX == 0)); 2218 match(Set dst (AddF dst (LoadF src))); 2219 2220 format %{ "addss $dst, $src" %} 2221 ins_cost(150); 2222 ins_encode %{ 2223 __ addss($dst$$XMMRegister, $src$$Address); 2224 %} 2225 ins_pipe(pipe_slow); 2226%} 2227 2228instruct addF_imm(regF dst, immF con) %{ 2229 predicate((UseSSE>=1) && (UseAVX == 0)); 2230 match(Set dst (AddF dst con)); 2231 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2232 ins_cost(150); 2233 ins_encode %{ 2234 __ addss($dst$$XMMRegister, $constantaddress($con)); 2235 %} 2236 ins_pipe(pipe_slow); 2237%} 2238 2239instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2240 predicate(UseAVX > 0); 2241 match(Set dst (AddF src1 src2)); 2242 2243 format %{ "vaddss $dst, $src1, $src2" %} 2244 ins_cost(150); 2245 ins_encode %{ 2246 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2247 %} 2248 ins_pipe(pipe_slow); 2249%} 2250 2251instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2252 predicate(UseAVX > 0); 2253 match(Set dst (AddF src1 (LoadF src2))); 2254 2255 format %{ "vaddss $dst, $src1, $src2" %} 2256 ins_cost(150); 2257 ins_encode %{ 2258 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2259 %} 2260 ins_pipe(pipe_slow); 2261%} 2262 2263instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2264 predicate(UseAVX > 0); 2265 match(Set dst (AddF src con)); 2266 2267 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2268 ins_cost(150); 2269 ins_encode %{ 2270 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2271 %} 2272 ins_pipe(pipe_slow); 2273%} 2274 2275instruct addD_reg(regD dst, regD src) %{ 2276 predicate((UseSSE>=2) && (UseAVX == 0)); 2277 match(Set dst (AddD dst src)); 2278 2279 format %{ "addsd $dst, $src" %} 2280 ins_cost(150); 2281 ins_encode %{ 2282 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2283 %} 2284 ins_pipe(pipe_slow); 2285%} 2286 2287instruct addD_mem(regD dst, memory src) %{ 2288 predicate((UseSSE>=2) && (UseAVX == 0)); 2289 match(Set dst (AddD dst (LoadD src))); 2290 2291 format %{ "addsd $dst, $src" %} 2292 ins_cost(150); 2293 ins_encode %{ 2294 __ addsd($dst$$XMMRegister, $src$$Address); 2295 %} 2296 ins_pipe(pipe_slow); 2297%} 2298 2299instruct addD_imm(regD dst, immD con) %{ 2300 predicate((UseSSE>=2) && (UseAVX == 0)); 2301 match(Set dst (AddD dst con)); 2302 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2303 ins_cost(150); 2304 ins_encode %{ 2305 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2306 %} 2307 ins_pipe(pipe_slow); 2308%} 2309 2310instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2311 predicate(UseAVX > 0); 2312 match(Set dst (AddD src1 src2)); 2313 2314 format %{ "vaddsd $dst, $src1, $src2" %} 2315 ins_cost(150); 2316 ins_encode %{ 2317 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2318 %} 2319 ins_pipe(pipe_slow); 2320%} 2321 2322instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2323 predicate(UseAVX > 0); 2324 match(Set dst (AddD src1 (LoadD src2))); 2325 2326 format %{ "vaddsd $dst, $src1, $src2" %} 2327 ins_cost(150); 2328 ins_encode %{ 2329 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2330 %} 2331 ins_pipe(pipe_slow); 2332%} 2333 2334instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2335 predicate(UseAVX > 0); 2336 match(Set dst (AddD src con)); 2337 2338 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2339 ins_cost(150); 2340 ins_encode %{ 2341 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2342 %} 2343 ins_pipe(pipe_slow); 2344%} 2345 2346instruct subF_reg(regF dst, regF src) %{ 2347 predicate((UseSSE>=1) && (UseAVX == 0)); 2348 match(Set dst (SubF dst src)); 2349 2350 format %{ "subss $dst, $src" %} 2351 ins_cost(150); 2352 ins_encode %{ 2353 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2354 %} 2355 ins_pipe(pipe_slow); 2356%} 2357 2358instruct subF_mem(regF dst, memory src) %{ 2359 predicate((UseSSE>=1) && (UseAVX == 0)); 2360 match(Set dst (SubF dst (LoadF src))); 2361 2362 format %{ "subss $dst, $src" %} 2363 ins_cost(150); 2364 ins_encode %{ 2365 __ subss($dst$$XMMRegister, $src$$Address); 2366 %} 2367 ins_pipe(pipe_slow); 2368%} 2369 2370instruct subF_imm(regF dst, immF con) %{ 2371 predicate((UseSSE>=1) && (UseAVX == 0)); 2372 match(Set dst (SubF dst con)); 2373 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2374 ins_cost(150); 2375 ins_encode %{ 2376 __ subss($dst$$XMMRegister, $constantaddress($con)); 2377 %} 2378 ins_pipe(pipe_slow); 2379%} 2380 2381instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2382 predicate(UseAVX > 0); 2383 match(Set dst (SubF src1 src2)); 2384 2385 format %{ "vsubss $dst, $src1, $src2" %} 2386 ins_cost(150); 2387 ins_encode %{ 2388 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2389 %} 2390 ins_pipe(pipe_slow); 2391%} 2392 2393instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2394 predicate(UseAVX > 0); 2395 match(Set dst (SubF src1 (LoadF src2))); 2396 2397 format %{ "vsubss $dst, $src1, $src2" %} 2398 ins_cost(150); 2399 ins_encode %{ 2400 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2401 %} 2402 ins_pipe(pipe_slow); 2403%} 2404 2405instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2406 predicate(UseAVX > 0); 2407 match(Set dst (SubF src con)); 2408 2409 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2410 ins_cost(150); 2411 ins_encode %{ 2412 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2413 %} 2414 ins_pipe(pipe_slow); 2415%} 2416 2417instruct subD_reg(regD dst, regD src) %{ 2418 predicate((UseSSE>=2) && (UseAVX == 0)); 2419 match(Set dst (SubD dst src)); 2420 2421 format %{ "subsd $dst, $src" %} 2422 ins_cost(150); 2423 ins_encode %{ 2424 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2425 %} 2426 ins_pipe(pipe_slow); 2427%} 2428 2429instruct subD_mem(regD dst, memory src) %{ 2430 predicate((UseSSE>=2) && (UseAVX == 0)); 2431 match(Set dst (SubD dst (LoadD src))); 2432 2433 format %{ "subsd $dst, $src" %} 2434 ins_cost(150); 2435 ins_encode %{ 2436 __ subsd($dst$$XMMRegister, $src$$Address); 2437 %} 2438 ins_pipe(pipe_slow); 2439%} 2440 2441instruct subD_imm(regD dst, immD con) %{ 2442 predicate((UseSSE>=2) && (UseAVX == 0)); 2443 match(Set dst (SubD dst con)); 2444 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2445 ins_cost(150); 2446 ins_encode %{ 2447 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2448 %} 2449 ins_pipe(pipe_slow); 2450%} 2451 2452instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2453 predicate(UseAVX > 0); 2454 match(Set dst (SubD src1 src2)); 2455 2456 format %{ "vsubsd $dst, $src1, $src2" %} 2457 ins_cost(150); 2458 ins_encode %{ 2459 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2460 %} 2461 ins_pipe(pipe_slow); 2462%} 2463 2464instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2465 predicate(UseAVX > 0); 2466 match(Set dst (SubD src1 (LoadD src2))); 2467 2468 format %{ "vsubsd $dst, $src1, $src2" %} 2469 ins_cost(150); 2470 ins_encode %{ 2471 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2472 %} 2473 ins_pipe(pipe_slow); 2474%} 2475 2476instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2477 predicate(UseAVX > 0); 2478 match(Set dst (SubD src con)); 2479 2480 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2481 ins_cost(150); 2482 ins_encode %{ 2483 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2484 %} 2485 ins_pipe(pipe_slow); 2486%} 2487 2488instruct mulF_reg(regF dst, regF src) %{ 2489 predicate((UseSSE>=1) && (UseAVX == 0)); 2490 match(Set dst (MulF dst src)); 2491 2492 format %{ "mulss $dst, $src" %} 2493 ins_cost(150); 2494 ins_encode %{ 2495 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2496 %} 2497 ins_pipe(pipe_slow); 2498%} 2499 2500instruct mulF_mem(regF dst, memory src) %{ 2501 predicate((UseSSE>=1) && (UseAVX == 0)); 2502 match(Set dst (MulF dst (LoadF src))); 2503 2504 format %{ "mulss $dst, $src" %} 2505 ins_cost(150); 2506 ins_encode %{ 2507 __ mulss($dst$$XMMRegister, $src$$Address); 2508 %} 2509 ins_pipe(pipe_slow); 2510%} 2511 2512instruct mulF_imm(regF dst, immF con) %{ 2513 predicate((UseSSE>=1) && (UseAVX == 0)); 2514 match(Set dst (MulF dst con)); 2515 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2516 ins_cost(150); 2517 ins_encode %{ 2518 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2519 %} 2520 ins_pipe(pipe_slow); 2521%} 2522 2523instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2524 predicate(UseAVX > 0); 2525 match(Set dst (MulF src1 src2)); 2526 2527 format %{ "vmulss $dst, $src1, $src2" %} 2528 ins_cost(150); 2529 ins_encode %{ 2530 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2531 %} 2532 ins_pipe(pipe_slow); 2533%} 2534 2535instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 2536 predicate(UseAVX > 0); 2537 match(Set dst (MulF src1 (LoadF src2))); 2538 2539 format %{ "vmulss $dst, $src1, $src2" %} 2540 ins_cost(150); 2541 ins_encode %{ 2542 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2543 %} 2544 ins_pipe(pipe_slow); 2545%} 2546 2547instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2548 predicate(UseAVX > 0); 2549 match(Set dst (MulF src con)); 2550 2551 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2552 ins_cost(150); 2553 ins_encode %{ 2554 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2555 %} 2556 ins_pipe(pipe_slow); 2557%} 2558 2559instruct mulD_reg(regD dst, regD src) %{ 2560 predicate((UseSSE>=2) && (UseAVX == 0)); 2561 match(Set dst (MulD dst src)); 2562 2563 format %{ "mulsd $dst, $src" %} 2564 ins_cost(150); 2565 ins_encode %{ 2566 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2567 %} 2568 ins_pipe(pipe_slow); 2569%} 2570 2571instruct mulD_mem(regD dst, memory src) %{ 2572 predicate((UseSSE>=2) && (UseAVX == 0)); 2573 match(Set dst (MulD dst (LoadD src))); 2574 2575 format %{ "mulsd $dst, $src" %} 2576 ins_cost(150); 2577 ins_encode %{ 2578 __ mulsd($dst$$XMMRegister, $src$$Address); 2579 %} 2580 ins_pipe(pipe_slow); 2581%} 2582 2583instruct mulD_imm(regD dst, immD con) %{ 2584 predicate((UseSSE>=2) && (UseAVX == 0)); 2585 match(Set dst (MulD dst con)); 2586 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2587 ins_cost(150); 2588 ins_encode %{ 2589 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2590 %} 2591 ins_pipe(pipe_slow); 2592%} 2593 2594instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2595 predicate(UseAVX > 0); 2596 match(Set dst (MulD src1 src2)); 2597 2598 format %{ "vmulsd $dst, $src1, $src2" %} 2599 ins_cost(150); 2600 ins_encode %{ 2601 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2602 %} 2603 ins_pipe(pipe_slow); 2604%} 2605 2606instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2607 predicate(UseAVX > 0); 2608 match(Set dst (MulD src1 (LoadD src2))); 2609 2610 format %{ "vmulsd $dst, $src1, $src2" %} 2611 ins_cost(150); 2612 ins_encode %{ 2613 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2614 %} 2615 ins_pipe(pipe_slow); 2616%} 2617 2618instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2619 predicate(UseAVX > 0); 2620 match(Set dst (MulD src con)); 2621 2622 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2623 ins_cost(150); 2624 ins_encode %{ 2625 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2626 %} 2627 ins_pipe(pipe_slow); 2628%} 2629 2630instruct divF_reg(regF dst, regF src) %{ 2631 predicate((UseSSE>=1) && (UseAVX == 0)); 2632 match(Set dst (DivF dst src)); 2633 2634 format %{ "divss $dst, $src" %} 2635 ins_cost(150); 2636 ins_encode %{ 2637 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2638 %} 2639 ins_pipe(pipe_slow); 2640%} 2641 2642instruct divF_mem(regF dst, memory src) %{ 2643 predicate((UseSSE>=1) && (UseAVX == 0)); 2644 match(Set dst (DivF dst (LoadF src))); 2645 2646 format %{ "divss $dst, $src" %} 2647 ins_cost(150); 2648 ins_encode %{ 2649 __ divss($dst$$XMMRegister, $src$$Address); 2650 %} 2651 ins_pipe(pipe_slow); 2652%} 2653 2654instruct divF_imm(regF dst, immF con) %{ 2655 predicate((UseSSE>=1) && (UseAVX == 0)); 2656 match(Set dst (DivF dst con)); 2657 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2658 ins_cost(150); 2659 ins_encode %{ 2660 __ divss($dst$$XMMRegister, $constantaddress($con)); 2661 %} 2662 ins_pipe(pipe_slow); 2663%} 2664 2665instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2666 predicate(UseAVX > 0); 2667 match(Set dst (DivF src1 src2)); 2668 2669 format %{ "vdivss $dst, $src1, $src2" %} 2670 ins_cost(150); 2671 ins_encode %{ 2672 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2673 %} 2674 ins_pipe(pipe_slow); 2675%} 2676 2677instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2678 predicate(UseAVX > 0); 2679 match(Set dst (DivF src1 (LoadF src2))); 2680 2681 format %{ "vdivss $dst, $src1, $src2" %} 2682 ins_cost(150); 2683 ins_encode %{ 2684 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2685 %} 2686 ins_pipe(pipe_slow); 2687%} 2688 2689instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2690 predicate(UseAVX > 0); 2691 match(Set dst (DivF src con)); 2692 2693 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2694 ins_cost(150); 2695 ins_encode %{ 2696 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2697 %} 2698 ins_pipe(pipe_slow); 2699%} 2700 2701instruct divD_reg(regD dst, regD src) %{ 2702 predicate((UseSSE>=2) && (UseAVX == 0)); 2703 match(Set dst (DivD dst src)); 2704 2705 format %{ "divsd $dst, $src" %} 2706 ins_cost(150); 2707 ins_encode %{ 2708 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2709 %} 2710 ins_pipe(pipe_slow); 2711%} 2712 2713instruct divD_mem(regD dst, memory src) %{ 2714 predicate((UseSSE>=2) && (UseAVX == 0)); 2715 match(Set dst (DivD dst (LoadD src))); 2716 2717 format %{ "divsd $dst, $src" %} 2718 ins_cost(150); 2719 ins_encode %{ 2720 __ divsd($dst$$XMMRegister, $src$$Address); 2721 %} 2722 ins_pipe(pipe_slow); 2723%} 2724 2725instruct divD_imm(regD dst, immD con) %{ 2726 predicate((UseSSE>=2) && (UseAVX == 0)); 2727 match(Set dst (DivD dst con)); 2728 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2729 ins_cost(150); 2730 ins_encode %{ 2731 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2732 %} 2733 ins_pipe(pipe_slow); 2734%} 2735 2736instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2737 predicate(UseAVX > 0); 2738 match(Set dst (DivD src1 src2)); 2739 2740 format %{ "vdivsd $dst, $src1, $src2" %} 2741 ins_cost(150); 2742 ins_encode %{ 2743 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2744 %} 2745 ins_pipe(pipe_slow); 2746%} 2747 2748instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2749 predicate(UseAVX > 0); 2750 match(Set dst (DivD src1 (LoadD src2))); 2751 2752 format %{ "vdivsd $dst, $src1, $src2" %} 2753 ins_cost(150); 2754 ins_encode %{ 2755 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2756 %} 2757 ins_pipe(pipe_slow); 2758%} 2759 2760instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2761 predicate(UseAVX > 0); 2762 match(Set dst (DivD src con)); 2763 2764 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2765 ins_cost(150); 2766 ins_encode %{ 2767 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2768 %} 2769 ins_pipe(pipe_slow); 2770%} 2771 2772instruct absF_reg(regF dst) %{ 2773 predicate((UseSSE>=1) && (UseAVX == 0)); 2774 match(Set dst (AbsF dst)); 2775 ins_cost(150); 2776 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 2777 ins_encode %{ 2778 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 2779 %} 2780 ins_pipe(pipe_slow); 2781%} 2782 2783instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 2784 predicate(UseAVX > 0); 2785 match(Set dst (AbsF src)); 2786 ins_cost(150); 2787 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2788 ins_encode %{ 2789 int vector_len = 0; 2790 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2791 ExternalAddress(float_signmask()), vector_len); 2792 %} 2793 ins_pipe(pipe_slow); 2794%} 2795 2796instruct absD_reg(regD dst) %{ 2797 predicate((UseSSE>=2) && (UseAVX == 0)); 2798 match(Set dst (AbsD dst)); 2799 ins_cost(150); 2800 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 2801 "# abs double by sign masking" %} 2802 ins_encode %{ 2803 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 2804 %} 2805 ins_pipe(pipe_slow); 2806%} 2807 2808instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 2809 predicate(UseAVX > 0); 2810 match(Set dst (AbsD src)); 2811 ins_cost(150); 2812 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2813 "# abs double by sign masking" %} 2814 ins_encode %{ 2815 int vector_len = 0; 2816 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2817 ExternalAddress(double_signmask()), vector_len); 2818 %} 2819 ins_pipe(pipe_slow); 2820%} 2821 2822instruct negF_reg(regF dst) %{ 2823 predicate((UseSSE>=1) && (UseAVX == 0)); 2824 match(Set dst (NegF dst)); 2825 ins_cost(150); 2826 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 2827 ins_encode %{ 2828 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 2829 %} 2830 ins_pipe(pipe_slow); 2831%} 2832 2833instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 2834 predicate(UseAVX > 0); 2835 match(Set dst (NegF src)); 2836 ins_cost(150); 2837 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 2838 ins_encode %{ 2839 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 2840 ExternalAddress(float_signflip())); 2841 %} 2842 ins_pipe(pipe_slow); 2843%} 2844 2845instruct negD_reg(regD dst) %{ 2846 predicate((UseSSE>=2) && (UseAVX == 0)); 2847 match(Set dst (NegD dst)); 2848 ins_cost(150); 2849 format %{ "xorpd $dst, [0x8000000000000000]\t" 2850 "# neg double by sign flipping" %} 2851 ins_encode %{ 2852 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 2853 %} 2854 ins_pipe(pipe_slow); 2855%} 2856 2857instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 2858 predicate(UseAVX > 0); 2859 match(Set dst (NegD src)); 2860 ins_cost(150); 2861 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 2862 "# neg double by sign flipping" %} 2863 ins_encode %{ 2864 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 2865 ExternalAddress(double_signflip())); 2866 %} 2867 ins_pipe(pipe_slow); 2868%} 2869 2870instruct sqrtF_reg(regF dst, regF src) %{ 2871 predicate(UseSSE>=1); 2872 match(Set dst (SqrtF src)); 2873 2874 format %{ "sqrtss $dst, $src" %} 2875 ins_cost(150); 2876 ins_encode %{ 2877 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 2878 %} 2879 ins_pipe(pipe_slow); 2880%} 2881 2882instruct sqrtF_mem(regF dst, memory src) %{ 2883 predicate(UseSSE>=1); 2884 match(Set dst (SqrtF (LoadF src))); 2885 2886 format %{ "sqrtss $dst, $src" %} 2887 ins_cost(150); 2888 ins_encode %{ 2889 __ sqrtss($dst$$XMMRegister, $src$$Address); 2890 %} 2891 ins_pipe(pipe_slow); 2892%} 2893 2894instruct sqrtF_imm(regF dst, immF con) %{ 2895 predicate(UseSSE>=1); 2896 match(Set dst (SqrtF con)); 2897 2898 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2899 ins_cost(150); 2900 ins_encode %{ 2901 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 2902 %} 2903 ins_pipe(pipe_slow); 2904%} 2905 2906instruct sqrtD_reg(regD dst, regD src) %{ 2907 predicate(UseSSE>=2); 2908 match(Set dst (SqrtD src)); 2909 2910 format %{ "sqrtsd $dst, $src" %} 2911 ins_cost(150); 2912 ins_encode %{ 2913 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 2914 %} 2915 ins_pipe(pipe_slow); 2916%} 2917 2918instruct sqrtD_mem(regD dst, memory src) %{ 2919 predicate(UseSSE>=2); 2920 match(Set dst (SqrtD (LoadD src))); 2921 2922 format %{ "sqrtsd $dst, $src" %} 2923 ins_cost(150); 2924 ins_encode %{ 2925 __ sqrtsd($dst$$XMMRegister, $src$$Address); 2926 %} 2927 ins_pipe(pipe_slow); 2928%} 2929 2930instruct sqrtD_imm(regD dst, immD con) %{ 2931 predicate(UseSSE>=2); 2932 match(Set dst (SqrtD con)); 2933 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2934 ins_cost(150); 2935 ins_encode %{ 2936 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 2937 %} 2938 ins_pipe(pipe_slow); 2939%} 2940 2941 2942#ifdef _LP64 2943instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 2944 match(Set dst (RoundDoubleMode src rmode)); 2945 format %{ "roundsd $dst,$src" %} 2946 ins_cost(150); 2947 ins_encode %{ 2948 assert(UseSSE >= 4, "required"); 2949 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 2950 %} 2951 ins_pipe(pipe_slow); 2952%} 2953 2954instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{ 2955 match(Set dst (RoundDoubleMode (LoadD src) rmode)); 2956 format %{ "roundsd $dst,$src" %} 2957 ins_cost(150); 2958 ins_encode %{ 2959 assert(UseSSE >= 4, "required"); 2960 __ roundsd($dst$$XMMRegister, $src$$Address, $rmode$$constant); 2961 %} 2962 ins_pipe(pipe_slow); 2963%} 2964 2965instruct roundD_imm(legRegD dst, immD con, immU8 rmode, rRegI scratch_reg) %{ 2966 match(Set dst (RoundDoubleMode con rmode)); 2967 effect(TEMP scratch_reg); 2968 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 2969 ins_cost(150); 2970 ins_encode %{ 2971 assert(UseSSE >= 4, "required"); 2972 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, $scratch_reg$$Register); 2973 %} 2974 ins_pipe(pipe_slow); 2975%} 2976 2977instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 2978 predicate(n->as_Vector()->length() < 8); 2979 match(Set dst (RoundDoubleModeV src rmode)); 2980 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 2981 ins_encode %{ 2982 assert(UseAVX > 0, "required"); 2983 int vector_len = vector_length_encoding(this); 2984 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vector_len); 2985 %} 2986 ins_pipe( pipe_slow ); 2987%} 2988 2989instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 2990 predicate(n->as_Vector()->length() == 8); 2991 match(Set dst (RoundDoubleModeV src rmode)); 2992 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 2993 ins_encode %{ 2994 assert(UseAVX > 2, "required"); 2995 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 2996 %} 2997 ins_pipe( pipe_slow ); 2998%} 2999 3000instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3001 predicate(n->as_Vector()->length() < 8); 3002 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3003 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 3004 ins_encode %{ 3005 assert(UseAVX > 0, "required"); 3006 int vector_len = vector_length_encoding(this); 3007 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vector_len); 3008 %} 3009 ins_pipe( pipe_slow ); 3010%} 3011 3012instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3013 predicate(n->as_Vector()->length() == 8); 3014 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3015 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 3016 ins_encode %{ 3017 assert(UseAVX > 2, "required"); 3018 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3019 %} 3020 ins_pipe( pipe_slow ); 3021%} 3022#endif // _LP64 3023 3024instruct onspinwait() %{ 3025 match(OnSpinWait); 3026 ins_cost(200); 3027 3028 format %{ 3029 $$template 3030 $$emit$$"pause\t! membar_onspinwait" 3031 %} 3032 ins_encode %{ 3033 __ pause(); 3034 %} 3035 ins_pipe(pipe_slow); 3036%} 3037 3038// a * b + c 3039instruct fmaD_reg(regD a, regD b, regD c) %{ 3040 predicate(UseFMA); 3041 match(Set c (FmaD c (Binary a b))); 3042 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 3043 ins_cost(150); 3044 ins_encode %{ 3045 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3046 %} 3047 ins_pipe( pipe_slow ); 3048%} 3049 3050// a * b + c 3051instruct fmaF_reg(regF a, regF b, regF c) %{ 3052 predicate(UseFMA); 3053 match(Set c (FmaF c (Binary a b))); 3054 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 3055 ins_cost(150); 3056 ins_encode %{ 3057 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3058 %} 3059 ins_pipe( pipe_slow ); 3060%} 3061 3062// ====================VECTOR INSTRUCTIONS===================================== 3063 3064// Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 3065instruct MoveVec2Leg(legVec dst, vec src) %{ 3066 match(Set dst src); 3067 format %{ "" %} 3068 ins_encode %{ 3069 ShouldNotReachHere(); 3070 %} 3071 ins_pipe( fpu_reg_reg ); 3072%} 3073 3074instruct MoveLeg2Vec(vec dst, legVec src) %{ 3075 match(Set dst src); 3076 format %{ "" %} 3077 ins_encode %{ 3078 ShouldNotReachHere(); 3079 %} 3080 ins_pipe( fpu_reg_reg ); 3081%} 3082 3083// ============================================================================ 3084 3085// Load vectors 3086instruct loadV(vec dst, memory mem) %{ 3087 match(Set dst (LoadVector mem)); 3088 ins_cost(125); 3089 format %{ "load_vector $dst,$mem" %} 3090 ins_encode %{ 3091 switch (vector_length_in_bytes(this)) { 3092 case 4: __ movdl ($dst$$XMMRegister, $mem$$Address); break; 3093 case 8: __ movq ($dst$$XMMRegister, $mem$$Address); break; 3094 case 16: __ movdqu ($dst$$XMMRegister, $mem$$Address); break; 3095 case 32: __ vmovdqu ($dst$$XMMRegister, $mem$$Address); break; 3096 case 64: __ evmovdqul($dst$$XMMRegister, $mem$$Address, Assembler::AVX_512bit); break; 3097 default: ShouldNotReachHere(); 3098 } 3099 %} 3100 ins_pipe( pipe_slow ); 3101%} 3102 3103// Store vectors generic operand pattern. 3104instruct storeV(memory mem, vec src) %{ 3105 match(Set mem (StoreVector mem src)); 3106 ins_cost(145); 3107 format %{ "store_vector $mem,$src\n\t" %} 3108 ins_encode %{ 3109 switch (vector_length_in_bytes(this, $src)) { 3110 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 3111 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 3112 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 3113 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 3114 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 3115 default: ShouldNotReachHere(); 3116 } 3117 %} 3118 ins_pipe( pipe_slow ); 3119%} 3120 3121// ====================LEGACY REPLICATE======================================= 3122 3123instruct Repl16B(vec dst, rRegI src) %{ 3124 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3125 match(Set dst (ReplicateB src)); 3126 format %{ "movd $dst,$src\n\t" 3127 "punpcklbw $dst,$dst\n\t" 3128 "pshuflw $dst,$dst,0x00\n\t" 3129 "punpcklqdq $dst,$dst\t! replicate16B" %} 3130 ins_encode %{ 3131 __ movdl($dst$$XMMRegister, $src$$Register); 3132 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3133 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3134 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3135 %} 3136 ins_pipe( pipe_slow ); 3137%} 3138 3139instruct Repl32B(vec dst, rRegI src) %{ 3140 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3141 match(Set dst (ReplicateB src)); 3142 format %{ "movd $dst,$src\n\t" 3143 "punpcklbw $dst,$dst\n\t" 3144 "pshuflw $dst,$dst,0x00\n\t" 3145 "punpcklqdq $dst,$dst\n\t" 3146 "vinserti128_high $dst,$dst\t! replicate32B" %} 3147 ins_encode %{ 3148 __ movdl($dst$$XMMRegister, $src$$Register); 3149 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3150 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3151 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3152 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3153 %} 3154 ins_pipe( pipe_slow ); 3155%} 3156 3157instruct Repl64B(legVec dst, rRegI src) %{ 3158 predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); 3159 match(Set dst (ReplicateB src)); 3160 format %{ "movd $dst,$src\n\t" 3161 "punpcklbw $dst,$dst\n\t" 3162 "pshuflw $dst,$dst,0x00\n\t" 3163 "punpcklqdq $dst,$dst\n\t" 3164 "vinserti128_high $dst,$dst\t" 3165 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B" %} 3166 ins_encode %{ 3167 __ movdl($dst$$XMMRegister, $src$$Register); 3168 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3169 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3170 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3171 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3172 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3173 %} 3174 ins_pipe( pipe_slow ); 3175%} 3176 3177instruct Repl16B_imm(vec dst, immI con) %{ 3178 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3179 match(Set dst (ReplicateB con)); 3180 format %{ "movq $dst,[$constantaddress]\n\t" 3181 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 3182 ins_encode %{ 3183 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3184 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3185 %} 3186 ins_pipe( pipe_slow ); 3187%} 3188 3189instruct Repl32B_imm(vec dst, immI con) %{ 3190 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3191 match(Set dst (ReplicateB con)); 3192 format %{ "movq $dst,[$constantaddress]\n\t" 3193 "punpcklqdq $dst,$dst\n\t" 3194 "vinserti128_high $dst,$dst\t! lreplicate32B($con)" %} 3195 ins_encode %{ 3196 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3197 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3198 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3199 %} 3200 ins_pipe( pipe_slow ); 3201%} 3202 3203instruct Repl64B_imm(legVec dst, immI con) %{ 3204 predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); 3205 match(Set dst (ReplicateB con)); 3206 format %{ "movq $dst,[$constantaddress]\n\t" 3207 "punpcklqdq $dst,$dst\n\t" 3208 "vinserti128_high $dst,$dst\t" 3209 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B($con)" %} 3210 ins_encode %{ 3211 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3212 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3213 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3214 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3215 %} 3216 ins_pipe( pipe_slow ); 3217%} 3218 3219instruct Repl4S(vec dst, rRegI src) %{ 3220 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); 3221 match(Set dst (ReplicateS src)); 3222 format %{ "movd $dst,$src\n\t" 3223 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 3224 ins_encode %{ 3225 __ movdl($dst$$XMMRegister, $src$$Register); 3226 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3227 %} 3228 ins_pipe( pipe_slow ); 3229%} 3230 3231instruct Repl4S_mem(vec dst, memory mem) %{ 3232 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3233 match(Set dst (ReplicateS (LoadS mem))); 3234 format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %} 3235 ins_encode %{ 3236 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3237 %} 3238 ins_pipe( pipe_slow ); 3239%} 3240 3241instruct Repl8S(vec dst, rRegI src) %{ 3242 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3243 match(Set dst (ReplicateS src)); 3244 format %{ "movd $dst,$src\n\t" 3245 "pshuflw $dst,$dst,0x00\n\t" 3246 "punpcklqdq $dst,$dst\t! replicate8S" %} 3247 ins_encode %{ 3248 __ movdl($dst$$XMMRegister, $src$$Register); 3249 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3250 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3251 %} 3252 ins_pipe( pipe_slow ); 3253%} 3254 3255instruct Repl8S_mem(vec dst, memory mem) %{ 3256 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3257 match(Set dst (ReplicateS (LoadS mem))); 3258 format %{ "pshuflw $dst,$mem,0x00\n\t" 3259 "punpcklqdq $dst,$dst\t! replicate8S" %} 3260 ins_encode %{ 3261 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3262 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3263 %} 3264 ins_pipe( pipe_slow ); 3265%} 3266 3267instruct Repl8S_imm(vec dst, immI con) %{ 3268 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3269 match(Set dst (ReplicateS con)); 3270 format %{ "movq $dst,[$constantaddress]\n\t" 3271 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3272 ins_encode %{ 3273 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3274 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3275 %} 3276 ins_pipe( pipe_slow ); 3277%} 3278 3279instruct Repl16S(vec dst, rRegI src) %{ 3280 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3281 match(Set dst (ReplicateS src)); 3282 format %{ "movd $dst,$src\n\t" 3283 "pshuflw $dst,$dst,0x00\n\t" 3284 "punpcklqdq $dst,$dst\n\t" 3285 "vinserti128_high $dst,$dst\t! replicate16S" %} 3286 ins_encode %{ 3287 __ movdl($dst$$XMMRegister, $src$$Register); 3288 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3289 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3290 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3291 %} 3292 ins_pipe( pipe_slow ); 3293%} 3294 3295instruct Repl16S_mem(vec dst, memory mem) %{ 3296 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3297 match(Set dst (ReplicateS (LoadS mem))); 3298 format %{ "pshuflw $dst,$mem,0x00\n\t" 3299 "punpcklqdq $dst,$dst\n\t" 3300 "vinserti128_high $dst,$dst\t! replicate16S" %} 3301 ins_encode %{ 3302 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3303 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3304 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3305 %} 3306 ins_pipe( pipe_slow ); 3307%} 3308 3309instruct Repl16S_imm(vec dst, immI con) %{ 3310 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3311 match(Set dst (ReplicateS con)); 3312 format %{ "movq $dst,[$constantaddress]\n\t" 3313 "punpcklqdq $dst,$dst\n\t" 3314 "vinserti128_high $dst,$dst\t! replicate16S($con)" %} 3315 ins_encode %{ 3316 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3317 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3318 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3319 %} 3320 ins_pipe( pipe_slow ); 3321%} 3322 3323instruct Repl32S(legVec dst, rRegI src) %{ 3324 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3325 match(Set dst (ReplicateS src)); 3326 format %{ "movd $dst,$src\n\t" 3327 "pshuflw $dst,$dst,0x00\n\t" 3328 "punpcklqdq $dst,$dst\n\t" 3329 "vinserti128_high $dst,$dst\t" 3330 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S" %} 3331 ins_encode %{ 3332 __ movdl($dst$$XMMRegister, $src$$Register); 3333 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3334 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3335 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3336 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3337 %} 3338 ins_pipe( pipe_slow ); 3339%} 3340 3341instruct Repl32S_mem(legVec dst, memory mem) %{ 3342 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3343 match(Set dst (ReplicateS (LoadS mem))); 3344 format %{ "pshuflw $dst,$mem,0x00\n\t" 3345 "punpcklqdq $dst,$dst\n\t" 3346 "vinserti128_high $dst,$dst\t" 3347 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S" %} 3348 ins_encode %{ 3349 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3350 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3351 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3352 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3353 %} 3354 ins_pipe( pipe_slow ); 3355%} 3356 3357instruct Repl32S_imm(legVec dst, immI con) %{ 3358 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3359 match(Set dst (ReplicateS con)); 3360 format %{ "movq $dst,[$constantaddress]\n\t" 3361 "punpcklqdq $dst,$dst\n\t" 3362 "vinserti128_high $dst,$dst\t" 3363 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S($con)" %} 3364 ins_encode %{ 3365 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3366 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3367 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3368 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3369 %} 3370 ins_pipe( pipe_slow ); 3371%} 3372 3373instruct Repl4I(vec dst, rRegI src) %{ 3374 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3375 match(Set dst (ReplicateI src)); 3376 format %{ "movd $dst,$src\n\t" 3377 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3378 ins_encode %{ 3379 __ movdl($dst$$XMMRegister, $src$$Register); 3380 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3381 %} 3382 ins_pipe( pipe_slow ); 3383%} 3384 3385instruct Repl4I_mem(vec dst, memory mem) %{ 3386 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3387 match(Set dst (ReplicateI (LoadI mem))); 3388 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3389 ins_encode %{ 3390 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3391 %} 3392 ins_pipe( pipe_slow ); 3393%} 3394 3395instruct Repl8I(vec dst, rRegI src) %{ 3396 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3397 match(Set dst (ReplicateI src)); 3398 format %{ "movd $dst,$src\n\t" 3399 "pshufd $dst,$dst,0x00\n\t" 3400 "vinserti128_high $dst,$dst\t! replicate8I" %} 3401 ins_encode %{ 3402 __ movdl($dst$$XMMRegister, $src$$Register); 3403 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3404 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3405 %} 3406 ins_pipe( pipe_slow ); 3407%} 3408 3409instruct Repl8I_mem(vec dst, memory mem) %{ 3410 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3411 match(Set dst (ReplicateI (LoadI mem))); 3412 format %{ "pshufd $dst,$mem,0x00\n\t" 3413 "vinserti128_high $dst,$dst\t! replicate8I" %} 3414 ins_encode %{ 3415 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3416 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3417 %} 3418 ins_pipe( pipe_slow ); 3419%} 3420 3421instruct Repl16I(legVec dst, rRegI src) %{ 3422 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3423 match(Set dst (ReplicateI src)); 3424 format %{ "movd $dst,$src\n\t" 3425 "pshufd $dst,$dst,0x00\n\t" 3426 "vinserti128_high $dst,$dst\t" 3427 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %} 3428 ins_encode %{ 3429 __ movdl($dst$$XMMRegister, $src$$Register); 3430 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3431 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3432 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3433 %} 3434 ins_pipe( pipe_slow ); 3435%} 3436 3437instruct Repl16I_mem(legVec dst, memory mem) %{ 3438 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3439 match(Set dst (ReplicateI (LoadI mem))); 3440 format %{ "pshufd $dst,$mem,0x00\n\t" 3441 "vinserti128_high $dst,$dst\t" 3442 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %} 3443 ins_encode %{ 3444 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3445 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3446 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3447 %} 3448 ins_pipe( pipe_slow ); 3449%} 3450 3451instruct Repl4I_imm(vec dst, immI con) %{ 3452 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3453 match(Set dst (ReplicateI con)); 3454 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 3455 "punpcklqdq $dst,$dst" %} 3456 ins_encode %{ 3457 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3458 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3459 %} 3460 ins_pipe( pipe_slow ); 3461%} 3462 3463instruct Repl8I_imm(vec dst, immI con) %{ 3464 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3465 match(Set dst (ReplicateI con)); 3466 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 3467 "punpcklqdq $dst,$dst\n\t" 3468 "vinserti128_high $dst,$dst" %} 3469 ins_encode %{ 3470 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3471 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3472 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3473 %} 3474 ins_pipe( pipe_slow ); 3475%} 3476 3477instruct Repl16I_imm(legVec dst, immI con) %{ 3478 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3479 match(Set dst (ReplicateI con)); 3480 format %{ "movq $dst,[$constantaddress]\t" 3481 "punpcklqdq $dst,$dst\n\t" 3482 "vinserti128_high $dst,$dst" 3483 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I($con)" %} 3484 ins_encode %{ 3485 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3486 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3487 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3488 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3489 %} 3490 ins_pipe( pipe_slow ); 3491%} 3492 3493// Long could be loaded into xmm register directly from memory. 3494instruct Repl2L_mem(vec dst, memory mem) %{ 3495 predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); 3496 match(Set dst (ReplicateL (LoadL mem))); 3497 format %{ "movq $dst,$mem\n\t" 3498 "punpcklqdq $dst,$dst\t! replicate2L" %} 3499 ins_encode %{ 3500 __ movq($dst$$XMMRegister, $mem$$Address); 3501 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3502 %} 3503 ins_pipe( pipe_slow ); 3504%} 3505 3506// Replicate long (8 byte) scalar to be vector 3507#ifdef _LP64 3508instruct Repl4L(vec dst, rRegL src) %{ 3509 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3510 match(Set dst (ReplicateL src)); 3511 format %{ "movdq $dst,$src\n\t" 3512 "punpcklqdq $dst,$dst\n\t" 3513 "vinserti128_high $dst,$dst\t! replicate4L" %} 3514 ins_encode %{ 3515 __ movdq($dst$$XMMRegister, $src$$Register); 3516 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3517 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3518 %} 3519 ins_pipe( pipe_slow ); 3520%} 3521 3522instruct Repl8L(legVec dst, rRegL src) %{ 3523 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3524 match(Set dst (ReplicateL src)); 3525 format %{ "movdq $dst,$src\n\t" 3526 "punpcklqdq $dst,$dst\n\t" 3527 "vinserti128_high $dst,$dst\t" 3528 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %} 3529 ins_encode %{ 3530 __ movdq($dst$$XMMRegister, $src$$Register); 3531 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3532 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3533 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3534 %} 3535 ins_pipe( pipe_slow ); 3536%} 3537#else // _LP64 3538instruct Repl4L(vec dst, eRegL src, vec tmp) %{ 3539 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3540 match(Set dst (ReplicateL src)); 3541 effect(TEMP dst, USE src, TEMP tmp); 3542 format %{ "movdl $dst,$src.lo\n\t" 3543 "movdl $tmp,$src.hi\n\t" 3544 "punpckldq $dst,$tmp\n\t" 3545 "punpcklqdq $dst,$dst\n\t" 3546 "vinserti128_high $dst,$dst\t! replicate4L" %} 3547 ins_encode %{ 3548 __ movdl($dst$$XMMRegister, $src$$Register); 3549 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3550 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3551 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3552 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3553 %} 3554 ins_pipe( pipe_slow ); 3555%} 3556 3557instruct Repl8L(legVec dst, eRegL src, legVec tmp) %{ 3558 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3559 match(Set dst (ReplicateL src)); 3560 effect(TEMP dst, USE src, TEMP tmp); 3561 format %{ "movdl $dst,$src.lo\n\t" 3562 "movdl $tmp,$src.hi\n\t" 3563 "punpckldq $dst,$tmp\n\t" 3564 "punpcklqdq $dst,$dst\n\t" 3565 "vinserti128_high $dst,$dst\t" 3566 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %} 3567 ins_encode %{ 3568 __ movdl($dst$$XMMRegister, $src$$Register); 3569 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3570 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3571 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3572 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3573 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3574 %} 3575 ins_pipe( pipe_slow ); 3576%} 3577#endif // _LP64 3578 3579instruct Repl4L_imm(vec dst, immL con) %{ 3580 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3581 match(Set dst (ReplicateL con)); 3582 format %{ "movq $dst,[$constantaddress]\n\t" 3583 "punpcklqdq $dst,$dst\n\t" 3584 "vinserti128_high $dst,$dst\t! replicate4L($con)" %} 3585 ins_encode %{ 3586 __ movq($dst$$XMMRegister, $constantaddress($con)); 3587 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3588 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3589 %} 3590 ins_pipe( pipe_slow ); 3591%} 3592 3593instruct Repl8L_imm(legVec dst, immL con) %{ 3594 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3595 match(Set dst (ReplicateL con)); 3596 format %{ "movq $dst,[$constantaddress]\n\t" 3597 "punpcklqdq $dst,$dst\n\t" 3598 "vinserti128_high $dst,$dst\t" 3599 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L($con)" %} 3600 ins_encode %{ 3601 __ movq($dst$$XMMRegister, $constantaddress($con)); 3602 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3603 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3604 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3605 %} 3606 ins_pipe( pipe_slow ); 3607%} 3608 3609instruct Repl4L_mem(vec dst, memory mem) %{ 3610 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3611 match(Set dst (ReplicateL (LoadL mem))); 3612 format %{ "movq $dst,$mem\n\t" 3613 "punpcklqdq $dst,$dst\n\t" 3614 "vinserti128_high $dst,$dst\t! replicate4L" %} 3615 ins_encode %{ 3616 __ movq($dst$$XMMRegister, $mem$$Address); 3617 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3618 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3619 %} 3620 ins_pipe( pipe_slow ); 3621%} 3622 3623instruct Repl8L_mem(legVec dst, memory mem) %{ 3624 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3625 match(Set dst (ReplicateL (LoadL mem))); 3626 format %{ "movq $dst,$mem\n\t" 3627 "punpcklqdq $dst,$dst\n\t" 3628 "vinserti128_high $dst,$dst\t" 3629 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %} 3630 ins_encode %{ 3631 __ movq($dst$$XMMRegister, $mem$$Address); 3632 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3633 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3634 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3635 %} 3636 ins_pipe( pipe_slow ); 3637%} 3638 3639instruct Repl2F_mem(vec dst, memory mem) %{ 3640 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3641 match(Set dst (ReplicateF (LoadF mem))); 3642 format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} 3643 ins_encode %{ 3644 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3645 %} 3646 ins_pipe( pipe_slow ); 3647%} 3648 3649instruct Repl4F_mem(vec dst, memory mem) %{ 3650 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3651 match(Set dst (ReplicateF (LoadF mem))); 3652 format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %} 3653 ins_encode %{ 3654 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3655 %} 3656 ins_pipe( pipe_slow ); 3657%} 3658 3659instruct Repl8F(vec dst, vlRegF src) %{ 3660 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3661 match(Set dst (ReplicateF src)); 3662 format %{ "pshufd $dst,$src,0x00\n\t" 3663 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3664 ins_encode %{ 3665 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3666 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3667 %} 3668 ins_pipe( pipe_slow ); 3669%} 3670 3671instruct Repl8F_mem(vec dst, memory mem) %{ 3672 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3673 match(Set dst (ReplicateF (LoadF mem))); 3674 format %{ "pshufd $dst,$mem,0x00\n\t" 3675 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3676 ins_encode %{ 3677 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3678 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3679 %} 3680 ins_pipe( pipe_slow ); 3681%} 3682 3683instruct Repl16F(legVec dst, vlRegF src) %{ 3684 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3685 match(Set dst (ReplicateF src)); 3686 format %{ "pshufd $dst,$src,0x00\n\t" 3687 "vinsertf128_high $dst,$dst\t" 3688 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %} 3689 ins_encode %{ 3690 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3691 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3692 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3693 %} 3694 ins_pipe( pipe_slow ); 3695%} 3696 3697instruct Repl16F_mem(legVec dst, memory mem) %{ 3698 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3699 match(Set dst (ReplicateF (LoadF mem))); 3700 format %{ "pshufd $dst,$mem,0x00\n\t" 3701 "vinsertf128_high $dst,$dst\t" 3702 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %} 3703 ins_encode %{ 3704 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3705 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3706 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3707 %} 3708 ins_pipe( pipe_slow ); 3709%} 3710 3711instruct Repl2F_zero(vec dst, immF0 zero) %{ 3712 predicate(n->as_Vector()->length() == 2); 3713 match(Set dst (ReplicateF zero)); 3714 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 3715 ins_encode %{ 3716 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3717 %} 3718 ins_pipe( fpu_reg_reg ); 3719%} 3720 3721instruct Repl4F_zero(vec dst, immF0 zero) %{ 3722 predicate(n->as_Vector()->length() == 4); 3723 match(Set dst (ReplicateF zero)); 3724 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 3725 ins_encode %{ 3726 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3727 %} 3728 ins_pipe( fpu_reg_reg ); 3729%} 3730 3731instruct Repl8F_zero(vec dst, immF0 zero) %{ 3732 predicate(n->as_Vector()->length() == 8 && UseAVX > 0); 3733 match(Set dst (ReplicateF zero)); 3734 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 3735 ins_encode %{ 3736 int vector_len = 1; 3737 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3738 %} 3739 ins_pipe( fpu_reg_reg ); 3740%} 3741 3742instruct Repl2D_mem(vec dst, memory mem) %{ 3743 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3744 match(Set dst (ReplicateD (LoadD mem))); 3745 format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} 3746 ins_encode %{ 3747 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3748 %} 3749 ins_pipe( pipe_slow ); 3750%} 3751 3752instruct Repl4D(vec dst, vlRegD src) %{ 3753 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3754 match(Set dst (ReplicateD src)); 3755 format %{ "pshufd $dst,$src,0x44\n\t" 3756 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3757 ins_encode %{ 3758 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3759 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3760 %} 3761 ins_pipe( pipe_slow ); 3762%} 3763 3764instruct Repl4D_mem(vec dst, memory mem) %{ 3765 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3766 match(Set dst (ReplicateD (LoadD mem))); 3767 format %{ "pshufd $dst,$mem,0x44\n\t" 3768 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3769 ins_encode %{ 3770 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3771 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3772 %} 3773 ins_pipe( pipe_slow ); 3774%} 3775 3776instruct Repl8D(legVec dst, vlRegD src) %{ 3777 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3778 match(Set dst (ReplicateD src)); 3779 format %{ "pshufd $dst,$src,0x44\n\t" 3780 "vinsertf128_high $dst,$dst\t" 3781 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %} 3782 ins_encode %{ 3783 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3784 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3785 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3786 %} 3787 ins_pipe( pipe_slow ); 3788%} 3789 3790instruct Repl8D_mem(legVec dst, memory mem) %{ 3791 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3792 match(Set dst (ReplicateD (LoadD mem))); 3793 format %{ "pshufd $dst,$mem,0x44\n\t" 3794 "vinsertf128_high $dst,$dst\t" 3795 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %} 3796 ins_encode %{ 3797 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3798 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3799 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3800 %} 3801 ins_pipe( pipe_slow ); 3802%} 3803 3804// Replicate double (8 byte) scalar zero to be vector 3805instruct Repl2D_zero(vec dst, immD0 zero) %{ 3806 predicate(n->as_Vector()->length() == 2); 3807 match(Set dst (ReplicateD zero)); 3808 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 3809 ins_encode %{ 3810 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3811 %} 3812 ins_pipe( fpu_reg_reg ); 3813%} 3814 3815instruct Repl4D_zero(vec dst, immD0 zero) %{ 3816 predicate(n->as_Vector()->length() == 4 && UseAVX > 0); 3817 match(Set dst (ReplicateD zero)); 3818 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3819 ins_encode %{ 3820 int vector_len = 1; 3821 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3822 %} 3823 ins_pipe( fpu_reg_reg ); 3824%} 3825 3826// ====================GENERIC REPLICATE========================================== 3827 3828// Replicate byte scalar to be vector 3829instruct Repl4B(vec dst, rRegI src) %{ 3830 predicate(n->as_Vector()->length() == 4); 3831 match(Set dst (ReplicateB src)); 3832 format %{ "movd $dst,$src\n\t" 3833 "punpcklbw $dst,$dst\n\t" 3834 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3835 ins_encode %{ 3836 __ movdl($dst$$XMMRegister, $src$$Register); 3837 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3838 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3839 %} 3840 ins_pipe( pipe_slow ); 3841%} 3842 3843instruct Repl8B(vec dst, rRegI src) %{ 3844 predicate(n->as_Vector()->length() == 8); 3845 match(Set dst (ReplicateB src)); 3846 format %{ "movd $dst,$src\n\t" 3847 "punpcklbw $dst,$dst\n\t" 3848 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3849 ins_encode %{ 3850 __ movdl($dst$$XMMRegister, $src$$Register); 3851 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3852 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3853 %} 3854 ins_pipe( pipe_slow ); 3855%} 3856 3857// Replicate byte scalar immediate to be vector by loading from const table. 3858instruct Repl4B_imm(vec dst, immI con) %{ 3859 predicate(n->as_Vector()->length() == 4); 3860 match(Set dst (ReplicateB con)); 3861 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 3862 ins_encode %{ 3863 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 3864 %} 3865 ins_pipe( pipe_slow ); 3866%} 3867 3868instruct Repl8B_imm(vec dst, immI con) %{ 3869 predicate(n->as_Vector()->length() == 8); 3870 match(Set dst (ReplicateB con)); 3871 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 3872 ins_encode %{ 3873 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3874 %} 3875 ins_pipe( pipe_slow ); 3876%} 3877 3878// Replicate byte scalar zero to be vector 3879instruct Repl4B_zero(vec dst, immI0 zero) %{ 3880 predicate(n->as_Vector()->length() == 4); 3881 match(Set dst (ReplicateB zero)); 3882 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 3883 ins_encode %{ 3884 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3885 %} 3886 ins_pipe( fpu_reg_reg ); 3887%} 3888 3889instruct Repl8B_zero(vec dst, immI0 zero) %{ 3890 predicate(n->as_Vector()->length() == 8); 3891 match(Set dst (ReplicateB zero)); 3892 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 3893 ins_encode %{ 3894 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3895 %} 3896 ins_pipe( fpu_reg_reg ); 3897%} 3898 3899instruct Repl16B_zero(vec dst, immI0 zero) %{ 3900 predicate(n->as_Vector()->length() == 16); 3901 match(Set dst (ReplicateB zero)); 3902 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 3903 ins_encode %{ 3904 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3905 %} 3906 ins_pipe( fpu_reg_reg ); 3907%} 3908 3909instruct Repl32B_zero(vec dst, immI0 zero) %{ 3910 predicate(n->as_Vector()->length() == 32); 3911 match(Set dst (ReplicateB zero)); 3912 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 3913 ins_encode %{ 3914 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3915 int vector_len = 1; 3916 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3917 %} 3918 ins_pipe( fpu_reg_reg ); 3919%} 3920 3921// Replicate char/short (2 byte) scalar to be vector 3922instruct Repl2S(vec dst, rRegI src) %{ 3923 predicate(n->as_Vector()->length() == 2); 3924 match(Set dst (ReplicateS src)); 3925 format %{ "movd $dst,$src\n\t" 3926 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 3927 ins_encode %{ 3928 __ movdl($dst$$XMMRegister, $src$$Register); 3929 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3930 %} 3931 ins_pipe( fpu_reg_reg ); 3932%} 3933 3934// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 3935instruct Repl2S_imm(vec dst, immI con) %{ 3936 predicate(n->as_Vector()->length() == 2); 3937 match(Set dst (ReplicateS con)); 3938 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 3939 ins_encode %{ 3940 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 3941 %} 3942 ins_pipe( fpu_reg_reg ); 3943%} 3944 3945instruct Repl4S_imm(vec dst, immI con) %{ 3946 predicate(n->as_Vector()->length() == 4); 3947 match(Set dst (ReplicateS con)); 3948 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} 3949 ins_encode %{ 3950 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3951 %} 3952 ins_pipe( fpu_reg_reg ); 3953%} 3954 3955// Replicate char/short (2 byte) scalar zero to be vector 3956instruct Repl2S_zero(vec dst, immI0 zero) %{ 3957 predicate(n->as_Vector()->length() == 2); 3958 match(Set dst (ReplicateS zero)); 3959 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 3960 ins_encode %{ 3961 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3962 %} 3963 ins_pipe( fpu_reg_reg ); 3964%} 3965 3966instruct Repl4S_zero(vec dst, immI0 zero) %{ 3967 predicate(n->as_Vector()->length() == 4); 3968 match(Set dst (ReplicateS zero)); 3969 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 3970 ins_encode %{ 3971 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3972 %} 3973 ins_pipe( fpu_reg_reg ); 3974%} 3975 3976instruct Repl8S_zero(vec dst, immI0 zero) %{ 3977 predicate(n->as_Vector()->length() == 8); 3978 match(Set dst (ReplicateS zero)); 3979 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 3980 ins_encode %{ 3981 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3982 %} 3983 ins_pipe( fpu_reg_reg ); 3984%} 3985 3986instruct Repl16S_zero(vec dst, immI0 zero) %{ 3987 predicate(n->as_Vector()->length() == 16); 3988 match(Set dst (ReplicateS zero)); 3989 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 3990 ins_encode %{ 3991 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3992 int vector_len = 1; 3993 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3994 %} 3995 ins_pipe( fpu_reg_reg ); 3996%} 3997 3998// Replicate integer (4 byte) scalar to be vector 3999instruct Repl2I(vec dst, rRegI src) %{ 4000 predicate(n->as_Vector()->length() == 2); 4001 match(Set dst (ReplicateI src)); 4002 format %{ "movd $dst,$src\n\t" 4003 "pshufd $dst,$dst,0x00\t! replicate2I" %} 4004 ins_encode %{ 4005 __ movdl($dst$$XMMRegister, $src$$Register); 4006 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4007 %} 4008 ins_pipe( fpu_reg_reg ); 4009%} 4010 4011// Integer could be loaded into xmm register directly from memory. 4012instruct Repl2I_mem(vec dst, memory mem) %{ 4013 predicate(n->as_Vector()->length() == 2); 4014 match(Set dst (ReplicateI (LoadI mem))); 4015 format %{ "movd $dst,$mem\n\t" 4016 "pshufd $dst,$dst,0x00\t! replicate2I" %} 4017 ins_encode %{ 4018 __ movdl($dst$$XMMRegister, $mem$$Address); 4019 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4020 %} 4021 ins_pipe( fpu_reg_reg ); 4022%} 4023 4024// Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 4025instruct Repl2I_imm(vec dst, immI con) %{ 4026 predicate(n->as_Vector()->length() == 2); 4027 match(Set dst (ReplicateI con)); 4028 format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %} 4029 ins_encode %{ 4030 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4031 %} 4032 ins_pipe( fpu_reg_reg ); 4033%} 4034 4035// Replicate integer (4 byte) scalar zero to be vector 4036instruct Repl2I_zero(vec dst, immI0 zero) %{ 4037 predicate(n->as_Vector()->length() == 2); 4038 match(Set dst (ReplicateI zero)); 4039 format %{ "pxor $dst,$dst\t! replicate2I" %} 4040 ins_encode %{ 4041 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4042 %} 4043 ins_pipe( fpu_reg_reg ); 4044%} 4045 4046instruct Repl4I_zero(vec dst, immI0 zero) %{ 4047 predicate(n->as_Vector()->length() == 4); 4048 match(Set dst (ReplicateI zero)); 4049 format %{ "pxor $dst,$dst\t! replicate4I zero)" %} 4050 ins_encode %{ 4051 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4052 %} 4053 ins_pipe( fpu_reg_reg ); 4054%} 4055 4056instruct Repl8I_zero(vec dst, immI0 zero) %{ 4057 predicate(n->as_Vector()->length() == 8); 4058 match(Set dst (ReplicateI zero)); 4059 format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %} 4060 ins_encode %{ 4061 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4062 int vector_len = 1; 4063 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4064 %} 4065 ins_pipe( fpu_reg_reg ); 4066%} 4067 4068// Replicate long (8 byte) scalar to be vector 4069#ifdef _LP64 4070instruct Repl2L(vec dst, rRegL src) %{ 4071 predicate(n->as_Vector()->length() == 2); 4072 match(Set dst (ReplicateL src)); 4073 format %{ "movdq $dst,$src\n\t" 4074 "punpcklqdq $dst,$dst\t! replicate2L" %} 4075 ins_encode %{ 4076 __ movdq($dst$$XMMRegister, $src$$Register); 4077 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4078 %} 4079 ins_pipe( pipe_slow ); 4080%} 4081#else // _LP64 4082instruct Repl2L(vec dst, eRegL src, vec tmp) %{ 4083 predicate(n->as_Vector()->length() == 2); 4084 match(Set dst (ReplicateL src)); 4085 effect(TEMP dst, USE src, TEMP tmp); 4086 format %{ "movdl $dst,$src.lo\n\t" 4087 "movdl $tmp,$src.hi\n\t" 4088 "punpckldq $dst,$tmp\n\t" 4089 "punpcklqdq $dst,$dst\t! replicate2L"%} 4090 ins_encode %{ 4091 __ movdl($dst$$XMMRegister, $src$$Register); 4092 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4093 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4094 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4095 %} 4096 ins_pipe( pipe_slow ); 4097%} 4098#endif // _LP64 4099 4100// Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4101instruct Repl2L_imm(vec dst, immL con) %{ 4102 predicate(n->as_Vector()->length() == 2); 4103 match(Set dst (ReplicateL con)); 4104 format %{ "movq $dst,[$constantaddress]\n\t" 4105 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 4106 ins_encode %{ 4107 __ movq($dst$$XMMRegister, $constantaddress($con)); 4108 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4109 %} 4110 ins_pipe( pipe_slow ); 4111%} 4112 4113// Replicate long (8 byte) scalar zero to be vector 4114instruct Repl2L_zero(vec dst, immL0 zero) %{ 4115 predicate(n->as_Vector()->length() == 2); 4116 match(Set dst (ReplicateL zero)); 4117 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 4118 ins_encode %{ 4119 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4120 %} 4121 ins_pipe( fpu_reg_reg ); 4122%} 4123 4124instruct Repl4L_zero(vec dst, immL0 zero) %{ 4125 predicate(n->as_Vector()->length() == 4); 4126 match(Set dst (ReplicateL zero)); 4127 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} 4128 ins_encode %{ 4129 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4130 int vector_len = 1; 4131 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4132 %} 4133 ins_pipe( fpu_reg_reg ); 4134%} 4135 4136// Replicate float (4 byte) scalar to be vector 4137instruct Repl2F(vec dst, vlRegF src) %{ 4138 predicate(n->as_Vector()->length() == 2); 4139 match(Set dst (ReplicateF src)); 4140 format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} 4141 ins_encode %{ 4142 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4143 %} 4144 ins_pipe( fpu_reg_reg ); 4145%} 4146 4147instruct Repl4F(vec dst, vlRegF src) %{ 4148 predicate(n->as_Vector()->length() == 4); 4149 match(Set dst (ReplicateF src)); 4150 format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} 4151 ins_encode %{ 4152 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4153 %} 4154 ins_pipe( pipe_slow ); 4155%} 4156 4157// Replicate double (8 bytes) scalar to be vector 4158instruct Repl2D(vec dst, vlRegD src) %{ 4159 predicate(n->as_Vector()->length() == 2); 4160 match(Set dst (ReplicateD src)); 4161 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 4162 ins_encode %{ 4163 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4164 %} 4165 ins_pipe( pipe_slow ); 4166%} 4167 4168// ====================EVEX REPLICATE============================================= 4169 4170instruct Repl4B_mem_evex(vec dst, memory mem) %{ 4171 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4172 match(Set dst (ReplicateB (LoadB mem))); 4173 format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %} 4174 ins_encode %{ 4175 int vector_len = 0; 4176 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4177 %} 4178 ins_pipe( pipe_slow ); 4179%} 4180 4181instruct Repl8B_mem_evex(vec dst, memory mem) %{ 4182 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4183 match(Set dst (ReplicateB (LoadB mem))); 4184 format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %} 4185 ins_encode %{ 4186 int vector_len = 0; 4187 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4188 %} 4189 ins_pipe( pipe_slow ); 4190%} 4191 4192instruct Repl16B_evex(vec dst, rRegI src) %{ 4193 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4194 match(Set dst (ReplicateB src)); 4195 format %{ "evpbroadcastb $dst,$src\t! replicate16B" %} 4196 ins_encode %{ 4197 int vector_len = 0; 4198 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4199 %} 4200 ins_pipe( pipe_slow ); 4201%} 4202 4203instruct Repl16B_mem_evex(vec dst, memory mem) %{ 4204 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4205 match(Set dst (ReplicateB (LoadB mem))); 4206 format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %} 4207 ins_encode %{ 4208 int vector_len = 0; 4209 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4210 %} 4211 ins_pipe( pipe_slow ); 4212%} 4213 4214instruct Repl32B_evex(vec dst, rRegI src) %{ 4215 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4216 match(Set dst (ReplicateB src)); 4217 format %{ "evpbroadcastb $dst,$src\t! replicate32B" %} 4218 ins_encode %{ 4219 int vector_len = 1; 4220 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4221 %} 4222 ins_pipe( pipe_slow ); 4223%} 4224 4225instruct Repl32B_mem_evex(vec dst, memory mem) %{ 4226 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4227 match(Set dst (ReplicateB (LoadB mem))); 4228 format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} 4229 ins_encode %{ 4230 int vector_len = 1; 4231 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4232 %} 4233 ins_pipe( pipe_slow ); 4234%} 4235 4236instruct Repl64B_evex(vec dst, rRegI src) %{ 4237 predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4238 match(Set dst (ReplicateB src)); 4239 format %{ "evpbroadcastb $dst,$src\t! upper replicate64B" %} 4240 ins_encode %{ 4241 int vector_len = 2; 4242 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4243 %} 4244 ins_pipe( pipe_slow ); 4245%} 4246 4247instruct Repl64B_mem_evex(vec dst, memory mem) %{ 4248 predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4249 match(Set dst (ReplicateB (LoadB mem))); 4250 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 4251 ins_encode %{ 4252 int vector_len = 2; 4253 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4254 %} 4255 ins_pipe( pipe_slow ); 4256%} 4257 4258instruct Repl16B_imm_evex(vec dst, immI con) %{ 4259 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4260 match(Set dst (ReplicateB con)); 4261 format %{ "movq $dst,[$constantaddress]\n\t" 4262 "vpbroadcastb $dst,$dst\t! replicate16B" %} 4263 ins_encode %{ 4264 int vector_len = 0; 4265 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4266 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4267 %} 4268 ins_pipe( pipe_slow ); 4269%} 4270 4271instruct Repl32B_imm_evex(vec dst, immI con) %{ 4272 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4273 match(Set dst (ReplicateB con)); 4274 format %{ "movq $dst,[$constantaddress]\n\t" 4275 "vpbroadcastb $dst,$dst\t! replicate32B" %} 4276 ins_encode %{ 4277 int vector_len = 1; 4278 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4279 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4280 %} 4281 ins_pipe( pipe_slow ); 4282%} 4283 4284instruct Repl64B_imm_evex(vec dst, immI con) %{ 4285 predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4286 match(Set dst (ReplicateB con)); 4287 format %{ "movq $dst,[$constantaddress]\n\t" 4288 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 4289 ins_encode %{ 4290 int vector_len = 2; 4291 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4292 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4293 %} 4294 ins_pipe( pipe_slow ); 4295%} 4296 4297instruct Repl64B_zero_evex(vec dst, immI0 zero) %{ 4298 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 4299 match(Set dst (ReplicateB zero)); 4300 format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} 4301 ins_encode %{ 4302 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4303 int vector_len = 2; 4304 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4305 %} 4306 ins_pipe( fpu_reg_reg ); 4307%} 4308 4309instruct Repl4S_evex(vec dst, rRegI src) %{ 4310 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4311 match(Set dst (ReplicateS src)); 4312 format %{ "evpbroadcastw $dst,$src\t! replicate4S" %} 4313 ins_encode %{ 4314 int vector_len = 0; 4315 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4316 %} 4317 ins_pipe( pipe_slow ); 4318%} 4319 4320instruct Repl4S_mem_evex(vec dst, memory mem) %{ 4321 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4322 match(Set dst (ReplicateS (LoadS mem))); 4323 format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} 4324 ins_encode %{ 4325 int vector_len = 0; 4326 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4327 %} 4328 ins_pipe( pipe_slow ); 4329%} 4330 4331instruct Repl8S_evex(vec dst, rRegI src) %{ 4332 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4333 match(Set dst (ReplicateS src)); 4334 format %{ "evpbroadcastw $dst,$src\t! replicate8S" %} 4335 ins_encode %{ 4336 int vector_len = 0; 4337 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4338 %} 4339 ins_pipe( pipe_slow ); 4340%} 4341 4342instruct Repl8S_mem_evex(vec dst, memory mem) %{ 4343 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4344 match(Set dst (ReplicateS (LoadS mem))); 4345 format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %} 4346 ins_encode %{ 4347 int vector_len = 0; 4348 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4349 %} 4350 ins_pipe( pipe_slow ); 4351%} 4352 4353instruct Repl16S_evex(vec dst, rRegI src) %{ 4354 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4355 match(Set dst (ReplicateS src)); 4356 format %{ "evpbroadcastw $dst,$src\t! replicate16S" %} 4357 ins_encode %{ 4358 int vector_len = 1; 4359 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4360 %} 4361 ins_pipe( pipe_slow ); 4362%} 4363 4364instruct Repl16S_mem_evex(vec dst, memory mem) %{ 4365 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4366 match(Set dst (ReplicateS (LoadS mem))); 4367 format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %} 4368 ins_encode %{ 4369 int vector_len = 1; 4370 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4371 %} 4372 ins_pipe( pipe_slow ); 4373%} 4374 4375instruct Repl32S_evex(vec dst, rRegI src) %{ 4376 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4377 match(Set dst (ReplicateS src)); 4378 format %{ "evpbroadcastw $dst,$src\t! replicate32S" %} 4379 ins_encode %{ 4380 int vector_len = 2; 4381 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4382 %} 4383 ins_pipe( pipe_slow ); 4384%} 4385 4386instruct Repl32S_mem_evex(vec dst, memory mem) %{ 4387 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4388 match(Set dst (ReplicateS (LoadS mem))); 4389 format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %} 4390 ins_encode %{ 4391 int vector_len = 2; 4392 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4393 %} 4394 ins_pipe( pipe_slow ); 4395%} 4396 4397instruct Repl8S_imm_evex(vec dst, immI con) %{ 4398 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4399 match(Set dst (ReplicateS con)); 4400 format %{ "movq $dst,[$constantaddress]\n\t" 4401 "vpbroadcastw $dst,$dst\t! replicate8S" %} 4402 ins_encode %{ 4403 int vector_len = 0; 4404 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4405 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4406 %} 4407 ins_pipe( pipe_slow ); 4408%} 4409 4410instruct Repl16S_imm_evex(vec dst, immI con) %{ 4411 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4412 match(Set dst (ReplicateS con)); 4413 format %{ "movq $dst,[$constantaddress]\n\t" 4414 "vpbroadcastw $dst,$dst\t! replicate16S" %} 4415 ins_encode %{ 4416 int vector_len = 1; 4417 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4418 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4419 %} 4420 ins_pipe( pipe_slow ); 4421%} 4422 4423instruct Repl32S_imm_evex(vec dst, immI con) %{ 4424 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4425 match(Set dst (ReplicateS con)); 4426 format %{ "movq $dst,[$constantaddress]\n\t" 4427 "vpbroadcastw $dst,$dst\t! replicate32S" %} 4428 ins_encode %{ 4429 int vector_len = 2; 4430 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4431 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4432 %} 4433 ins_pipe( pipe_slow ); 4434%} 4435 4436instruct Repl32S_zero_evex(vec dst, immI0 zero) %{ 4437 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4438 match(Set dst (ReplicateS zero)); 4439 format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} 4440 ins_encode %{ 4441 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4442 int vector_len = 2; 4443 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4444 %} 4445 ins_pipe( fpu_reg_reg ); 4446%} 4447 4448instruct Repl4I_evex(vec dst, rRegI src) %{ 4449 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4450 match(Set dst (ReplicateI src)); 4451 format %{ "evpbroadcastd $dst,$src\t! replicate4I" %} 4452 ins_encode %{ 4453 int vector_len = 0; 4454 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4455 %} 4456 ins_pipe( pipe_slow ); 4457%} 4458 4459instruct Repl4I_mem_evex(vec dst, memory mem) %{ 4460 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4461 match(Set dst (ReplicateI (LoadI mem))); 4462 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 4463 ins_encode %{ 4464 int vector_len = 0; 4465 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4466 %} 4467 ins_pipe( pipe_slow ); 4468%} 4469 4470instruct Repl8I_evex(vec dst, rRegI src) %{ 4471 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4472 match(Set dst (ReplicateI src)); 4473 format %{ "evpbroadcastd $dst,$src\t! replicate8I" %} 4474 ins_encode %{ 4475 int vector_len = 1; 4476 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4477 %} 4478 ins_pipe( pipe_slow ); 4479%} 4480 4481instruct Repl8I_mem_evex(vec dst, memory mem) %{ 4482 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4483 match(Set dst (ReplicateI (LoadI mem))); 4484 format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} 4485 ins_encode %{ 4486 int vector_len = 1; 4487 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4488 %} 4489 ins_pipe( pipe_slow ); 4490%} 4491 4492instruct Repl16I_evex(vec dst, rRegI src) %{ 4493 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4494 match(Set dst (ReplicateI src)); 4495 format %{ "evpbroadcastd $dst,$src\t! replicate16I" %} 4496 ins_encode %{ 4497 int vector_len = 2; 4498 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4499 %} 4500 ins_pipe( pipe_slow ); 4501%} 4502 4503instruct Repl16I_mem_evex(vec dst, memory mem) %{ 4504 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4505 match(Set dst (ReplicateI (LoadI mem))); 4506 format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} 4507 ins_encode %{ 4508 int vector_len = 2; 4509 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4510 %} 4511 ins_pipe( pipe_slow ); 4512%} 4513 4514instruct Repl4I_imm_evex(vec dst, immI con) %{ 4515 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4516 match(Set dst (ReplicateI con)); 4517 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4518 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4519 ins_encode %{ 4520 int vector_len = 0; 4521 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4522 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4523 %} 4524 ins_pipe( pipe_slow ); 4525%} 4526 4527instruct Repl8I_imm_evex(vec dst, immI con) %{ 4528 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4529 match(Set dst (ReplicateI con)); 4530 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4531 "vpbroadcastd $dst,$dst\t! replicate8I" %} 4532 ins_encode %{ 4533 int vector_len = 1; 4534 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4535 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4536 %} 4537 ins_pipe( pipe_slow ); 4538%} 4539 4540instruct Repl16I_imm_evex(vec dst, immI con) %{ 4541 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4542 match(Set dst (ReplicateI con)); 4543 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4544 "vpbroadcastd $dst,$dst\t! replicate16I" %} 4545 ins_encode %{ 4546 int vector_len = 2; 4547 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4548 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4549 %} 4550 ins_pipe( pipe_slow ); 4551%} 4552 4553instruct Repl16I_zero_evex(vec dst, immI0 zero) %{ 4554 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4555 match(Set dst (ReplicateI zero)); 4556 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4557 ins_encode %{ 4558 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 4559 int vector_len = 2; 4560 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4561 %} 4562 ins_pipe( fpu_reg_reg ); 4563%} 4564 4565// Replicate long (8 byte) scalar to be vector 4566#ifdef _LP64 4567instruct Repl4L_evex(vec dst, rRegL src) %{ 4568 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4569 match(Set dst (ReplicateL src)); 4570 format %{ "evpbroadcastq $dst,$src\t! replicate4L" %} 4571 ins_encode %{ 4572 int vector_len = 1; 4573 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4574 %} 4575 ins_pipe( pipe_slow ); 4576%} 4577 4578instruct Repl8L_evex(vec dst, rRegL src) %{ 4579 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4580 match(Set dst (ReplicateL src)); 4581 format %{ "evpbroadcastq $dst,$src\t! replicate8L" %} 4582 ins_encode %{ 4583 int vector_len = 2; 4584 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4585 %} 4586 ins_pipe( pipe_slow ); 4587%} 4588#else // _LP64 4589instruct Repl4L_evex(vec dst, eRegL src, regD tmp) %{ 4590 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4591 match(Set dst (ReplicateL src)); 4592 effect(TEMP dst, USE src, TEMP tmp); 4593 format %{ "movdl $dst,$src.lo\n\t" 4594 "movdl $tmp,$src.hi\n\t" 4595 "punpckldq $dst,$tmp\n\t" 4596 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4597 ins_encode %{ 4598 int vector_len = 1; 4599 __ movdl($dst$$XMMRegister, $src$$Register); 4600 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4601 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4602 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4603 %} 4604 ins_pipe( pipe_slow ); 4605%} 4606 4607instruct Repl8L_evex(legVec dst, eRegL src, legVec tmp) %{ 4608 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4609 match(Set dst (ReplicateL src)); 4610 effect(TEMP dst, USE src, TEMP tmp); 4611 format %{ "movdl $dst,$src.lo\n\t" 4612 "movdl $tmp,$src.hi\n\t" 4613 "punpckldq $dst,$tmp\n\t" 4614 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4615 ins_encode %{ 4616 int vector_len = 2; 4617 __ movdl($dst$$XMMRegister, $src$$Register); 4618 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4619 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4620 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4621 %} 4622 ins_pipe( pipe_slow ); 4623%} 4624#endif // _LP64 4625 4626instruct Repl4L_imm_evex(vec dst, immL con) %{ 4627 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4628 match(Set dst (ReplicateL con)); 4629 format %{ "movq $dst,[$constantaddress]\n\t" 4630 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4631 ins_encode %{ 4632 int vector_len = 1; 4633 __ movq($dst$$XMMRegister, $constantaddress($con)); 4634 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4635 %} 4636 ins_pipe( pipe_slow ); 4637%} 4638 4639instruct Repl8L_imm_evex(vec dst, immL con) %{ 4640 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4641 match(Set dst (ReplicateL con)); 4642 format %{ "movq $dst,[$constantaddress]\n\t" 4643 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4644 ins_encode %{ 4645 int vector_len = 2; 4646 __ movq($dst$$XMMRegister, $constantaddress($con)); 4647 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4648 %} 4649 ins_pipe( pipe_slow ); 4650%} 4651 4652instruct Repl2L_mem_evex(vec dst, memory mem) %{ 4653 predicate(n->as_Vector()->length() == 2 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4654 match(Set dst (ReplicateL (LoadL mem))); 4655 format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %} 4656 ins_encode %{ 4657 int vector_len = 0; 4658 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4659 %} 4660 ins_pipe( pipe_slow ); 4661%} 4662 4663instruct Repl4L_mem_evex(vec dst, memory mem) %{ 4664 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4665 match(Set dst (ReplicateL (LoadL mem))); 4666 format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} 4667 ins_encode %{ 4668 int vector_len = 1; 4669 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4670 %} 4671 ins_pipe( pipe_slow ); 4672%} 4673 4674instruct Repl8L_mem_evex(vec dst, memory mem) %{ 4675 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4676 match(Set dst (ReplicateL (LoadL mem))); 4677 format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} 4678 ins_encode %{ 4679 int vector_len = 2; 4680 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4681 %} 4682 ins_pipe( pipe_slow ); 4683%} 4684 4685instruct Repl8L_zero_evex(vec dst, immL0 zero) %{ 4686 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4687 match(Set dst (ReplicateL zero)); 4688 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4689 ins_encode %{ 4690 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4691 int vector_len = 2; 4692 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4693 %} 4694 ins_pipe( fpu_reg_reg ); 4695%} 4696 4697instruct Repl8F_evex(vec dst, regF src) %{ 4698 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4699 match(Set dst (ReplicateF src)); 4700 format %{ "vpbroadcastss $dst,$src\t! replicate8F" %} 4701 ins_encode %{ 4702 int vector_len = 1; 4703 __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4704 %} 4705 ins_pipe( pipe_slow ); 4706%} 4707 4708instruct Repl8F_mem_evex(vec dst, memory mem) %{ 4709 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4710 match(Set dst (ReplicateF (LoadF mem))); 4711 format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} 4712 ins_encode %{ 4713 int vector_len = 1; 4714 __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4715 %} 4716 ins_pipe( pipe_slow ); 4717%} 4718 4719instruct Repl16F_evex(vec dst, regF src) %{ 4720 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4721 match(Set dst (ReplicateF src)); 4722 format %{ "vpbroadcastss $dst,$src\t! replicate16F" %} 4723 ins_encode %{ 4724 int vector_len = 2; 4725 __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4726 %} 4727 ins_pipe( pipe_slow ); 4728%} 4729 4730instruct Repl16F_mem_evex(vec dst, memory mem) %{ 4731 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4732 match(Set dst (ReplicateF (LoadF mem))); 4733 format %{ "vbroadcastss $dst,$mem\t! replicate16F" %} 4734 ins_encode %{ 4735 int vector_len = 2; 4736 __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4737 %} 4738 ins_pipe( pipe_slow ); 4739%} 4740 4741instruct Repl16F_zero_evex(vec dst, immF0 zero) %{ 4742 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4743 match(Set dst (ReplicateF zero)); 4744 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %} 4745 ins_encode %{ 4746 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4747 int vector_len = 2; 4748 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4749 %} 4750 ins_pipe( fpu_reg_reg ); 4751%} 4752 4753instruct Repl4D_evex(vec dst, regD src) %{ 4754 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4755 match(Set dst (ReplicateD src)); 4756 format %{ "vpbroadcastsd $dst,$src\t! replicate4D" %} 4757 ins_encode %{ 4758 int vector_len = 1; 4759 __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4760 %} 4761 ins_pipe( pipe_slow ); 4762%} 4763 4764instruct Repl4D_mem_evex(vec dst, memory mem) %{ 4765 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4766 match(Set dst (ReplicateD (LoadD mem))); 4767 format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %} 4768 ins_encode %{ 4769 int vector_len = 1; 4770 __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4771 %} 4772 ins_pipe( pipe_slow ); 4773%} 4774 4775instruct Repl8D_evex(vec dst, regD src) %{ 4776 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4777 match(Set dst (ReplicateD src)); 4778 format %{ "vpbroadcastsd $dst,$src\t! replicate8D" %} 4779 ins_encode %{ 4780 int vector_len = 2; 4781 __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4782 %} 4783 ins_pipe( pipe_slow ); 4784%} 4785 4786instruct Repl8D_mem_evex(vec dst, memory mem) %{ 4787 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4788 match(Set dst (ReplicateD (LoadD mem))); 4789 format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} 4790 ins_encode %{ 4791 int vector_len = 2; 4792 __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4793 %} 4794 ins_pipe( pipe_slow ); 4795%} 4796 4797instruct Repl8D_zero_evex(vec dst, immD0 zero) %{ 4798 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4799 match(Set dst (ReplicateD zero)); 4800 format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} 4801 ins_encode %{ 4802 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4803 int vector_len = 2; 4804 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4805 %} 4806 ins_pipe( fpu_reg_reg ); 4807%} 4808 4809// ====================REDUCTION ARITHMETIC======================================= 4810 4811instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{ 4812 predicate(UseSSE > 2 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 2); 4813 match(Set dst (AddReductionVI src1 src2)); 4814 effect(TEMP tmp2, TEMP tmp); 4815 format %{ "movdqu $tmp2,$src2\n\t" 4816 "phaddd $tmp2,$tmp2\n\t" 4817 "movd $tmp,$src1\n\t" 4818 "paddd $tmp,$tmp2\n\t" 4819 "movd $dst,$tmp\t! add reduction2I" %} 4820 ins_encode %{ 4821 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4822 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4823 __ movdl($tmp$$XMMRegister, $src1$$Register); 4824 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4825 __ movdl($dst$$Register, $tmp$$XMMRegister); 4826 %} 4827 ins_pipe( pipe_slow ); 4828%} 4829 4830instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{ 4831 predicate(UseAVX > 0 && VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->length() == 2); 4832 match(Set dst (AddReductionVI src1 src2)); 4833 effect(TEMP tmp, TEMP tmp2); 4834 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4835 "movd $tmp2,$src1\n\t" 4836 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4837 "movd $dst,$tmp2\t! add reduction2I" %} 4838 ins_encode %{ 4839 int vector_len = 0; 4840 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4841 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4842 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4843 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4844 %} 4845 ins_pipe( pipe_slow ); 4846%} 4847 4848instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{ 4849 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 2); 4850 match(Set dst (AddReductionVI src1 src2)); 4851 effect(TEMP tmp, TEMP tmp2); 4852 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4853 "vpaddd $tmp,$src2,$tmp2\n\t" 4854 "movd $tmp2,$src1\n\t" 4855 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4856 "movd $dst,$tmp2\t! add reduction2I" %} 4857 ins_encode %{ 4858 int vector_len = 0; 4859 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4860 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4861 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4862 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4863 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4864 %} 4865 ins_pipe( pipe_slow ); 4866%} 4867 4868instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{ 4869 predicate(UseSSE > 2 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 4); 4870 match(Set dst (AddReductionVI src1 src2)); 4871 effect(TEMP tmp, TEMP tmp2); 4872 format %{ "movdqu $tmp,$src2\n\t" 4873 "phaddd $tmp,$tmp\n\t" 4874 "phaddd $tmp,$tmp\n\t" 4875 "movd $tmp2,$src1\n\t" 4876 "paddd $tmp2,$tmp\n\t" 4877 "movd $dst,$tmp2\t! add reduction4I" %} 4878 ins_encode %{ 4879 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 4880 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4881 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4882 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4883 __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); 4884 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4885 %} 4886 ins_pipe( pipe_slow ); 4887%} 4888 4889instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{ 4890 predicate(UseAVX > 0 && VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->length() == 4); 4891 match(Set dst (AddReductionVI src1 src2)); 4892 effect(TEMP tmp, TEMP tmp2); 4893 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4894 "vphaddd $tmp,$tmp,$tmp\n\t" 4895 "movd $tmp2,$src1\n\t" 4896 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4897 "movd $dst,$tmp2\t! add reduction4I" %} 4898 ins_encode %{ 4899 int vector_len = 0; 4900 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4901 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 4902 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4903 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4904 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4905 %} 4906 ins_pipe( pipe_slow ); 4907%} 4908 4909instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{ 4910 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 4); 4911 match(Set dst (AddReductionVI src1 src2)); 4912 effect(TEMP tmp, TEMP tmp2); 4913 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4914 "vpaddd $tmp,$src2,$tmp2\n\t" 4915 "pshufd $tmp2,$tmp,0x1\n\t" 4916 "vpaddd $tmp,$tmp,$tmp2\n\t" 4917 "movd $tmp2,$src1\n\t" 4918 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4919 "movd $dst,$tmp2\t! add reduction4I" %} 4920 ins_encode %{ 4921 int vector_len = 0; 4922 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4923 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4924 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4925 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4926 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4927 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4928 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4929 %} 4930 ins_pipe( pipe_slow ); 4931%} 4932 4933instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{ 4934 predicate(UseAVX > 0 && VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->length() == 8); 4935 match(Set dst (AddReductionVI src1 src2)); 4936 effect(TEMP tmp, TEMP tmp2); 4937 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4938 "vphaddd $tmp,$tmp,$tmp2\n\t" 4939 "vextracti128_high $tmp2,$tmp\n\t" 4940 "vpaddd $tmp,$tmp,$tmp2\n\t" 4941 "movd $tmp2,$src1\n\t" 4942 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4943 "movd $dst,$tmp2\t! add reduction8I" %} 4944 ins_encode %{ 4945 int vector_len = 1; 4946 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4947 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4948 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 4949 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4950 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4951 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4952 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4953 %} 4954 ins_pipe( pipe_slow ); 4955%} 4956 4957instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{ 4958 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 8); 4959 match(Set dst (AddReductionVI src1 src2)); 4960 effect(TEMP tmp, TEMP tmp2); 4961 format %{ "vextracti128_high $tmp,$src2\n\t" 4962 "vpaddd $tmp,$tmp,$src2\n\t" 4963 "pshufd $tmp2,$tmp,0xE\n\t" 4964 "vpaddd $tmp,$tmp,$tmp2\n\t" 4965 "pshufd $tmp2,$tmp,0x1\n\t" 4966 "vpaddd $tmp,$tmp,$tmp2\n\t" 4967 "movd $tmp2,$src1\n\t" 4968 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4969 "movd $dst,$tmp2\t! add reduction8I" %} 4970 ins_encode %{ 4971 int vector_len = 0; 4972 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 4973 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4974 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4975 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4976 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4977 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4978 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4979 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4980 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4981 %} 4982 ins_pipe( pipe_slow ); 4983%} 4984 4985instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, legVec src2, legVec tmp, legVec tmp2, legVec tmp3) %{ 4986 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 16); 4987 match(Set dst (AddReductionVI src1 src2)); 4988 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4989 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 4990 "vpaddd $tmp3,$tmp3,$src2\n\t" 4991 "vextracti128_high $tmp,$tmp3\n\t" 4992 "vpaddd $tmp,$tmp,$tmp3\n\t" 4993 "pshufd $tmp2,$tmp,0xE\n\t" 4994 "vpaddd $tmp,$tmp,$tmp2\n\t" 4995 "pshufd $tmp2,$tmp,0x1\n\t" 4996 "vpaddd $tmp,$tmp,$tmp2\n\t" 4997 "movd $tmp2,$src1\n\t" 4998 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4999 "movd $dst,$tmp2\t! mul reduction16I" %} 5000 ins_encode %{ 5001 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5002 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5003 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5004 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5005 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5006 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5007 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5008 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5009 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5010 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5011 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5012 %} 5013 ins_pipe( pipe_slow ); 5014%} 5015 5016#ifdef _LP64 5017instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp2) %{ 5018 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 2); 5019 match(Set dst (AddReductionVL src1 src2)); 5020 effect(TEMP tmp, TEMP tmp2); 5021 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5022 "vpaddq $tmp,$src2,$tmp2\n\t" 5023 "movdq $tmp2,$src1\n\t" 5024 "vpaddq $tmp2,$tmp,$tmp2\n\t" 5025 "movdq $dst,$tmp2\t! add reduction2L" %} 5026 ins_encode %{ 5027 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5028 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5029 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5030 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5031 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5032 %} 5033 ins_pipe( pipe_slow ); 5034%} 5035 5036instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp2) %{ 5037 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 4); 5038 match(Set dst (AddReductionVL src1 src2)); 5039 effect(TEMP tmp, TEMP tmp2); 5040 format %{ "vextracti128_high $tmp,$src2\n\t" 5041 "vpaddq $tmp2,$tmp,$src2\n\t" 5042 "pshufd $tmp,$tmp2,0xE\n\t" 5043 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5044 "movdq $tmp,$src1\n\t" 5045 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5046 "movdq $dst,$tmp2\t! add reduction4L" %} 5047 ins_encode %{ 5048 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5049 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5050 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5051 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5052 __ movdq($tmp$$XMMRegister, $src1$$Register); 5053 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5054 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5055 %} 5056 ins_pipe( pipe_slow ); 5057%} 5058 5059instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, legVec src2, legVec tmp, legVec tmp2) %{ 5060 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 8); 5061 match(Set dst (AddReductionVL src1 src2)); 5062 effect(TEMP tmp, TEMP tmp2); 5063 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5064 "vpaddq $tmp2,$tmp2,$src2\n\t" 5065 "vextracti128_high $tmp,$tmp2\n\t" 5066 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5067 "pshufd $tmp,$tmp2,0xE\n\t" 5068 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5069 "movdq $tmp,$src1\n\t" 5070 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5071 "movdq $dst,$tmp2\t! add reduction8L" %} 5072 ins_encode %{ 5073 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5074 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5075 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5076 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5077 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5078 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5079 __ movdq($tmp$$XMMRegister, $src1$$Register); 5080 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5081 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5082 %} 5083 ins_pipe( pipe_slow ); 5084%} 5085#endif 5086 5087instruct rsadd2F_reduction_reg(regF dst, vec src2, vec tmp) %{ 5088 predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 2); 5089 match(Set dst (AddReductionVF dst src2)); 5090 effect(TEMP dst, TEMP tmp); 5091 format %{ "addss $dst,$src2\n\t" 5092 "pshufd $tmp,$src2,0x01\n\t" 5093 "addss $dst,$tmp\t! add reduction2F" %} 5094 ins_encode %{ 5095 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5096 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5097 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5098 %} 5099 ins_pipe( pipe_slow ); 5100%} 5101 5102instruct rvadd2F_reduction_reg(regF dst, vec src2, vec tmp) %{ 5103 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 2); 5104 match(Set dst (AddReductionVF dst src2)); 5105 effect(TEMP dst, TEMP tmp); 5106 format %{ "vaddss $dst,$dst,$src2\n\t" 5107 "pshufd $tmp,$src2,0x01\n\t" 5108 "vaddss $dst,$dst,$tmp\t! add reduction2F" %} 5109 ins_encode %{ 5110 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5111 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5112 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5113 %} 5114 ins_pipe( pipe_slow ); 5115%} 5116 5117instruct rsadd4F_reduction_reg(regF dst, vec src2, vec tmp) %{ 5118 predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 4); 5119 match(Set dst (AddReductionVF dst src2)); 5120 effect(TEMP dst, TEMP tmp); 5121 format %{ "addss $dst,$src2\n\t" 5122 "pshufd $tmp,$src2,0x01\n\t" 5123 "addss $dst,$tmp\n\t" 5124 "pshufd $tmp,$src2,0x02\n\t" 5125 "addss $dst,$tmp\n\t" 5126 "pshufd $tmp,$src2,0x03\n\t" 5127 "addss $dst,$tmp\t! add reduction4F" %} 5128 ins_encode %{ 5129 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5130 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5131 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5132 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5133 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5134 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5135 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5136 %} 5137 ins_pipe( pipe_slow ); 5138%} 5139 5140instruct rvadd4F_reduction_reg(regF dst, vec src2, vec tmp) %{ 5141 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 4); 5142 match(Set dst (AddReductionVF dst src2)); 5143 effect(TEMP tmp, TEMP dst); 5144 format %{ "vaddss $dst,dst,$src2\n\t" 5145 "pshufd $tmp,$src2,0x01\n\t" 5146 "vaddss $dst,$dst,$tmp\n\t" 5147 "pshufd $tmp,$src2,0x02\n\t" 5148 "vaddss $dst,$dst,$tmp\n\t" 5149 "pshufd $tmp,$src2,0x03\n\t" 5150 "vaddss $dst,$dst,$tmp\t! add reduction4F" %} 5151 ins_encode %{ 5152 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5153 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5154 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5155 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5156 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5157 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5158 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5159 %} 5160 ins_pipe( pipe_slow ); 5161%} 5162 5163instruct radd8F_reduction_reg(regF dst, vec src2, vec tmp, vec tmp2) %{ 5164 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 8); 5165 match(Set dst (AddReductionVF dst src2)); 5166 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5167 format %{ "vaddss $dst,$dst,$src2\n\t" 5168 "pshufd $tmp,$src2,0x01\n\t" 5169 "vaddss $dst,$dst,$tmp\n\t" 5170 "pshufd $tmp,$src2,0x02\n\t" 5171 "vaddss $dst,$dst,$tmp\n\t" 5172 "pshufd $tmp,$src2,0x03\n\t" 5173 "vaddss $dst,$dst,$tmp\n\t" 5174 "vextractf128_high $tmp2,$src2\n\t" 5175 "vaddss $dst,$dst,$tmp2\n\t" 5176 "pshufd $tmp,$tmp2,0x01\n\t" 5177 "vaddss $dst,$dst,$tmp\n\t" 5178 "pshufd $tmp,$tmp2,0x02\n\t" 5179 "vaddss $dst,$dst,$tmp\n\t" 5180 "pshufd $tmp,$tmp2,0x03\n\t" 5181 "vaddss $dst,$dst,$tmp\t! add reduction8F" %} 5182 ins_encode %{ 5183 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5184 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5185 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5186 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5187 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5188 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5189 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5190 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5191 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5192 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5193 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5194 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5195 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5196 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5197 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5198 %} 5199 ins_pipe( pipe_slow ); 5200%} 5201 5202instruct radd16F_reduction_reg(regF dst, legVec src2, legVec tmp, legVec tmp2) %{ 5203 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 16); 5204 match(Set dst (AddReductionVF dst src2)); 5205 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5206 format %{ "vaddss $dst,$dst,$src2\n\t" 5207 "pshufd $tmp,$src2,0x01\n\t" 5208 "vaddss $dst,$dst,$tmp\n\t" 5209 "pshufd $tmp,$src2,0x02\n\t" 5210 "vaddss $dst,$dst,$tmp\n\t" 5211 "pshufd $tmp,$src2,0x03\n\t" 5212 "vaddss $dst,$dst,$tmp\n\t" 5213 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5214 "vaddss $dst,$dst,$tmp2\n\t" 5215 "pshufd $tmp,$tmp2,0x01\n\t" 5216 "vaddss $dst,$dst,$tmp\n\t" 5217 "pshufd $tmp,$tmp2,0x02\n\t" 5218 "vaddss $dst,$dst,$tmp\n\t" 5219 "pshufd $tmp,$tmp2,0x03\n\t" 5220 "vaddss $dst,$dst,$tmp\n\t" 5221 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5222 "vaddss $dst,$dst,$tmp2\n\t" 5223 "pshufd $tmp,$tmp2,0x01\n\t" 5224 "vaddss $dst,$dst,$tmp\n\t" 5225 "pshufd $tmp,$tmp2,0x02\n\t" 5226 "vaddss $dst,$dst,$tmp\n\t" 5227 "pshufd $tmp,$tmp2,0x03\n\t" 5228 "vaddss $dst,$dst,$tmp\n\t" 5229 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5230 "vaddss $dst,$dst,$tmp2\n\t" 5231 "pshufd $tmp,$tmp2,0x01\n\t" 5232 "vaddss $dst,$dst,$tmp\n\t" 5233 "pshufd $tmp,$tmp2,0x02\n\t" 5234 "vaddss $dst,$dst,$tmp\n\t" 5235 "pshufd $tmp,$tmp2,0x03\n\t" 5236 "vaddss $dst,$dst,$tmp\t! add reduction16F" %} 5237 ins_encode %{ 5238 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5239 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5240 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5241 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5242 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5243 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5244 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5245 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5246 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5247 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5248 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5249 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5250 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5251 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5252 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5253 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5254 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5255 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5256 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5257 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5258 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5259 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5260 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5261 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5262 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5263 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5264 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5265 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5266 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5267 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5268 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5269 %} 5270 ins_pipe( pipe_slow ); 5271%} 5272 5273instruct rsadd2D_reduction_reg(regD dst, vec src2, vec tmp) %{ 5274 predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 2); 5275 match(Set dst (AddReductionVD dst src2)); 5276 effect(TEMP tmp, TEMP dst); 5277 format %{ "addsd $dst,$src2\n\t" 5278 "pshufd $tmp,$src2,0xE\n\t" 5279 "addsd $dst,$tmp\t! add reduction2D" %} 5280 ins_encode %{ 5281 __ addsd($dst$$XMMRegister, $src2$$XMMRegister); 5282 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5283 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 5284 %} 5285 ins_pipe( pipe_slow ); 5286%} 5287 5288instruct rvadd2D_reduction_reg(regD dst, vec src2, vec tmp) %{ 5289 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 2); 5290 match(Set dst (AddReductionVD dst src2)); 5291 effect(TEMP tmp, TEMP dst); 5292 format %{ "vaddsd $dst,$dst,$src2\n\t" 5293 "pshufd $tmp,$src2,0xE\n\t" 5294 "vaddsd $dst,$dst,$tmp\t! add reduction2D" %} 5295 ins_encode %{ 5296 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5297 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5298 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5299 %} 5300 ins_pipe( pipe_slow ); 5301%} 5302 5303instruct rvadd4D_reduction_reg(regD dst, vec src2, vec tmp, vec tmp2) %{ 5304 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 4); 5305 match(Set dst (AddReductionVD dst src2)); 5306 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5307 format %{ "vaddsd $dst,$dst,$src2\n\t" 5308 "pshufd $tmp,$src2,0xE\n\t" 5309 "vaddsd $dst,$dst,$tmp\n\t" 5310 "vextractf128 $tmp2,$src2,0x1\n\t" 5311 "vaddsd $dst,$dst,$tmp2\n\t" 5312 "pshufd $tmp,$tmp2,0xE\n\t" 5313 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 5314 ins_encode %{ 5315 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5316 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5317 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5318 __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5319 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5320 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5321 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5322 %} 5323 ins_pipe( pipe_slow ); 5324%} 5325 5326instruct rvadd8D_reduction_reg(regD dst, legVec src2, legVec tmp, legVec tmp2) %{ 5327 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 8); 5328 match(Set dst (AddReductionVD dst src2)); 5329 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5330 format %{ "vaddsd $dst,$dst,$src2\n\t" 5331 "pshufd $tmp,$src2,0xE\n\t" 5332 "vaddsd $dst,$dst,$tmp\n\t" 5333 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5334 "vaddsd $dst,$dst,$tmp2\n\t" 5335 "pshufd $tmp,$tmp2,0xE\n\t" 5336 "vaddsd $dst,$dst,$tmp\n\t" 5337 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5338 "vaddsd $dst,$dst,$tmp2\n\t" 5339 "pshufd $tmp,$tmp2,0xE\n\t" 5340 "vaddsd $dst,$dst,$tmp\n\t" 5341 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5342 "vaddsd $dst,$dst,$tmp2\n\t" 5343 "pshufd $tmp,$tmp2,0xE\n\t" 5344 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 5345 ins_encode %{ 5346 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5347 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5348 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5349 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5350 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5351 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5352 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5353 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5354 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5355 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5356 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5357 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5358 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5359 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5360 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5361 %} 5362 ins_pipe( pipe_slow ); 5363%} 5364 5365instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{ 5366 predicate(UseSSE > 3 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 2); 5367 match(Set dst (MulReductionVI src1 src2)); 5368 effect(TEMP tmp, TEMP tmp2); 5369 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5370 "pmulld $tmp2,$src2\n\t" 5371 "movd $tmp,$src1\n\t" 5372 "pmulld $tmp2,$tmp\n\t" 5373 "movd $dst,$tmp2\t! mul reduction2I" %} 5374 ins_encode %{ 5375 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5376 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5377 __ movdl($tmp$$XMMRegister, $src1$$Register); 5378 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5379 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5380 %} 5381 ins_pipe( pipe_slow ); 5382%} 5383 5384instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{ 5385 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 2); 5386 match(Set dst (MulReductionVI src1 src2)); 5387 effect(TEMP tmp, TEMP tmp2); 5388 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5389 "vpmulld $tmp,$src2,$tmp2\n\t" 5390 "movd $tmp2,$src1\n\t" 5391 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5392 "movd $dst,$tmp2\t! mul reduction2I" %} 5393 ins_encode %{ 5394 int vector_len = 0; 5395 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5396 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5397 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5398 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5399 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5400 %} 5401 ins_pipe( pipe_slow ); 5402%} 5403 5404instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{ 5405 predicate(UseSSE > 3 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 4); 5406 match(Set dst (MulReductionVI src1 src2)); 5407 effect(TEMP tmp, TEMP tmp2); 5408 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5409 "pmulld $tmp2,$src2\n\t" 5410 "pshufd $tmp,$tmp2,0x1\n\t" 5411 "pmulld $tmp2,$tmp\n\t" 5412 "movd $tmp,$src1\n\t" 5413 "pmulld $tmp2,$tmp\n\t" 5414 "movd $dst,$tmp2\t! mul reduction4I" %} 5415 ins_encode %{ 5416 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5417 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5418 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5419 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5420 __ movdl($tmp$$XMMRegister, $src1$$Register); 5421 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5422 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5423 %} 5424 ins_pipe( pipe_slow ); 5425%} 5426 5427instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{ 5428 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 4); 5429 match(Set dst (MulReductionVI src1 src2)); 5430 effect(TEMP tmp, TEMP tmp2); 5431 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5432 "vpmulld $tmp,$src2,$tmp2\n\t" 5433 "pshufd $tmp2,$tmp,0x1\n\t" 5434 "vpmulld $tmp,$tmp,$tmp2\n\t" 5435 "movd $tmp2,$src1\n\t" 5436 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5437 "movd $dst,$tmp2\t! mul reduction4I" %} 5438 ins_encode %{ 5439 int vector_len = 0; 5440 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5441 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5442 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5443 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5444 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5445 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5446 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5447 %} 5448 ins_pipe( pipe_slow ); 5449%} 5450 5451instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vec src2, vec tmp, vec tmp2) %{ 5452 predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->length() == 8); 5453 match(Set dst (MulReductionVI src1 src2)); 5454 effect(TEMP tmp, TEMP tmp2); 5455 format %{ "vextracti128_high $tmp,$src2\n\t" 5456 "vpmulld $tmp,$tmp,$src2\n\t" 5457 "pshufd $tmp2,$tmp,0xE\n\t" 5458 "vpmulld $tmp,$tmp,$tmp2\n\t" 5459 "pshufd $tmp2,$tmp,0x1\n\t" 5460 "vpmulld $tmp,$tmp,$tmp2\n\t" 5461 "movd $tmp2,$src1\n\t" 5462 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5463 "movd $dst,$tmp2\t! mul reduction8I" %} 5464 ins_encode %{ 5465 int vector_len = 0; 5466 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5467 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5468 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5469 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5470 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5471 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5472 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5473 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5474 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5475 %} 5476 ins_pipe( pipe_slow ); 5477%} 5478 5479instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, legVec src2, legVec tmp, legVec tmp2, legVec tmp3) %{ 5480 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 16); 5481 match(Set dst (MulReductionVI src1 src2)); 5482 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5483 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5484 "vpmulld $tmp3,$tmp3,$src2\n\t" 5485 "vextracti128_high $tmp,$tmp3\n\t" 5486 "vpmulld $tmp,$tmp,$src2\n\t" 5487 "pshufd $tmp2,$tmp,0xE\n\t" 5488 "vpmulld $tmp,$tmp,$tmp2\n\t" 5489 "pshufd $tmp2,$tmp,0x1\n\t" 5490 "vpmulld $tmp,$tmp,$tmp2\n\t" 5491 "movd $tmp2,$src1\n\t" 5492 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5493 "movd $dst,$tmp2\t! mul reduction16I" %} 5494 ins_encode %{ 5495 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5496 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5497 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5498 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5499 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5500 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5501 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5502 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5503 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5504 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5505 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5506 %} 5507 ins_pipe( pipe_slow ); 5508%} 5509 5510#ifdef _LP64 5511instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp2) %{ 5512 predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->in(2)->bottom_type()->is_vect()->length() == 2); 5513 match(Set dst (MulReductionVL src1 src2)); 5514 effect(TEMP tmp, TEMP tmp2); 5515 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5516 "vpmullq $tmp,$src2,$tmp2\n\t" 5517 "movdq $tmp2,$src1\n\t" 5518 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5519 "movdq $dst,$tmp2\t! mul reduction2L" %} 5520 ins_encode %{ 5521 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5522 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5523 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5524 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5525 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5526 %} 5527 ins_pipe( pipe_slow ); 5528%} 5529 5530instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vec src2, vec tmp, vec tmp2) %{ 5531 predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->in(2)->bottom_type()->is_vect()->length() == 4); 5532 match(Set dst (MulReductionVL src1 src2)); 5533 effect(TEMP tmp, TEMP tmp2); 5534 format %{ "vextracti128_high $tmp,$src2\n\t" 5535 "vpmullq $tmp2,$tmp,$src2\n\t" 5536 "pshufd $tmp,$tmp2,0xE\n\t" 5537 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5538 "movdq $tmp,$src1\n\t" 5539 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5540 "movdq $dst,$tmp2\t! mul reduction4L" %} 5541 ins_encode %{ 5542 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5543 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5544 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5545 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5546 __ movdq($tmp$$XMMRegister, $src1$$Register); 5547 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5548 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5549 %} 5550 ins_pipe( pipe_slow ); 5551%} 5552 5553instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, legVec src2, legVec tmp, legVec tmp2) %{ 5554 predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->in(2)->bottom_type()->is_vect()->length() == 8); 5555 match(Set dst (MulReductionVL src1 src2)); 5556 effect(TEMP tmp, TEMP tmp2); 5557 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5558 "vpmullq $tmp2,$tmp2,$src2\n\t" 5559 "vextracti128_high $tmp,$tmp2\n\t" 5560 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5561 "pshufd $tmp,$tmp2,0xE\n\t" 5562 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5563 "movdq $tmp,$src1\n\t" 5564 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5565 "movdq $dst,$tmp2\t! mul reduction8L" %} 5566 ins_encode %{ 5567 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5568 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5569 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5570 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5571 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5572 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5573 __ movdq($tmp$$XMMRegister, $src1$$Register); 5574 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5575 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5576 %} 5577 ins_pipe( pipe_slow ); 5578%} 5579#endif 5580 5581instruct rsmul2F_reduction(regF dst, vec src2, vec tmp) %{ 5582 predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 2); 5583 match(Set dst (MulReductionVF dst src2)); 5584 effect(TEMP dst, TEMP tmp); 5585 format %{ "mulss $dst,$src2\n\t" 5586 "pshufd $tmp,$src2,0x01\n\t" 5587 "mulss $dst,$tmp\t! mul reduction2F" %} 5588 ins_encode %{ 5589 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5590 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5591 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5592 %} 5593 ins_pipe( pipe_slow ); 5594%} 5595 5596instruct rvmul2F_reduction_reg(regF dst, vec src2, vec tmp) %{ 5597 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 2); 5598 match(Set dst (MulReductionVF dst src2)); 5599 effect(TEMP tmp, TEMP dst); 5600 format %{ "vmulss $dst,$dst,$src2\n\t" 5601 "pshufd $tmp,$src2,0x01\n\t" 5602 "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} 5603 ins_encode %{ 5604 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5605 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5606 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5607 %} 5608 ins_pipe( pipe_slow ); 5609%} 5610 5611instruct rsmul4F_reduction_reg(regF dst, vec src2, vec tmp) %{ 5612 predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 4); 5613 match(Set dst (MulReductionVF dst src2)); 5614 effect(TEMP dst, TEMP tmp); 5615 format %{ "mulss $dst,$src2\n\t" 5616 "pshufd $tmp,$src2,0x01\n\t" 5617 "mulss $dst,$tmp\n\t" 5618 "pshufd $tmp,$src2,0x02\n\t" 5619 "mulss $dst,$tmp\n\t" 5620 "pshufd $tmp,$src2,0x03\n\t" 5621 "mulss $dst,$tmp\t! mul reduction4F" %} 5622 ins_encode %{ 5623 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5624 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5625 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5626 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5627 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5628 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5629 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5630 %} 5631 ins_pipe( pipe_slow ); 5632%} 5633 5634instruct rvmul4F_reduction_reg(regF dst, vec src2, vec tmp) %{ 5635 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 4); 5636 match(Set dst (MulReductionVF dst src2)); 5637 effect(TEMP tmp, TEMP dst); 5638 format %{ "vmulss $dst,$dst,$src2\n\t" 5639 "pshufd $tmp,$src2,0x01\n\t" 5640 "vmulss $dst,$dst,$tmp\n\t" 5641 "pshufd $tmp,$src2,0x02\n\t" 5642 "vmulss $dst,$dst,$tmp\n\t" 5643 "pshufd $tmp,$src2,0x03\n\t" 5644 "vmulss $dst,$dst,$tmp\t! mul reduction4F" %} 5645 ins_encode %{ 5646 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5647 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5648 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5649 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5650 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5651 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5652 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5653 %} 5654 ins_pipe( pipe_slow ); 5655%} 5656 5657instruct rvmul8F_reduction_reg(regF dst, vec src2, vec tmp, vec tmp2) %{ 5658 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 8); 5659 match(Set dst (MulReductionVF dst src2)); 5660 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5661 format %{ "vmulss $dst,$dst,$src2\n\t" 5662 "pshufd $tmp,$src2,0x01\n\t" 5663 "vmulss $dst,$dst,$tmp\n\t" 5664 "pshufd $tmp,$src2,0x02\n\t" 5665 "vmulss $dst,$dst,$tmp\n\t" 5666 "pshufd $tmp,$src2,0x03\n\t" 5667 "vmulss $dst,$dst,$tmp\n\t" 5668 "vextractf128_high $tmp2,$src2\n\t" 5669 "vmulss $dst,$dst,$tmp2\n\t" 5670 "pshufd $tmp,$tmp2,0x01\n\t" 5671 "vmulss $dst,$dst,$tmp\n\t" 5672 "pshufd $tmp,$tmp2,0x02\n\t" 5673 "vmulss $dst,$dst,$tmp\n\t" 5674 "pshufd $tmp,$tmp2,0x03\n\t" 5675 "vmulss $dst,$dst,$tmp\t! mul reduction8F" %} 5676 ins_encode %{ 5677 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5678 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5679 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5680 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5681 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5682 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5683 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5684 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5685 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5686 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5687 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5688 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5689 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5690 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5691 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5692 %} 5693 ins_pipe( pipe_slow ); 5694%} 5695 5696instruct rvmul16F_reduction_reg(regF dst, legVec src2, legVec tmp, legVec tmp2) %{ 5697 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 16); 5698 match(Set dst (MulReductionVF dst src2)); 5699 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5700 format %{ "vmulss $dst,$dst,$src2\n\t" 5701 "pshufd $tmp,$src2,0x01\n\t" 5702 "vmulss $dst,$dst,$tmp\n\t" 5703 "pshufd $tmp,$src2,0x02\n\t" 5704 "vmulss $dst,$dst,$tmp\n\t" 5705 "pshufd $tmp,$src2,0x03\n\t" 5706 "vmulss $dst,$dst,$tmp\n\t" 5707 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5708 "vmulss $dst,$dst,$tmp2\n\t" 5709 "pshufd $tmp,$tmp2,0x01\n\t" 5710 "vmulss $dst,$dst,$tmp\n\t" 5711 "pshufd $tmp,$tmp2,0x02\n\t" 5712 "vmulss $dst,$dst,$tmp\n\t" 5713 "pshufd $tmp,$tmp2,0x03\n\t" 5714 "vmulss $dst,$dst,$tmp\n\t" 5715 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5716 "vmulss $dst,$dst,$tmp2\n\t" 5717 "pshufd $tmp,$tmp2,0x01\n\t" 5718 "vmulss $dst,$dst,$tmp\n\t" 5719 "pshufd $tmp,$tmp2,0x02\n\t" 5720 "vmulss $dst,$dst,$tmp\n\t" 5721 "pshufd $tmp,$tmp2,0x03\n\t" 5722 "vmulss $dst,$dst,$tmp\n\t" 5723 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5724 "vmulss $dst,$dst,$tmp2\n\t" 5725 "pshufd $tmp,$tmp2,0x01\n\t" 5726 "vmulss $dst,$dst,$tmp\n\t" 5727 "pshufd $tmp,$tmp2,0x02\n\t" 5728 "vmulss $dst,$dst,$tmp\n\t" 5729 "pshufd $tmp,$tmp2,0x03\n\t" 5730 "vmulss $dst,$dst,$tmp\t! mul reduction16F" %} 5731 ins_encode %{ 5732 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5733 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5734 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5735 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5736 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5737 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5738 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5739 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5740 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5741 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5742 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5743 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5744 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5745 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5746 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5747 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5748 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5749 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5750 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5751 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5752 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5753 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5754 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5755 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5756 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5757 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5758 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5759 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5760 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5761 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5762 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5763 %} 5764 ins_pipe( pipe_slow ); 5765%} 5766 5767instruct rsmul2D_reduction_reg(regD dst, vec src2, vec tmp) %{ 5768 predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->length() == 2); 5769 match(Set dst (MulReductionVD dst src2)); 5770 effect(TEMP dst, TEMP tmp); 5771 format %{ "mulsd $dst,$src2\n\t" 5772 "pshufd $tmp,$src2,0xE\n\t" 5773 "mulsd $dst,$tmp\t! mul reduction2D" %} 5774 ins_encode %{ 5775 __ mulsd($dst$$XMMRegister, $src2$$XMMRegister); 5776 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5777 __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); 5778 %} 5779 ins_pipe( pipe_slow ); 5780%} 5781 5782instruct rvmul2D_reduction_reg(regD dst, vec src2, vec tmp) %{ 5783 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 2); 5784 match(Set dst (MulReductionVD dst src2)); 5785 effect(TEMP tmp, TEMP dst); 5786 format %{ "vmulsd $dst,$dst,$src2\n\t" 5787 "pshufd $tmp,$src2,0xE\n\t" 5788 "vmulsd $dst,$dst,$tmp\t! mul reduction2D" %} 5789 ins_encode %{ 5790 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5791 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5792 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5793 %} 5794 ins_pipe( pipe_slow ); 5795%} 5796 5797instruct rvmul4D_reduction_reg(regD dst, vec src2, vec tmp, vec tmp2) %{ 5798 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->length() == 4); 5799 match(Set dst (MulReductionVD dst src2)); 5800 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5801 format %{ "vmulsd $dst,$dst,$src2\n\t" 5802 "pshufd $tmp,$src2,0xE\n\t" 5803 "vmulsd $dst,$dst,$tmp\n\t" 5804 "vextractf128_high $tmp2,$src2\n\t" 5805 "vmulsd $dst,$dst,$tmp2\n\t" 5806 "pshufd $tmp,$tmp2,0xE\n\t" 5807 "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %} 5808 ins_encode %{ 5809 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5810 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5811 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5812 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5813 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5814 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5815 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5816 %} 5817 ins_pipe( pipe_slow ); 5818%} 5819 5820instruct rvmul8D_reduction_reg(regD dst, legVec src2, legVec tmp, legVec tmp2) %{ 5821 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->length() == 8); 5822 match(Set dst (MulReductionVD dst src2)); 5823 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5824 format %{ "vmulsd $dst,$dst,$src2\n\t" 5825 "pshufd $tmp,$src2,0xE\n\t" 5826 "vmulsd $dst,$dst,$tmp\n\t" 5827 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5828 "vmulsd $dst,$dst,$tmp2\n\t" 5829 "pshufd $tmp,$src2,0xE\n\t" 5830 "vmulsd $dst,$dst,$tmp\n\t" 5831 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5832 "vmulsd $dst,$dst,$tmp2\n\t" 5833 "pshufd $tmp,$tmp2,0xE\n\t" 5834 "vmulsd $dst,$dst,$tmp\n\t" 5835 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5836 "vmulsd $dst,$dst,$tmp2\n\t" 5837 "pshufd $tmp,$tmp2,0xE\n\t" 5838 "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %} 5839 ins_encode %{ 5840 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5841 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5842 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5843 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5844 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5845 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5846 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5847 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5848 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5849 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5850 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5851 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5852 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5853 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5854 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5855 %} 5856 ins_pipe( pipe_slow ); 5857%} 5858 5859// ====================VECTOR ARITHMETIC======================================= 5860 5861// --------------------------------- ADD -------------------------------------- 5862 5863// Bytes vector add 5864instruct vaddB(vec dst, vec src) %{ 5865 predicate(UseAVX == 0); 5866 match(Set dst (AddVB dst src)); 5867 format %{ "paddb $dst,$src\t! add packedB" %} 5868 ins_encode %{ 5869 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5870 %} 5871 ins_pipe( pipe_slow ); 5872%} 5873 5874instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 5875 predicate(UseAVX > 0); 5876 match(Set dst (AddVB src1 src2)); 5877 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %} 5878 ins_encode %{ 5879 int vector_len = vector_length_encoding(this); 5880 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5881 %} 5882 ins_pipe( pipe_slow ); 5883%} 5884 5885instruct vaddB_mem(vec dst, vec src, memory mem) %{ 5886 predicate(UseAVX > 0); 5887 match(Set dst (AddVB src (LoadVector mem))); 5888 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 5889 ins_encode %{ 5890 int vector_len = vector_length_encoding(this); 5891 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5892 %} 5893 ins_pipe( pipe_slow ); 5894%} 5895 5896// Shorts/Chars vector add 5897instruct vaddS(vec dst, vec src) %{ 5898 predicate(UseAVX == 0); 5899 match(Set dst (AddVS dst src)); 5900 format %{ "paddw $dst,$src\t! add packedS" %} 5901 ins_encode %{ 5902 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5903 %} 5904 ins_pipe( pipe_slow ); 5905%} 5906 5907instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5908 predicate(UseAVX > 0); 5909 match(Set dst (AddVS src1 src2)); 5910 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5911 ins_encode %{ 5912 int vector_len = vector_length_encoding(this); 5913 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5914 %} 5915 ins_pipe( pipe_slow ); 5916%} 5917 5918instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5919 predicate(UseAVX > 0); 5920 match(Set dst (AddVS src (LoadVector mem))); 5921 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5922 ins_encode %{ 5923 int vector_len = vector_length_encoding(this); 5924 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5925 %} 5926 ins_pipe( pipe_slow ); 5927%} 5928 5929// Integers vector add 5930instruct vaddI(vec dst, vec src) %{ 5931 predicate(UseAVX == 0); 5932 match(Set dst (AddVI dst src)); 5933 format %{ "paddd $dst,$src\t! add packedI" %} 5934 ins_encode %{ 5935 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5936 %} 5937 ins_pipe( pipe_slow ); 5938%} 5939 5940instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5941 predicate(UseAVX > 0); 5942 match(Set dst (AddVI src1 src2)); 5943 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5944 ins_encode %{ 5945 int vector_len = vector_length_encoding(this); 5946 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5947 %} 5948 ins_pipe( pipe_slow ); 5949%} 5950 5951 5952instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5953 predicate(UseAVX > 0); 5954 match(Set dst (AddVI src (LoadVector mem))); 5955 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 5956 ins_encode %{ 5957 int vector_len = vector_length_encoding(this); 5958 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5959 %} 5960 ins_pipe( pipe_slow ); 5961%} 5962 5963// Longs vector add 5964instruct vaddL(vec dst, vec src) %{ 5965 predicate(UseAVX == 0); 5966 match(Set dst (AddVL dst src)); 5967 format %{ "paddq $dst,$src\t! add packedL" %} 5968 ins_encode %{ 5969 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5970 %} 5971 ins_pipe( pipe_slow ); 5972%} 5973 5974instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5975 predicate(UseAVX > 0); 5976 match(Set dst (AddVL src1 src2)); 5977 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %} 5978 ins_encode %{ 5979 int vector_len = vector_length_encoding(this); 5980 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5981 %} 5982 ins_pipe( pipe_slow ); 5983%} 5984 5985instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5986 predicate(UseAVX > 0); 5987 match(Set dst (AddVL src (LoadVector mem))); 5988 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5989 ins_encode %{ 5990 int vector_len = vector_length_encoding(this); 5991 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5992 %} 5993 ins_pipe( pipe_slow ); 5994%} 5995 5996// Floats vector add 5997instruct vaddF(vec dst, vec src) %{ 5998 predicate(UseAVX == 0); 5999 match(Set dst (AddVF dst src)); 6000 format %{ "addps $dst,$src\t! add packedF" %} 6001 ins_encode %{ 6002 __ addps($dst$$XMMRegister, $src$$XMMRegister); 6003 %} 6004 ins_pipe( pipe_slow ); 6005%} 6006 6007instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 6008 predicate(UseAVX > 0); 6009 match(Set dst (AddVF src1 src2)); 6010 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 6011 ins_encode %{ 6012 int vector_len = vector_length_encoding(this); 6013 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6014 %} 6015 ins_pipe( pipe_slow ); 6016%} 6017 6018instruct vaddF_mem(vec dst, vec src, memory mem) %{ 6019 predicate(UseAVX > 0); 6020 match(Set dst (AddVF src (LoadVector mem))); 6021 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 6022 ins_encode %{ 6023 int vector_len = vector_length_encoding(this); 6024 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6025 %} 6026 ins_pipe( pipe_slow ); 6027%} 6028 6029// Doubles vector add 6030instruct vaddD(vec dst, vec src) %{ 6031 predicate(UseAVX == 0); 6032 match(Set dst (AddVD dst src)); 6033 format %{ "addpd $dst,$src\t! add packedD" %} 6034 ins_encode %{ 6035 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 6036 %} 6037 ins_pipe( pipe_slow ); 6038%} 6039 6040instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 6041 predicate(UseAVX > 0); 6042 match(Set dst (AddVD src1 src2)); 6043 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 6044 ins_encode %{ 6045 int vector_len = vector_length_encoding(this); 6046 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6047 %} 6048 ins_pipe( pipe_slow ); 6049%} 6050 6051instruct vaddD_mem(vec dst, vec src, memory mem) %{ 6052 predicate(UseAVX > 0); 6053 match(Set dst (AddVD src (LoadVector mem))); 6054 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 6055 ins_encode %{ 6056 int vector_len = vector_length_encoding(this); 6057 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6058 %} 6059 ins_pipe( pipe_slow ); 6060%} 6061 6062// --------------------------------- SUB -------------------------------------- 6063 6064// Bytes vector sub 6065instruct vsubB(vec dst, vec src) %{ 6066 predicate(UseAVX == 0); 6067 match(Set dst (SubVB dst src)); 6068 format %{ "psubb $dst,$src\t! sub packedB" %} 6069 ins_encode %{ 6070 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6071 %} 6072 ins_pipe( pipe_slow ); 6073%} 6074 6075instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 6076 predicate(UseAVX > 0); 6077 match(Set dst (SubVB src1 src2)); 6078 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %} 6079 ins_encode %{ 6080 int vector_len = vector_length_encoding(this); 6081 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6082 %} 6083 ins_pipe( pipe_slow ); 6084%} 6085 6086instruct vsubB_mem(vec dst, vec src, memory mem) %{ 6087 predicate(UseAVX > 0); 6088 match(Set dst (SubVB src (LoadVector mem))); 6089 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 6090 ins_encode %{ 6091 int vector_len = vector_length_encoding(this); 6092 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6093 %} 6094 ins_pipe( pipe_slow ); 6095%} 6096 6097// Shorts/Chars vector sub 6098instruct vsubS(vec dst, vec src) %{ 6099 predicate(UseAVX == 0); 6100 match(Set dst (SubVS dst src)); 6101 format %{ "psubw $dst,$src\t! sub packedS" %} 6102 ins_encode %{ 6103 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6104 %} 6105 ins_pipe( pipe_slow ); 6106%} 6107 6108 6109instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 6110 predicate(UseAVX > 0); 6111 match(Set dst (SubVS src1 src2)); 6112 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 6113 ins_encode %{ 6114 int vector_len = vector_length_encoding(this); 6115 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6116 %} 6117 ins_pipe( pipe_slow ); 6118%} 6119 6120instruct vsubS_mem(vec dst, vec src, memory mem) %{ 6121 predicate(UseAVX > 0); 6122 match(Set dst (SubVS src (LoadVector mem))); 6123 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 6124 ins_encode %{ 6125 int vector_len = vector_length_encoding(this); 6126 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6127 %} 6128 ins_pipe( pipe_slow ); 6129%} 6130 6131// Integers vector sub 6132instruct vsubI(vec dst, vec src) %{ 6133 predicate(UseAVX == 0); 6134 match(Set dst (SubVI dst src)); 6135 format %{ "psubd $dst,$src\t! sub packedI" %} 6136 ins_encode %{ 6137 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 6138 %} 6139 ins_pipe( pipe_slow ); 6140%} 6141 6142instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 6143 predicate(UseAVX > 0); 6144 match(Set dst (SubVI src1 src2)); 6145 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 6146 ins_encode %{ 6147 int vector_len = vector_length_encoding(this); 6148 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6149 %} 6150 ins_pipe( pipe_slow ); 6151%} 6152 6153instruct vsubI_mem(vec dst, vec src, memory mem) %{ 6154 predicate(UseAVX > 0); 6155 match(Set dst (SubVI src (LoadVector mem))); 6156 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 6157 ins_encode %{ 6158 int vector_len = vector_length_encoding(this); 6159 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6160 %} 6161 ins_pipe( pipe_slow ); 6162%} 6163 6164// Longs vector sub 6165instruct vsubL(vec dst, vec src) %{ 6166 predicate(UseAVX == 0); 6167 match(Set dst (SubVL dst src)); 6168 format %{ "psubq $dst,$src\t! sub packedL" %} 6169 ins_encode %{ 6170 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 6171 %} 6172 ins_pipe( pipe_slow ); 6173%} 6174 6175instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 6176 predicate(UseAVX > 0); 6177 match(Set dst (SubVL src1 src2)); 6178 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %} 6179 ins_encode %{ 6180 int vector_len = vector_length_encoding(this); 6181 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6182 %} 6183 ins_pipe( pipe_slow ); 6184%} 6185 6186 6187instruct vsubL_mem(vec dst, vec src, memory mem) %{ 6188 predicate(UseAVX > 0); 6189 match(Set dst (SubVL src (LoadVector mem))); 6190 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 6191 ins_encode %{ 6192 int vector_len = vector_length_encoding(this); 6193 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6194 %} 6195 ins_pipe( pipe_slow ); 6196%} 6197 6198// Floats vector sub 6199instruct vsubF(vec dst, vec src) %{ 6200 predicate(UseAVX == 0); 6201 match(Set dst (SubVF dst src)); 6202 format %{ "subps $dst,$src\t! sub packedF" %} 6203 ins_encode %{ 6204 __ subps($dst$$XMMRegister, $src$$XMMRegister); 6205 %} 6206 ins_pipe( pipe_slow ); 6207%} 6208 6209instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 6210 predicate(UseAVX > 0); 6211 match(Set dst (SubVF src1 src2)); 6212 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 6213 ins_encode %{ 6214 int vector_len = vector_length_encoding(this); 6215 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6216 %} 6217 ins_pipe( pipe_slow ); 6218%} 6219 6220instruct vsubF_mem(vec dst, vec src, memory mem) %{ 6221 predicate(UseAVX > 0); 6222 match(Set dst (SubVF src (LoadVector mem))); 6223 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 6224 ins_encode %{ 6225 int vector_len = vector_length_encoding(this); 6226 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6227 %} 6228 ins_pipe( pipe_slow ); 6229%} 6230 6231// Doubles vector sub 6232instruct vsubD(vec dst, vec src) %{ 6233 predicate(UseAVX == 0); 6234 match(Set dst (SubVD dst src)); 6235 format %{ "subpd $dst,$src\t! sub packedD" %} 6236 ins_encode %{ 6237 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 6238 %} 6239 ins_pipe( pipe_slow ); 6240%} 6241 6242instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 6243 predicate(UseAVX > 0); 6244 match(Set dst (SubVD src1 src2)); 6245 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 6246 ins_encode %{ 6247 int vector_len = vector_length_encoding(this); 6248 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6249 %} 6250 ins_pipe( pipe_slow ); 6251%} 6252 6253instruct vsubD_mem(vec dst, vec src, memory mem) %{ 6254 predicate(UseAVX > 0); 6255 match(Set dst (SubVD src (LoadVector mem))); 6256 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %} 6257 ins_encode %{ 6258 int vector_len = vector_length_encoding(this); 6259 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6260 %} 6261 ins_pipe( pipe_slow ); 6262%} 6263 6264// --------------------------------- MUL -------------------------------------- 6265 6266// Byte vector mul 6267instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ 6268 predicate(n->as_Vector()->length() == 4 || 6269 n->as_Vector()->length() == 8); 6270 match(Set dst (MulVB src1 src2)); 6271 effect(TEMP dst, TEMP tmp, TEMP scratch); 6272 format %{"vector_mulB $dst,$src1,$src2" %} 6273 ins_encode %{ 6274 assert(UseSSE > 3, "required"); 6275 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); 6276 __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister); 6277 __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister); 6278 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 6279 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6280 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6281 %} 6282 ins_pipe( pipe_slow ); 6283%} 6284 6285instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 6286 predicate(n->as_Vector()->length() == 16 && UseAVX <= 1); 6287 match(Set dst (MulVB src1 src2)); 6288 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 6289 format %{"vector_mulB $dst,$src1,$src2" %} 6290 ins_encode %{ 6291 assert(UseSSE > 3, "required"); 6292 __ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister); 6293 __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); 6294 __ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister); 6295 __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE); 6296 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE); 6297 __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); 6298 __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister); 6299 __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister); 6300 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 6301 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6302 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6303 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6304 %} 6305 ins_pipe( pipe_slow ); 6306%} 6307 6308instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ 6309 predicate(n->as_Vector()->length() == 16 && UseAVX > 1); 6310 match(Set dst (MulVB src1 src2)); 6311 effect(TEMP dst, TEMP tmp, TEMP scratch); 6312 format %{"vector_mulB $dst,$src1,$src2" %} 6313 ins_encode %{ 6314 int vector_len = Assembler::AVX_256bit; 6315 __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len); 6316 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); 6317 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vector_len); 6318 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 6319 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 6320 __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister); 6321 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0); 6322 %} 6323 ins_pipe( pipe_slow ); 6324%} 6325 6326instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 6327 predicate(n->as_Vector()->length() == 32); 6328 match(Set dst (MulVB src1 src2)); 6329 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 6330 format %{"vector_mulB $dst,$src1,$src2" %} 6331 ins_encode %{ 6332 assert(UseAVX > 1, "required"); 6333 int vector_len = Assembler::AVX_256bit; 6334 __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister); 6335 __ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister); 6336 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 6337 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 6338 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 6339 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); 6340 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); 6341 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 6342 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 6343 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 6344 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 6345 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6346 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vector_len); 6347 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len); 6348 %} 6349 ins_pipe( pipe_slow ); 6350%} 6351 6352instruct vmul64B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 6353 predicate(n->as_Vector()->length() == 64); 6354 match(Set dst (MulVB src1 src2)); 6355 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 6356 format %{"vector_mulB $dst,$src1,$src2\n\t" %} 6357 ins_encode %{ 6358 assert(UseAVX > 2, "required"); 6359 int vector_len = Assembler::AVX_512bit; 6360 __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister); 6361 __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister); 6362 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 6363 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 6364 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 6365 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); 6366 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); 6367 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 6368 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 6369 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 6370 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 6371 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 6372 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6373 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register); 6374 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 6375 %} 6376 ins_pipe( pipe_slow ); 6377%} 6378 6379// Shorts/Chars vector mul 6380instruct vmulS(vec dst, vec src) %{ 6381 predicate(UseAVX == 0); 6382 match(Set dst (MulVS dst src)); 6383 format %{ "pmullw $dst,$src\t! mul packedS" %} 6384 ins_encode %{ 6385 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6386 %} 6387 ins_pipe( pipe_slow ); 6388%} 6389 6390instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 6391 predicate(UseAVX > 0); 6392 match(Set dst (MulVS src1 src2)); 6393 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %} 6394 ins_encode %{ 6395 int vector_len = vector_length_encoding(this); 6396 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6397 %} 6398 ins_pipe( pipe_slow ); 6399%} 6400 6401instruct vmulS_mem(vec dst, vec src, memory mem) %{ 6402 predicate(UseAVX > 0); 6403 match(Set dst (MulVS src (LoadVector mem))); 6404 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %} 6405 ins_encode %{ 6406 int vector_len = vector_length_encoding(this); 6407 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6408 %} 6409 ins_pipe( pipe_slow ); 6410%} 6411 6412// Integers vector mul 6413instruct vmulI(vec dst, vec src) %{ 6414 predicate(UseAVX == 0); 6415 match(Set dst (MulVI dst src)); 6416 format %{ "pmulld $dst,$src\t! mul packedI" %} 6417 ins_encode %{ 6418 assert(UseSSE > 3, "required"); 6419 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6420 %} 6421 ins_pipe( pipe_slow ); 6422%} 6423 6424instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 6425 predicate(UseAVX > 0); 6426 match(Set dst (MulVI src1 src2)); 6427 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 6428 ins_encode %{ 6429 int vector_len = vector_length_encoding(this); 6430 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6431 %} 6432 ins_pipe( pipe_slow ); 6433%} 6434 6435instruct vmulI_mem(vec dst, vec src, memory mem) %{ 6436 predicate(UseAVX > 0); 6437 match(Set dst (MulVI src (LoadVector mem))); 6438 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 6439 ins_encode %{ 6440 int vector_len = vector_length_encoding(this); 6441 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6442 %} 6443 ins_pipe( pipe_slow ); 6444%} 6445 6446// Longs vector mul 6447instruct vmulL_reg(vec dst, vec src1, vec src2) %{ 6448 match(Set dst (MulVL src1 src2)); 6449 format %{ "vpmullq $dst,$src1,$src2\t! mul packedL" %} 6450 ins_encode %{ 6451 assert(UseAVX > 2, "required"); 6452 int vector_len = vector_length_encoding(this); 6453 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6454 %} 6455 ins_pipe( pipe_slow ); 6456%} 6457 6458instruct vmulL_mem(vec dst, vec src, memory mem) %{ 6459 match(Set dst (MulVL src (LoadVector mem))); 6460 format %{ "vpmullq $dst,$src,$mem\t! mul packedL" %} 6461 ins_encode %{ 6462 assert(UseAVX > 2, "required"); 6463 int vector_len = vector_length_encoding(this); 6464 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6465 %} 6466 ins_pipe( pipe_slow ); 6467%} 6468 6469// Floats vector mul 6470instruct vmulF(vec dst, vec src) %{ 6471 predicate(UseAVX == 0); 6472 match(Set dst (MulVF dst src)); 6473 format %{ "mulps $dst,$src\t! mul packedF" %} 6474 ins_encode %{ 6475 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 6476 %} 6477 ins_pipe( pipe_slow ); 6478%} 6479 6480instruct vmulF_reg(vec dst, vec src1, vec src2) %{ 6481 predicate(UseAVX > 0); 6482 match(Set dst (MulVF src1 src2)); 6483 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %} 6484 ins_encode %{ 6485 int vector_len = vector_length_encoding(this); 6486 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6487 %} 6488 ins_pipe( pipe_slow ); 6489%} 6490 6491instruct vmulF_mem(vec dst, vec src, memory mem) %{ 6492 predicate(UseAVX > 0); 6493 match(Set dst (MulVF src (LoadVector mem))); 6494 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} 6495 ins_encode %{ 6496 int vector_len = vector_length_encoding(this); 6497 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6498 %} 6499 ins_pipe( pipe_slow ); 6500%} 6501 6502// Doubles vector mul 6503instruct vmulD(vec dst, vec src) %{ 6504 predicate(UseAVX == 0); 6505 match(Set dst (MulVD dst src)); 6506 format %{ "mulpd $dst,$src\t! mul packedD" %} 6507 ins_encode %{ 6508 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 6509 %} 6510 ins_pipe( pipe_slow ); 6511%} 6512 6513instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 6514 predicate(UseAVX > 0); 6515 match(Set dst (MulVD src1 src2)); 6516 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %} 6517 ins_encode %{ 6518 int vector_len = vector_length_encoding(this); 6519 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6520 %} 6521 ins_pipe( pipe_slow ); 6522%} 6523 6524instruct vmulD_mem(vec dst, vec src, memory mem) %{ 6525 predicate(UseAVX > 0); 6526 match(Set dst (MulVD src (LoadVector mem))); 6527 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 6528 ins_encode %{ 6529 int vector_len = vector_length_encoding(this); 6530 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6531 %} 6532 ins_pipe( pipe_slow ); 6533%} 6534 6535instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ 6536 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6537 match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2))); 6538 effect(TEMP dst, USE src1, USE src2); 6539 format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t" 6540 "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t" 6541 %} 6542 ins_encode %{ 6543 int vector_len = 1; 6544 int cond = (Assembler::Condition)($copnd$$cmpcode); 6545 __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 6546 __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 6547 %} 6548 ins_pipe( pipe_slow ); 6549%} 6550 6551instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ 6552 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6553 match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); 6554 effect(TEMP dst, USE src1, USE src2); 6555 format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" 6556 "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t" 6557 %} 6558 ins_encode %{ 6559 int vector_len = 1; 6560 int cond = (Assembler::Condition)($copnd$$cmpcode); 6561 __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 6562 __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 6563 %} 6564 ins_pipe( pipe_slow ); 6565%} 6566 6567// --------------------------------- DIV -------------------------------------- 6568 6569// Floats vector div 6570instruct vdivF(vec dst, vec src) %{ 6571 predicate(UseAVX == 0); 6572 match(Set dst (DivVF dst src)); 6573 format %{ "divps $dst,$src\t! div packedF" %} 6574 ins_encode %{ 6575 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6576 %} 6577 ins_pipe( pipe_slow ); 6578%} 6579 6580instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 6581 predicate(UseAVX > 0); 6582 match(Set dst (DivVF src1 src2)); 6583 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 6584 ins_encode %{ 6585 int vector_len = vector_length_encoding(this); 6586 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6587 %} 6588 ins_pipe( pipe_slow ); 6589%} 6590 6591instruct vdivF_mem(vec dst, vec src, memory mem) %{ 6592 predicate(UseAVX > 0); 6593 match(Set dst (DivVF src (LoadVector mem))); 6594 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 6595 ins_encode %{ 6596 int vector_len = vector_length_encoding(this); 6597 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6598 %} 6599 ins_pipe( pipe_slow ); 6600%} 6601 6602// Doubles vector div 6603instruct vdivD(vec dst, vec src) %{ 6604 predicate(UseAVX == 0); 6605 match(Set dst (DivVD dst src)); 6606 format %{ "divpd $dst,$src\t! div packedD" %} 6607 ins_encode %{ 6608 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 6609 %} 6610 ins_pipe( pipe_slow ); 6611%} 6612 6613instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 6614 predicate(UseAVX > 0); 6615 match(Set dst (DivVD src1 src2)); 6616 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %} 6617 ins_encode %{ 6618 int vector_len = vector_length_encoding(this); 6619 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6620 %} 6621 ins_pipe( pipe_slow ); 6622%} 6623 6624instruct vdivD_mem(vec dst, vec src, memory mem) %{ 6625 predicate(UseAVX > 0); 6626 match(Set dst (DivVD src (LoadVector mem))); 6627 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 6628 ins_encode %{ 6629 int vector_len = vector_length_encoding(this); 6630 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6631 %} 6632 ins_pipe( pipe_slow ); 6633%} 6634 6635// --------------------------------- Sqrt -------------------------------------- 6636 6637instruct vsqrtF_reg(vec dst, vec src) %{ 6638 match(Set dst (SqrtVF src)); 6639 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 6640 ins_encode %{ 6641 assert(UseAVX > 0, "required"); 6642 int vector_len = vector_length_encoding(this); 6643 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 6644 %} 6645 ins_pipe( pipe_slow ); 6646%} 6647 6648instruct vsqrtF_mem(vec dst, memory mem) %{ 6649 match(Set dst (SqrtVF (LoadVector mem))); 6650 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 6651 ins_encode %{ 6652 assert(UseAVX > 0, "required"); 6653 int vector_len = vector_length_encoding(this); 6654 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 6655 %} 6656 ins_pipe( pipe_slow ); 6657%} 6658 6659// Floating point vector sqrt 6660instruct vsqrtD_reg(vec dst, vec src) %{ 6661 match(Set dst (SqrtVD src)); 6662 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %} 6663 ins_encode %{ 6664 assert(UseAVX > 0, "required"); 6665 int vector_len = vector_length_encoding(this); 6666 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 6667 %} 6668 ins_pipe( pipe_slow ); 6669%} 6670 6671instruct vsqrtD_mem(vec dst, memory mem) %{ 6672 match(Set dst (SqrtVD (LoadVector mem))); 6673 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 6674 ins_encode %{ 6675 assert(UseAVX > 0, "required"); 6676 int vector_len = vector_length_encoding(this); 6677 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 6678 %} 6679 ins_pipe( pipe_slow ); 6680%} 6681 6682// ------------------------------ Shift --------------------------------------- 6683 6684// Left and right shift count vectors are the same on x86 6685// (only lowest bits of xmm reg are used for count). 6686instruct vshiftcnt(vec dst, rRegI cnt) %{ 6687 match(Set dst (LShiftCntV cnt)); 6688 match(Set dst (RShiftCntV cnt)); 6689 format %{ "movdl $dst,$cnt\t! load shift count" %} 6690 ins_encode %{ 6691 __ movdl($dst$$XMMRegister, $cnt$$Register); 6692 %} 6693 ins_pipe( pipe_slow ); 6694%} 6695 6696instruct vshiftcntimm(vec dst, immI8 cnt, rRegI tmp) %{ 6697 match(Set dst cnt); 6698 effect(TEMP tmp); 6699 format %{ "movl $tmp,$cnt\t" 6700 "movdl $dst,$tmp\t! load shift count" %} 6701 ins_encode %{ 6702 __ movl($tmp$$Register, $cnt$$constant); 6703 __ movdl($dst$$XMMRegister, $tmp$$Register); 6704 %} 6705 ins_pipe( pipe_slow ); 6706%} 6707 6708// Byte vector shift 6709instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 6710 predicate(n->as_Vector()->length() <= 8); 6711 match(Set dst (LShiftVB src shift)); 6712 match(Set dst (RShiftVB src shift)); 6713 match(Set dst (URShiftVB src shift)); 6714 effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch); 6715 format %{"vector_byte_shift $dst,$src,$shift" %} 6716 ins_encode %{ 6717 assert(UseSSE > 3, "required"); 6718 int opcode = this->ideal_Opcode(); 6719 __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister); 6720 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6721 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 6722 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6723 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6724 %} 6725 ins_pipe( pipe_slow ); 6726%} 6727 6728instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{ 6729 predicate(n->as_Vector()->length() == 16 && UseAVX <= 1); 6730 match(Set dst (LShiftVB src shift)); 6731 match(Set dst (RShiftVB src shift)); 6732 match(Set dst (URShiftVB src shift)); 6733 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch); 6734 format %{"vector_byte_shift $dst,$src,$shift" %} 6735 ins_encode %{ 6736 assert(UseSSE > 3, "required"); 6737 int opcode = this->ideal_Opcode(); 6738 6739 __ vextendbw(opcode, $tmp1$$XMMRegister, $src$$XMMRegister); 6740 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6741 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6742 __ vextendbw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6743 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6744 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 6745 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6746 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6747 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6748 %} 6749 ins_pipe( pipe_slow ); 6750%} 6751 6752instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 6753 predicate(n->as_Vector()->length() == 16 && UseAVX > 1); 6754 match(Set dst (LShiftVB src shift)); 6755 match(Set dst (RShiftVB src shift)); 6756 match(Set dst (URShiftVB src shift)); 6757 effect(TEMP dst, TEMP tmp, TEMP scratch); 6758 format %{"vector_byte_shift $dst,$src,$shift" %} 6759 ins_encode %{ 6760 int opcode = this->ideal_Opcode(); 6761 int vector_len = Assembler::AVX_256bit; 6762 __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister, vector_len); 6763 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 6764 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); 6765 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 6766 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 6767 %} 6768 ins_pipe( pipe_slow ); 6769%} 6770 6771instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 6772 predicate(n->as_Vector()->length() == 32); 6773 match(Set dst (LShiftVB src shift)); 6774 match(Set dst (RShiftVB src shift)); 6775 match(Set dst (URShiftVB src shift)); 6776 effect(TEMP dst, TEMP tmp, TEMP scratch); 6777 format %{"vector_byte_shift $dst,$src,$shift" %} 6778 ins_encode %{ 6779 assert(UseAVX > 1, "required"); 6780 int opcode = this->ideal_Opcode(); 6781 int vector_len = Assembler::AVX_256bit; 6782 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); 6783 __ vextendbw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 6784 __ vextendbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len); 6785 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 6786 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len); 6787 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); 6788 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); 6789 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 6790 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len); 6791 %} 6792 ins_pipe( pipe_slow ); 6793%} 6794 6795instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{ 6796 predicate(n->as_Vector()->length() == 64); 6797 match(Set dst (LShiftVB src shift)); 6798 match(Set dst (RShiftVB src shift)); 6799 match(Set dst (URShiftVB src shift)); 6800 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 6801 format %{"vector_byte_shift $dst,$src,$shift" %} 6802 ins_encode %{ 6803 assert(UseAVX > 2, "required"); 6804 int opcode = this->ideal_Opcode(); 6805 int vector_len = Assembler::AVX_512bit; 6806 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); 6807 __ vextendbw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 6808 __ vextendbw(opcode, $tmp2$$XMMRegister, $src$$XMMRegister, vector_len); 6809 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vector_len); 6810 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len); 6811 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 6812 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 6813 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 6814 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 6815 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6816 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register); 6817 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 6818 %} 6819 ins_pipe( pipe_slow ); 6820%} 6821 6822// Shorts vector logical right shift produces incorrect Java result 6823// for negative data because java code convert short value into int with 6824// sign extension before a shift. But char vectors are fine since chars are 6825// unsigned values. 6826// Shorts/Chars vector left shift 6827instruct vshiftS(vec dst, vec src, vec shift) %{ 6828 match(Set dst (LShiftVS src shift)); 6829 match(Set dst (RShiftVS src shift)); 6830 match(Set dst (URShiftVS src shift)); 6831 effect(TEMP dst, USE src, USE shift); 6832 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %} 6833 ins_encode %{ 6834 int opcode = this->ideal_Opcode(); 6835 if (UseAVX > 0) { 6836 int vlen_enc = vector_length_encoding(this); 6837 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6838 } else { 6839 int vlen = vector_length(this); 6840 if (vlen == 2) { 6841 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6842 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6843 } else if (vlen == 4) { 6844 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6845 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6846 } else { 6847 assert (vlen == 8, "sanity"); 6848 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6849 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6850 } 6851 } 6852 %} 6853 ins_pipe( pipe_slow ); 6854%} 6855 6856// Integers vector left shift 6857instruct vshiftI(vec dst, vec src, vec shift) %{ 6858 match(Set dst (LShiftVI src shift)); 6859 match(Set dst (RShiftVI src shift)); 6860 match(Set dst (URShiftVI src shift)); 6861 effect(TEMP dst, USE src, USE shift); 6862 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %} 6863 ins_encode %{ 6864 int opcode = this->ideal_Opcode(); 6865 if (UseAVX > 0) { 6866 int vector_len = vector_length_encoding(this); 6867 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 6868 } else { 6869 int vlen = vector_length(this); 6870 if (vlen == 2) { 6871 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6872 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6873 } else { 6874 assert(vlen == 4, "sanity"); 6875 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6876 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6877 } 6878 } 6879 %} 6880 ins_pipe( pipe_slow ); 6881%} 6882 6883// Longs vector shift 6884instruct vshiftL(vec dst, vec src, vec shift) %{ 6885 match(Set dst (LShiftVL src shift)); 6886 match(Set dst (URShiftVL src shift)); 6887 effect(TEMP dst, USE src, USE shift); 6888 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %} 6889 ins_encode %{ 6890 int opcode = this->ideal_Opcode(); 6891 if (UseAVX > 0) { 6892 int vector_len = vector_length_encoding(this); 6893 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 6894 } else { 6895 assert(vector_length(this) == 2, ""); 6896 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6897 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6898 } 6899 %} 6900 ins_pipe( pipe_slow ); 6901%} 6902 6903// -------------------ArithmeticRightShift ----------------------------------- 6904// Long vector arithmetic right shift 6905instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 6906 predicate(UseAVX <= 2); 6907 match(Set dst (RShiftVL src shift)); 6908 effect(TEMP dst, TEMP tmp, TEMP scratch); 6909 format %{ "vshiftq $dst,$src,$shift" %} 6910 ins_encode %{ 6911 uint vlen = vector_length(this); 6912 if (vlen == 2) { 6913 assert(UseSSE >= 2, "required"); 6914 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6915 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 6916 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); 6917 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 6918 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 6919 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 6920 } else { 6921 assert(vlen == 4, "sanity"); 6922 assert(UseAVX > 1, "required"); 6923 int vector_len = Assembler::AVX_256bit; 6924 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 6925 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); 6926 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 6927 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 6928 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 6929 } 6930 %} 6931 ins_pipe( pipe_slow ); 6932%} 6933 6934instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 6935 predicate(UseAVX > 2); 6936 match(Set dst (RShiftVL src shift)); 6937 format %{ "vshiftq $dst,$src,$shift" %} 6938 ins_encode %{ 6939 int vector_len = vector_length_encoding(this); 6940 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 6941 %} 6942 ins_pipe( pipe_slow ); 6943%} 6944 6945// --------------------------------- AND -------------------------------------- 6946 6947instruct vand(vec dst, vec src) %{ 6948 predicate(UseAVX == 0); 6949 match(Set dst (AndV dst src)); 6950 format %{ "pand $dst,$src\t! and vectors" %} 6951 ins_encode %{ 6952 __ pand($dst$$XMMRegister, $src$$XMMRegister); 6953 %} 6954 ins_pipe( pipe_slow ); 6955%} 6956 6957instruct vand_reg(vec dst, vec src1, vec src2) %{ 6958 predicate(UseAVX > 0); 6959 match(Set dst (AndV src1 src2)); 6960 format %{ "vpand $dst,$src1,$src2\t! and vectors" %} 6961 ins_encode %{ 6962 int vector_len = vector_length_encoding(this); 6963 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6964 %} 6965 ins_pipe( pipe_slow ); 6966%} 6967 6968instruct vand_mem(vec dst, vec src, memory mem) %{ 6969 predicate(UseAVX > 0); 6970 match(Set dst (AndV src (LoadVector mem))); 6971 format %{ "vpand $dst,$src,$mem\t! and vectors" %} 6972 ins_encode %{ 6973 int vector_len = vector_length_encoding(this); 6974 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6975 %} 6976 ins_pipe( pipe_slow ); 6977%} 6978 6979// --------------------------------- OR --------------------------------------- 6980 6981instruct vor(vec dst, vec src) %{ 6982 predicate(UseAVX == 0); 6983 match(Set dst (OrV dst src)); 6984 format %{ "por $dst,$src\t! or vectors" %} 6985 ins_encode %{ 6986 __ por($dst$$XMMRegister, $src$$XMMRegister); 6987 %} 6988 ins_pipe( pipe_slow ); 6989%} 6990 6991instruct vor_reg(vec dst, vec src1, vec src2) %{ 6992 predicate(UseAVX > 0); 6993 match(Set dst (OrV src1 src2)); 6994 format %{ "vpor $dst,$src1,$src2\t! or vectors" %} 6995 ins_encode %{ 6996 int vector_len = vector_length_encoding(this); 6997 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6998 %} 6999 ins_pipe( pipe_slow ); 7000%} 7001 7002instruct vor_mem(vec dst, vec src, memory mem) %{ 7003 predicate(UseAVX > 0); 7004 match(Set dst (OrV src (LoadVector mem))); 7005 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 7006 ins_encode %{ 7007 int vector_len = vector_length_encoding(this); 7008 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7009 %} 7010 ins_pipe( pipe_slow ); 7011%} 7012 7013// --------------------------------- XOR -------------------------------------- 7014 7015instruct vxor(vec dst, vec src) %{ 7016 predicate(UseAVX == 0); 7017 match(Set dst (XorV dst src)); 7018 format %{ "pxor $dst,$src\t! xor vectors" %} 7019 ins_encode %{ 7020 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7021 %} 7022 ins_pipe( pipe_slow ); 7023%} 7024 7025instruct vxor_reg(vec dst, vec src1, vec src2) %{ 7026 predicate(UseAVX > 0); 7027 match(Set dst (XorV src1 src2)); 7028 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 7029 ins_encode %{ 7030 int vector_len = vector_length_encoding(this); 7031 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7032 %} 7033 ins_pipe( pipe_slow ); 7034%} 7035 7036instruct vxor_mem(vec dst, vec src, memory mem) %{ 7037 predicate(UseAVX > 0); 7038 match(Set dst (XorV src (LoadVector mem))); 7039 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 7040 ins_encode %{ 7041 int vector_len = vector_length_encoding(this); 7042 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7043 %} 7044 ins_pipe( pipe_slow ); 7045%} 7046 7047// --------------------------------- ABS -------------------------------------- 7048// a = |a| 7049instruct vabsB_reg(vec dst, vec src) %{ 7050 match(Set dst (AbsVB src)); 7051 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 7052 ins_encode %{ 7053 uint vlen = vector_length(this); 7054 if (vlen <= 16) { 7055 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 7056 } else { 7057 int vlen_enc = vector_length_encoding(this); 7058 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7059 } 7060 %} 7061 ins_pipe( pipe_slow ); 7062%} 7063 7064instruct vabsS_reg(vec dst, vec src) %{ 7065 match(Set dst (AbsVS src)); 7066 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 7067 ins_encode %{ 7068 uint vlen = vector_length(this); 7069 if (vlen <= 8) { 7070 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 7071 } else { 7072 int vlen_enc = vector_length_encoding(this); 7073 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7074 } 7075 %} 7076 ins_pipe( pipe_slow ); 7077%} 7078 7079instruct vabsI_reg(vec dst, vec src) %{ 7080 match(Set dst (AbsVI src)); 7081 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 7082 ins_encode %{ 7083 uint vlen = vector_length(this); 7084 if (vlen <= 4) { 7085 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 7086 } else { 7087 int vlen_enc = vector_length_encoding(this); 7088 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7089 } 7090 %} 7091 ins_pipe( pipe_slow ); 7092%} 7093 7094instruct vabsL_reg(vec dst, vec src) %{ 7095 match(Set dst (AbsVL src)); 7096 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 7097 ins_encode %{ 7098 assert(UseAVX > 2, "required"); 7099 int vector_len = vector_length_encoding(this); 7100 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 7101 %} 7102 ins_pipe( pipe_slow ); 7103%} 7104 7105// --------------------------------- ABSNEG -------------------------------------- 7106 7107instruct vabsnegF(vec dst, vec src, rRegI scratch) %{ 7108 predicate(n->as_Vector()->length() != 4); // handled by 1-operand instruction vabsneg4F 7109 match(Set dst (AbsVF src)); 7110 match(Set dst (NegVF src)); 7111 effect(TEMP scratch); 7112 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 7113 ins_cost(150); 7114 ins_encode %{ 7115 int opcode = this->ideal_Opcode(); 7116 int vlen = vector_length(this); 7117 if (vlen == 2) { 7118 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); 7119 } else { 7120 assert(vlen == 8 || vlen == 16, "required"); 7121 int vlen_enc = vector_length_encoding(this); 7122 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register); 7123 } 7124 %} 7125 ins_pipe( pipe_slow ); 7126%} 7127 7128instruct vabsneg4F(vec dst, rRegI scratch) %{ 7129 predicate(n->as_Vector()->length() == 4); 7130 match(Set dst (AbsVF dst)); 7131 match(Set dst (NegVF dst)); 7132 effect(TEMP scratch); 7133 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 7134 ins_cost(150); 7135 ins_encode %{ 7136 int opcode = this->ideal_Opcode(); 7137 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $scratch$$Register); 7138 %} 7139 ins_pipe( pipe_slow ); 7140%} 7141 7142instruct vabsnegD(vec dst, vec src, rRegI scratch) %{ 7143 match(Set dst (AbsVD src)); 7144 match(Set dst (NegVD src)); 7145 effect(TEMP scratch); 7146 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 7147 ins_encode %{ 7148 int opcode = this->ideal_Opcode(); 7149 uint vlen = vector_length(this); 7150 if (vlen == 2) { 7151 assert(UseSSE >= 2, "required"); 7152 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); 7153 } else { 7154 int vlen_enc = vector_length_encoding(this); 7155 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register); 7156 } 7157 %} 7158 ins_pipe( pipe_slow ); 7159%} 7160 7161// --------------------------------- FMA -------------------------------------- 7162// a * b + c 7163 7164instruct vfmaF_reg(vec a, vec b, vec c) %{ 7165 match(Set c (FmaVF c (Binary a b))); 7166 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 7167 ins_cost(150); 7168 ins_encode %{ 7169 assert(UseFMA, "not enabled"); 7170 int vector_len = vector_length_encoding(this); 7171 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 7172 %} 7173 ins_pipe( pipe_slow ); 7174%} 7175 7176instruct vfmaF_mem(vec a, memory b, vec c) %{ 7177 match(Set c (FmaVF c (Binary a (LoadVector b)))); 7178 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 7179 ins_cost(150); 7180 ins_encode %{ 7181 assert(UseFMA, "not enabled"); 7182 int vector_len = vector_length_encoding(this); 7183 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 7184 %} 7185 ins_pipe( pipe_slow ); 7186%} 7187 7188instruct vfmaD_reg(vec a, vec b, vec c) %{ 7189 match(Set c (FmaVD c (Binary a b))); 7190 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 7191 ins_cost(150); 7192 ins_encode %{ 7193 assert(UseFMA, "not enabled"); 7194 int vector_len = vector_length_encoding(this); 7195 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 7196 %} 7197 ins_pipe( pipe_slow ); 7198%} 7199 7200instruct vfmaD_mem(vec a, memory b, vec c) %{ 7201 match(Set c (FmaVD c (Binary a (LoadVector b)))); 7202 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 7203 ins_cost(150); 7204 ins_encode %{ 7205 assert(UseFMA, "not enabled"); 7206 int vector_len = vector_length_encoding(this); 7207 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 7208 %} 7209 ins_pipe( pipe_slow ); 7210%} 7211 7212// --------------------------------- Vector Multiply Add -------------------------------------- 7213 7214instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 7215 predicate(UseAVX == 0); 7216 match(Set dst (MulAddVS2VI dst src1)); 7217 format %{ "pmaddwd $dst,$dst,$src1\t! muladd packedStoI" %} 7218 ins_encode %{ 7219 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 7220 %} 7221 ins_pipe( pipe_slow ); 7222%} 7223 7224instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 7225 predicate(UseAVX > 0); 7226 match(Set dst (MulAddVS2VI src1 src2)); 7227 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 7228 ins_encode %{ 7229 int vector_len = vector_length_encoding(this); 7230 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7231 %} 7232 ins_pipe( pipe_slow ); 7233%} 7234 7235// --------------------------------- Vector Multiply Add Add ---------------------------------- 7236 7237instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 7238 predicate(VM_Version::supports_vnni()); 7239 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 7240 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %} 7241 ins_encode %{ 7242 assert(UseAVX > 2, "required"); 7243 int vector_len = vector_length_encoding(this); 7244 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7245 %} 7246 ins_pipe( pipe_slow ); 7247 ins_cost(10); 7248%} 7249 7250// --------------------------------- PopCount -------------------------------------- 7251 7252instruct vpopcountI(vec dst, vec src) %{ 7253 match(Set dst (PopCountVI src)); 7254 format %{ "vpopcntd $dst,$src\t! vector popcount packedI" %} 7255 ins_encode %{ 7256 assert(UsePopCountInstruction, "not enabled"); 7257 7258 int vector_len = vector_length_encoding(this); 7259 __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 7260 %} 7261 ins_pipe( pipe_slow ); 7262%} 7263