1//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// AArch64 Instruction definitions. 10// 11//===----------------------------------------------------------------------===// 12 13//===----------------------------------------------------------------------===// 14// ARM Instruction Predicate Definitions. 15// 16def HasV8_0a : Predicate<"Subtarget->hasV8_0aOps()">, 17 AssemblerPredicate<(all_of HasV8_0aOps), "armv8.0a">; 18def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">, 19 AssemblerPredicateWithAll<(all_of HasV8_1aOps), "armv8.1a">; 20def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">, 21 AssemblerPredicateWithAll<(all_of HasV8_2aOps), "armv8.2a">; 22def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">, 23 AssemblerPredicateWithAll<(all_of HasV8_3aOps), "armv8.3a">; 24def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">, 25 AssemblerPredicateWithAll<(all_of HasV8_4aOps), "armv8.4a">; 26def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">, 27 AssemblerPredicateWithAll<(all_of HasV8_5aOps), "armv8.5a">; 28def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">, 29 AssemblerPredicateWithAll<(all_of HasV8_6aOps), "armv8.6a">; 30def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">, 31 AssemblerPredicateWithAll<(all_of HasV8_7aOps), "armv8.7a">; 32def HasV8_8a : Predicate<"Subtarget->hasV8_8aOps()">, 33 AssemblerPredicateWithAll<(all_of HasV8_8aOps), "armv8.8a">; 34def HasV8_9a : Predicate<"Subtarget->hasV8_9aOps()">, 35 AssemblerPredicateWithAll<(all_of HasV8_9aOps), "armv8.9a">; 36def HasV9_0a : Predicate<"Subtarget->hasV9_0aOps()">, 37 AssemblerPredicateWithAll<(all_of HasV9_0aOps), "armv9-a">; 38def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">, 39 AssemblerPredicateWithAll<(all_of HasV9_1aOps), "armv9.1a">; 40def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">, 41 AssemblerPredicateWithAll<(all_of HasV9_2aOps), "armv9.2a">; 42def HasV9_3a : Predicate<"Subtarget->hasV9_3aOps()">, 43 AssemblerPredicateWithAll<(all_of HasV9_3aOps), "armv9.3a">; 44def HasV9_4a : Predicate<"Subtarget->hasV9_4aOps()">, 45 AssemblerPredicateWithAll<(all_of HasV9_4aOps), "armv9.4a">; 46def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">, 47 AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">; 48 49def HasEL2VMSA : Predicate<"Subtarget->hasEL2VMSA()">, 50 AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">; 51 52def HasEL3 : Predicate<"Subtarget->hasEL3()">, 53 AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">; 54 55def HasVH : Predicate<"Subtarget->hasVH()">, 56 AssemblerPredicateWithAll<(all_of FeatureVH), "vh">; 57 58def HasLOR : Predicate<"Subtarget->hasLOR()">, 59 AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">; 60 61def HasPAuth : Predicate<"Subtarget->hasPAuth()">, 62 AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">; 63 64def HasJS : Predicate<"Subtarget->hasJS()">, 65 AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">; 66 67def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">, 68 AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">; 69 70def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">, 71 AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">; 72 73def HasNV : Predicate<"Subtarget->hasNV()">, 74 AssemblerPredicateWithAll<(all_of FeatureNV), "nv">; 75 76def HasMPAM : Predicate<"Subtarget->hasMPAM()">, 77 AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">; 78 79def HasDIT : Predicate<"Subtarget->hasDIT()">, 80 AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">; 81 82def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">, 83 AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">; 84 85def HasAM : Predicate<"Subtarget->hasAM()">, 86 AssemblerPredicateWithAll<(all_of FeatureAM), "am">; 87 88def HasSEL2 : Predicate<"Subtarget->hasSEL2()">, 89 AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">; 90 91def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">, 92 AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">; 93 94def HasFlagM : Predicate<"Subtarget->hasFlagM()">, 95 AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">; 96 97def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPCImm()">, 98 AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">; 99 100def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, 101 AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">; 102def HasNEON : Predicate<"Subtarget->hasNEON()">, 103 AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">; 104def HasSM4 : Predicate<"Subtarget->hasSM4()">, 105 AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">; 106def HasSHA3 : Predicate<"Subtarget->hasSHA3()">, 107 AssemblerPredicateWithAll<(all_of FeatureSHA3), "sha3">; 108def HasSHA2 : Predicate<"Subtarget->hasSHA2()">, 109 AssemblerPredicateWithAll<(all_of FeatureSHA2), "sha2">; 110def HasAES : Predicate<"Subtarget->hasAES()">, 111 AssemblerPredicateWithAll<(all_of FeatureAES), "aes">; 112def HasDotProd : Predicate<"Subtarget->hasDotProd()">, 113 AssemblerPredicateWithAll<(all_of FeatureDotProd), "dotprod">; 114def HasCRC : Predicate<"Subtarget->hasCRC()">, 115 AssemblerPredicateWithAll<(all_of FeatureCRC), "crc">; 116def HasCSSC : Predicate<"Subtarget->hasCSSC()">, 117 AssemblerPredicateWithAll<(all_of FeatureCSSC), "cssc">; 118def HasNoCSSC : Predicate<"!Subtarget->hasCSSC()">; 119def HasLSE : Predicate<"Subtarget->hasLSE()">, 120 AssemblerPredicateWithAll<(all_of FeatureLSE), "lse">; 121def HasNoLSE : Predicate<"!Subtarget->hasLSE()">; 122def HasRAS : Predicate<"Subtarget->hasRAS()">, 123 AssemblerPredicateWithAll<(all_of FeatureRAS), "ras">; 124def HasRDM : Predicate<"Subtarget->hasRDM()">, 125 AssemblerPredicateWithAll<(all_of FeatureRDM), "rdm">; 126def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">, 127 AssemblerPredicateWithAll<(all_of FeatureFullFP16), "fullfp16">; 128def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">, 129 AssemblerPredicateWithAll<(all_of FeatureFP16FML), "fp16fml">; 130def HasSPE : Predicate<"Subtarget->hasSPE()">, 131 AssemblerPredicateWithAll<(all_of FeatureSPE), "spe">; 132def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">, 133 AssemblerPredicateWithAll<(all_of FeatureFuseAES), 134 "fuse-aes">; 135def HasSVE : Predicate<"Subtarget->hasSVE()">, 136 AssemblerPredicateWithAll<(all_of FeatureSVE), "sve">; 137def HasSVE2 : Predicate<"Subtarget->hasSVE2()">, 138 AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">; 139def HasSVE2p1 : Predicate<"Subtarget->hasSVE2p1()">, 140 AssemblerPredicate<(any_of FeatureSVE2p1), "sve2p1">; 141def HasSVE2AES : Predicate<"Subtarget->hasSVE2AES()">, 142 AssemblerPredicateWithAll<(all_of FeatureSVE2AES), "sve2-aes">; 143def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">, 144 AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">; 145def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">, 146 AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">; 147def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">, 148 AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">; 149def HasB16B16 : Predicate<"Subtarget->hasB16B16()">, 150 AssemblerPredicateWithAll<(all_of FeatureB16B16), "b16b16">; 151def HasSME : Predicate<"Subtarget->hasSME()">, 152 AssemblerPredicateWithAll<(all_of FeatureSME), "sme">; 153def HasSMEF64F64 : Predicate<"Subtarget->hasSMEF64F64()">, 154 AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">; 155def HasSMEF16F16 : Predicate<"Subtarget->hasSMEF16F16()">, 156 AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">; 157def HasSMEI16I64 : Predicate<"Subtarget->hasSMEI16I64()">, 158 AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">; 159def HasSME2 : Predicate<"Subtarget->hasSME2()">, 160 AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">; 161def HasSME2p1 : Predicate<"Subtarget->hasSME2p1()">, 162 AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">; 163 164// A subset of SVE(2) instructions are legal in Streaming SVE execution mode, 165// they should be enabled if either has been specified. 166def HasSVEorSME 167 : Predicate<"Subtarget->hasSVEorSME()">, 168 AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME), 169 "sve or sme">; 170def HasSVE2orSME 171 : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">, 172 AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME), 173 "sve2 or sme">; 174def HasSVE2p1_or_HasSME 175 : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME()">, 176 AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">; 177def HasSVE2p1_or_HasSME2 178 : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2()">, 179 AssemblerPredicateWithAll<(any_of FeatureSME2, FeatureSVE2p1), "sme2 or sve2p1">; 180def HasSVE2p1_or_HasSME2p1 181 : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2p1()">, 182 AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1), "sme2p1 or sve2p1">; 183// A subset of NEON instructions are legal in Streaming SVE execution mode, 184// they should be enabled if either has been specified. 185def HasNEONorSME 186 : Predicate<"Subtarget->hasNEON() || Subtarget->hasSME()">, 187 AssemblerPredicateWithAll<(any_of FeatureNEON, FeatureSME), 188 "neon or sme">; 189def HasRCPC : Predicate<"Subtarget->hasRCPC()">, 190 AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">; 191def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">, 192 AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">; 193def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">, 194 AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">; 195def HasSB : Predicate<"Subtarget->hasSB()">, 196 AssemblerPredicateWithAll<(all_of FeatureSB), "sb">; 197def HasPredRes : Predicate<"Subtarget->hasPredRes()">, 198 AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">; 199def HasCCDP : Predicate<"Subtarget->hasCCDP()">, 200 AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">; 201def HasBTI : Predicate<"Subtarget->hasBTI()">, 202 AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">; 203def HasMTE : Predicate<"Subtarget->hasMTE()">, 204 AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">; 205def HasTME : Predicate<"Subtarget->hasTME()">, 206 AssemblerPredicateWithAll<(all_of FeatureTME), "tme">; 207def HasETE : Predicate<"Subtarget->hasETE()">, 208 AssemblerPredicateWithAll<(all_of FeatureETE), "ete">; 209def HasTRBE : Predicate<"Subtarget->hasTRBE()">, 210 AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">; 211def HasBF16 : Predicate<"Subtarget->hasBF16()">, 212 AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">; 213def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">, 214 AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">; 215def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">, 216 AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">; 217def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">, 218 AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">; 219def HasXS : Predicate<"Subtarget->hasXS()">, 220 AssemblerPredicateWithAll<(all_of FeatureXS), "xs">; 221def HasWFxT : Predicate<"Subtarget->hasWFxT()">, 222 AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">; 223def HasLS64 : Predicate<"Subtarget->hasLS64()">, 224 AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">; 225def HasBRBE : Predicate<"Subtarget->hasBRBE()">, 226 AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">; 227def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">, 228 AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">; 229def HasHBC : Predicate<"Subtarget->hasHBC()">, 230 AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">; 231def HasMOPS : Predicate<"Subtarget->hasMOPS()">, 232 AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">; 233def HasCLRBHB : Predicate<"Subtarget->hasCLRBHB()">, 234 AssemblerPredicateWithAll<(all_of FeatureCLRBHB), "clrbhb">; 235def HasSPECRES2 : Predicate<"Subtarget->hasSPECRES2()">, 236 AssemblerPredicateWithAll<(all_of FeatureSPECRES2), "specres2">; 237def HasITE : Predicate<"Subtarget->hasITE()">, 238 AssemblerPredicateWithAll<(all_of FeatureITE), "ite">; 239def HasTHE : Predicate<"Subtarget->hasTHE()">, 240 AssemblerPredicateWithAll<(all_of FeatureTHE), "the">; 241def HasRCPC3 : Predicate<"Subtarget->hasRCPC3()">, 242 AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">; 243def HasLSE128 : Predicate<"Subtarget->hasLSE128()">, 244 AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">; 245def HasD128 : Predicate<"Subtarget->hasD128()">, 246 AssemblerPredicateWithAll<(all_of FeatureD128), "d128">; 247def HasCHK : Predicate<"Subtarget->hasCHK()">, 248 AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">; 249def HasGCS : Predicate<"Subtarget->hasGCS()">, 250 AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">; 251def IsLE : Predicate<"Subtarget->isLittleEndian()">; 252def IsBE : Predicate<"!Subtarget->isLittleEndian()">; 253def IsWindows : Predicate<"Subtarget->isTargetWindows()">; 254def UseExperimentalZeroingPseudos 255 : Predicate<"Subtarget->useExperimentalZeroingPseudos()">; 256def UseAlternateSExtLoadCVTF32 257 : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">; 258 259def UseNegativeImmediates 260 : Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)), 261 "NegativeImmediates">; 262 263def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">; 264 265def IsNeonAvailable : Predicate<"Subtarget->isNeonAvailable()">; 266 267def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER", 268 SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, 269 SDTCisInt<1>]>>; 270 271 272//===----------------------------------------------------------------------===// 273// AArch64-specific DAG Nodes. 274// 275 276// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS 277def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2, 278 [SDTCisSameAs<0, 2>, 279 SDTCisSameAs<0, 3>, 280 SDTCisInt<0>, SDTCisVT<1, i32>]>; 281 282// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS 283def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3, 284 [SDTCisSameAs<0, 1>, 285 SDTCisSameAs<0, 2>, 286 SDTCisInt<0>, 287 SDTCisVT<3, i32>]>; 288 289// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS 290def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, 291 [SDTCisSameAs<0, 2>, 292 SDTCisSameAs<0, 3>, 293 SDTCisInt<0>, 294 SDTCisVT<1, i32>, 295 SDTCisVT<4, i32>]>; 296 297def SDT_AArch64Brcond : SDTypeProfile<0, 3, 298 [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>, 299 SDTCisVT<2, i32>]>; 300def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>; 301def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, 302 SDTCisVT<2, OtherVT>]>; 303 304 305def SDT_AArch64CSel : SDTypeProfile<1, 4, 306 [SDTCisSameAs<0, 1>, 307 SDTCisSameAs<0, 2>, 308 SDTCisInt<3>, 309 SDTCisVT<4, i32>]>; 310def SDT_AArch64CCMP : SDTypeProfile<1, 5, 311 [SDTCisVT<0, i32>, 312 SDTCisInt<1>, 313 SDTCisSameAs<1, 2>, 314 SDTCisInt<3>, 315 SDTCisInt<4>, 316 SDTCisVT<5, i32>]>; 317def SDT_AArch64FCCMP : SDTypeProfile<1, 5, 318 [SDTCisVT<0, i32>, 319 SDTCisFP<1>, 320 SDTCisSameAs<1, 2>, 321 SDTCisInt<3>, 322 SDTCisInt<4>, 323 SDTCisVT<5, i32>]>; 324def SDT_AArch64FCmp : SDTypeProfile<0, 2, 325 [SDTCisFP<0>, 326 SDTCisSameAs<0, 1>]>; 327def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>; 328def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>; 329def SDT_AArch64Insr : SDTypeProfile<1, 2, [SDTCisVec<0>]>; 330def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>, 331 SDTCisSameAs<0, 1>, 332 SDTCisSameAs<0, 2>]>; 333def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>; 334def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; 335def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, 336 SDTCisInt<2>, SDTCisInt<3>]>; 337def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; 338def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, 339 SDTCisSameAs<0,2>, SDTCisInt<3>]>; 340def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>; 341def SDT_AArch64Dot: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, 342 SDTCisVec<2>, SDTCisSameAs<2,3>]>; 343 344def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>, 345 SDTCisSameAs<0,1>, 346 SDTCisSameAs<0,2>]>; 347 348def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; 349def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>; 350def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>; 351def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, 352 SDTCisSameAs<0,2>]>; 353def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, 354 SDTCisSameAs<0,2>, 355 SDTCisSameAs<0,3>]>; 356def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>; 357def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; 358 359def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>; 360 361def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, 362 SDTCisPtrTy<1>]>; 363 364def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; 365 366def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; 367def SDT_AArch64ldiapp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; 368def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; 369def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; 370def SDT_AArch64stilp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; 371def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; 372 373// Generates the general dynamic sequences, i.e. 374// adrp x0, :tlsdesc:var 375// ldr x1, [x0, #:tlsdesc_lo12:var] 376// add x0, x0, #:tlsdesc_lo12:var 377// .tlsdesccall var 378// blr x1 379 380// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here) 381// number of operands (the variable) 382def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1, 383 [SDTCisPtrTy<0>]>; 384 385def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4, 386 [SDTCisVT<0, i64>, SDTCisVT<1, i32>, 387 SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, 388 SDTCisSameAs<1, 4>]>; 389 390def SDT_AArch64TBL : SDTypeProfile<1, 2, [ 391 SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2> 392]>; 393 394// non-extending masked load fragment. 395def nonext_masked_load : 396 PatFrag<(ops node:$ptr, node:$pred, node:$def), 397 (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ 398 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD && 399 cast<MaskedLoadSDNode>(N)->isUnindexed() && 400 !cast<MaskedLoadSDNode>(N)->isNonTemporal(); 401}]>; 402// Any/Zero extending masked load fragments. 403def azext_masked_load : 404 PatFrag<(ops node:$ptr, node:$pred, node:$def), 405 (masked_ld node:$ptr, undef, node:$pred, node:$def),[{ 406 return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD || 407 cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD) && 408 cast<MaskedLoadSDNode>(N)->isUnindexed(); 409}]>; 410def azext_masked_load_i8 : 411 PatFrag<(ops node:$ptr, node:$pred, node:$def), 412 (azext_masked_load node:$ptr, node:$pred, node:$def), [{ 413 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; 414}]>; 415def azext_masked_load_i16 : 416 PatFrag<(ops node:$ptr, node:$pred, node:$def), 417 (azext_masked_load node:$ptr, node:$pred, node:$def), [{ 418 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16; 419}]>; 420def azext_masked_load_i32 : 421 PatFrag<(ops node:$ptr, node:$pred, node:$def), 422 (azext_masked_load node:$ptr, node:$pred, node:$def), [{ 423 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32; 424}]>; 425// Sign extending masked load fragments. 426def sext_masked_load : 427 PatFrag<(ops node:$ptr, node:$pred, node:$def), 428 (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ 429 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD && 430 cast<MaskedLoadSDNode>(N)->isUnindexed(); 431}]>; 432def sext_masked_load_i8 : 433 PatFrag<(ops node:$ptr, node:$pred, node:$def), 434 (sext_masked_load node:$ptr, node:$pred, node:$def), [{ 435 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; 436}]>; 437def sext_masked_load_i16 : 438 PatFrag<(ops node:$ptr, node:$pred, node:$def), 439 (sext_masked_load node:$ptr, node:$pred, node:$def), [{ 440 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16; 441}]>; 442def sext_masked_load_i32 : 443 PatFrag<(ops node:$ptr, node:$pred, node:$def), 444 (sext_masked_load node:$ptr, node:$pred, node:$def), [{ 445 return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32; 446}]>; 447 448def non_temporal_load : 449 PatFrag<(ops node:$ptr, node:$pred, node:$def), 450 (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ 451 return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD && 452 cast<MaskedLoadSDNode>(N)->isUnindexed() && 453 cast<MaskedLoadSDNode>(N)->isNonTemporal(); 454}]>; 455 456// non-truncating masked store fragment. 457def nontrunc_masked_store : 458 PatFrag<(ops node:$val, node:$ptr, node:$pred), 459 (masked_st node:$val, node:$ptr, undef, node:$pred), [{ 460 return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() && 461 cast<MaskedStoreSDNode>(N)->isUnindexed() && 462 !cast<MaskedStoreSDNode>(N)->isNonTemporal(); 463}]>; 464// truncating masked store fragments. 465def trunc_masked_store : 466 PatFrag<(ops node:$val, node:$ptr, node:$pred), 467 (masked_st node:$val, node:$ptr, undef, node:$pred), [{ 468 return cast<MaskedStoreSDNode>(N)->isTruncatingStore() && 469 cast<MaskedStoreSDNode>(N)->isUnindexed(); 470}]>; 471def trunc_masked_store_i8 : 472 PatFrag<(ops node:$val, node:$ptr, node:$pred), 473 (trunc_masked_store node:$val, node:$ptr, node:$pred), [{ 474 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; 475}]>; 476def trunc_masked_store_i16 : 477 PatFrag<(ops node:$val, node:$ptr, node:$pred), 478 (trunc_masked_store node:$val, node:$ptr, node:$pred), [{ 479 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16; 480}]>; 481def trunc_masked_store_i32 : 482 PatFrag<(ops node:$val, node:$ptr, node:$pred), 483 (trunc_masked_store node:$val, node:$ptr, node:$pred), [{ 484 return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32; 485}]>; 486 487def non_temporal_store : 488 PatFrag<(ops node:$val, node:$ptr, node:$pred), 489 (masked_st node:$val, node:$ptr, undef, node:$pred), [{ 490 return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() && 491 cast<MaskedStoreSDNode>(N)->isUnindexed() && 492 cast<MaskedStoreSDNode>(N)->isNonTemporal(); 493}]>; 494 495multiclass masked_gather_scatter<PatFrags GatherScatterOp> { 496 // offsets = (signed)Index << sizeof(elt) 497 def NAME#_signed_scaled : 498 PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx), 499 (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{ 500 auto MGS = cast<MaskedGatherScatterSDNode>(N); 501 bool Signed = MGS->isIndexSigned() || 502 MGS->getIndex().getValueType().getVectorElementType() == MVT::i64; 503 return Signed && MGS->isIndexScaled(); 504 }]>; 505 // offsets = (signed)Index 506 def NAME#_signed_unscaled : 507 PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx), 508 (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{ 509 auto MGS = cast<MaskedGatherScatterSDNode>(N); 510 bool Signed = MGS->isIndexSigned() || 511 MGS->getIndex().getValueType().getVectorElementType() == MVT::i64; 512 return Signed && !MGS->isIndexScaled(); 513 }]>; 514 // offsets = (unsigned)Index << sizeof(elt) 515 def NAME#_unsigned_scaled : 516 PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx), 517 (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{ 518 auto MGS = cast<MaskedGatherScatterSDNode>(N); 519 bool Signed = MGS->isIndexSigned() || 520 MGS->getIndex().getValueType().getVectorElementType() == MVT::i64; 521 return !Signed && MGS->isIndexScaled(); 522 }]>; 523 // offsets = (unsigned)Index 524 def NAME#_unsigned_unscaled : 525 PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx), 526 (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{ 527 auto MGS = cast<MaskedGatherScatterSDNode>(N); 528 bool Signed = MGS->isIndexSigned() || 529 MGS->getIndex().getValueType().getVectorElementType() == MVT::i64; 530 return !Signed && !MGS->isIndexScaled(); 531 }]>; 532} 533 534defm nonext_masked_gather : masked_gather_scatter<nonext_masked_gather>; 535defm azext_masked_gather_i8 : masked_gather_scatter<azext_masked_gather_i8>; 536defm azext_masked_gather_i16 : masked_gather_scatter<azext_masked_gather_i16>; 537defm azext_masked_gather_i32 : masked_gather_scatter<azext_masked_gather_i32>; 538defm sext_masked_gather_i8 : masked_gather_scatter<sext_masked_gather_i8>; 539defm sext_masked_gather_i16 : masked_gather_scatter<sext_masked_gather_i16>; 540defm sext_masked_gather_i32 : masked_gather_scatter<sext_masked_gather_i32>; 541 542defm nontrunc_masked_scatter : masked_gather_scatter<nontrunc_masked_scatter>; 543defm trunc_masked_scatter_i8 : masked_gather_scatter<trunc_masked_scatter_i8>; 544defm trunc_masked_scatter_i16 : masked_gather_scatter<trunc_masked_scatter_i16>; 545defm trunc_masked_scatter_i32 : masked_gather_scatter<trunc_masked_scatter_i32>; 546 547// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise 548def top16Zero: PatLeaf<(i32 GPR32:$src), [{ 549 return SDValue(N,0)->getValueType(0) == MVT::i32 && 550 CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16)); 551 }]>; 552 553// top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise 554def top32Zero: PatLeaf<(i64 GPR64:$src), [{ 555 return SDValue(N,0)->getValueType(0) == MVT::i64 && 556 CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32)); 557 }]>; 558 559// topbitsallzero - Return true if all bits except the lowest bit are known zero 560def topbitsallzero32: PatLeaf<(i32 GPR32:$src), [{ 561 return SDValue(N,0)->getValueType(0) == MVT::i32 && 562 CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 31)); 563 }]>; 564def topbitsallzero64: PatLeaf<(i64 GPR64:$src), [{ 565 return SDValue(N,0)->getValueType(0) == MVT::i64 && 566 CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 63)); 567 }]>; 568 569// Node definitions. 570def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>; 571def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>; 572def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>; 573def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>; 574def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", 575 SDCallSeqStart<[ SDTCisVT<0, i32>, 576 SDTCisVT<1, i32> ]>, 577 [SDNPHasChain, SDNPOutGlue]>; 578def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", 579 SDCallSeqEnd<[ SDTCisVT<0, i32>, 580 SDTCisVT<1, i32> ]>, 581 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; 582def AArch64call : SDNode<"AArch64ISD::CALL", 583 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, 584 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, 585 SDNPVariadic]>; 586 587def AArch64call_bti : SDNode<"AArch64ISD::CALL_BTI", 588 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, 589 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, 590 SDNPVariadic]>; 591 592def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER", 593 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, 594 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, 595 SDNPVariadic]>; 596 597def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond, 598 [SDNPHasChain]>; 599def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz, 600 [SDNPHasChain]>; 601def AArch64cbnz : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz, 602 [SDNPHasChain]>; 603def AArch64tbz : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz, 604 [SDNPHasChain]>; 605def AArch64tbnz : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz, 606 [SDNPHasChain]>; 607 608 609def AArch64csel : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>; 610def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>; 611def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>; 612def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>; 613def AArch64retglue : SDNode<"AArch64ISD::RET_GLUE", SDTNone, 614 [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; 615def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn >; 616def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>; 617def AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut, 618 [SDNPCommutative]>; 619def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>; 620def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut, 621 [SDNPCommutative]>; 622def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; 623def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; 624 625def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>; 626def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>; 627def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>; 628 629def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; 630 631def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; 632def AArch64strict_fcmp : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp, 633 [SDNPHasChain]>; 634def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp, 635 [SDNPHasChain]>; 636def AArch64any_fcmp : PatFrags<(ops node:$lhs, node:$rhs), 637 [(AArch64strict_fcmp node:$lhs, node:$rhs), 638 (AArch64fcmp node:$lhs, node:$rhs)]>; 639 640def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>; 641def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>; 642def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>; 643def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>; 644def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>; 645def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>; 646 647def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>; 648 649def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>; 650def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>; 651def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>; 652def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>; 653def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>; 654def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>; 655 656def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>; 657def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>; 658def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>; 659def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>; 660def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>; 661def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>; 662def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>; 663 664def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>; 665def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>; 666def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>; 667def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>; 668 669def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>; 670def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>; 671def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>; 672def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>; 673def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>; 674def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>; 675def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>; 676def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>; 677def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>; 678def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>; 679 680def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>; 681def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>; 682 683def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>; 684def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>; 685def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>; 686def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>; 687def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>; 688 689def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>; 690def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>; 691def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>; 692 693def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>; 694def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>; 695def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>; 696def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>; 697def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>; 698def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS), 699 (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>; 700 701def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>; 702def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>; 703def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>; 704def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>; 705def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>; 706 707def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>; 708def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>; 709 710def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET, 711 [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; 712 713def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH, 714 [SDNPHasChain, SDNPSideEffect]>; 715 716def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>; 717def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>; 718 719def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ", 720 SDT_AArch64TLSDescCallSeq, 721 [SDNPInGlue, SDNPOutGlue, SDNPHasChain, 722 SDNPVariadic]>; 723 724 725def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge", 726 SDT_AArch64WrapperLarge>; 727 728def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>; 729 730def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, 731 SDTCisSameAs<1, 2>]>; 732def AArch64pmull : SDNode<"AArch64ISD::PMULL", SDT_AArch64mull, 733 [SDNPCommutative]>; 734def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull, 735 [SDNPCommutative]>; 736def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull, 737 [SDNPCommutative]>; 738 739def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>; 740def AArch64frecps : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>; 741def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>; 742def AArch64frsqrts : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>; 743 744def AArch64sdot : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>; 745def AArch64udot : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>; 746 747def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>; 748def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>; 749def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>; 750def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>; 751def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>; 752def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; 753 754def AArch64uabd : PatFrags<(ops node:$lhs, node:$rhs), 755 [(abdu node:$lhs, node:$rhs), 756 (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>; 757def AArch64sabd : PatFrags<(ops node:$lhs, node:$rhs), 758 [(abds node:$lhs, node:$rhs), 759 (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>; 760 761def AArch64addp_n : SDNode<"AArch64ISD::ADDP", SDT_AArch64Zip>; 762def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>; 763def AArch64saddlp_n : SDNode<"AArch64ISD::SADDLP", SDT_AArch64uaddlp>; 764def AArch64addp : PatFrags<(ops node:$Rn, node:$Rm), 765 [(AArch64addp_n node:$Rn, node:$Rm), 766 (int_aarch64_neon_addp node:$Rn, node:$Rm)]>; 767def AArch64uaddlp : PatFrags<(ops node:$src), 768 [(AArch64uaddlp_n node:$src), 769 (int_aarch64_neon_uaddlp node:$src)]>; 770def AArch64saddlp : PatFrags<(ops node:$src), 771 [(AArch64saddlp_n node:$src), 772 (int_aarch64_neon_saddlp node:$src)]>; 773def AArch64faddp : PatFrags<(ops node:$Rn, node:$Rm), 774 [(AArch64addp_n node:$Rn, node:$Rm), 775 (int_aarch64_neon_faddp node:$Rn, node:$Rm)]>; 776def AArch64roundingvlshr : ComplexPattern<vAny, 2, "SelectRoundingVLShr", [AArch64vlshr]>; 777def AArch64facge : PatFrags<(ops node:$Rn, node:$Rm), 778 [(AArch64fcmge (fabs node:$Rn), (fabs node:$Rm)), 779 (int_aarch64_neon_facge node:$Rn, node:$Rm)]>; 780def AArch64facgt : PatFrags<(ops node:$Rn, node:$Rm), 781 [(AArch64fcmgt (fabs node:$Rn), (fabs node:$Rm)), 782 (int_aarch64_neon_facgt node:$Rn, node:$Rm)]>; 783 784def AArch64fmaxnmv : PatFrags<(ops node:$Rn), 785 [(vecreduce_fmax node:$Rn), 786 (int_aarch64_neon_fmaxnmv node:$Rn)]>; 787def AArch64fminnmv : PatFrags<(ops node:$Rn), 788 [(vecreduce_fmin node:$Rn), 789 (int_aarch64_neon_fminnmv node:$Rn)]>; 790def AArch64fmaxv : PatFrags<(ops node:$Rn), 791 [(vecreduce_fmaximum node:$Rn), 792 (int_aarch64_neon_fmaxv node:$Rn)]>; 793def AArch64fminv : PatFrags<(ops node:$Rn), 794 [(vecreduce_fminimum node:$Rn), 795 (int_aarch64_neon_fminv node:$Rn)]>; 796 797def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; 798def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; 799def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; 800def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; 801def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; 802 803def SDT_AArch64unpk : SDTypeProfile<1, 1, [ 804 SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0> 805]>; 806def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>; 807def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>; 808def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>; 809def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>; 810 811def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; 812def AArch64ldiapp : SDNode<"AArch64ISD::LDIAPP", SDT_AArch64ldiapp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; 813def AArch64ldnp : SDNode<"AArch64ISD::LDNP", SDT_AArch64ldnp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; 814def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; 815def AArch64stilp : SDNode<"AArch64ISD::STILP", SDT_AArch64stilp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; 816def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; 817 818def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>; 819def AArch64mrs : SDNode<"AArch64ISD::MRS", 820 SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>, 821 [SDNPHasChain, SDNPOutGlue]>; 822 823// Match add node and also treat an 'or' node is as an 'add' if the or'ed operands 824// have no common bits. 825def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs), 826 [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{ 827 if (N->getOpcode() == ISD::ADD) 828 return true; 829 return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1)); 830}]> { 831 let GISelPredicateCode = [{ 832 // Only handle G_ADD for now. FIXME. build capability to compute whether 833 // operands of G_OR have common bits set or not. 834 return MI.getOpcode() == TargetOpcode::G_ADD; 835 }]; 836} 837 838// Match mul with enough sign-bits. Can be reduced to a smaller mul operand. 839def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r), [{ 840 return CurDAG->ComputeNumSignBits(N->getOperand(0)) > 32 && 841 CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32; 842}]>; 843 844//===----------------------------------------------------------------------===// 845 846//===----------------------------------------------------------------------===// 847 848// AArch64 Instruction Predicate Definitions. 849// We could compute these on a per-module basis but doing so requires accessing 850// the Function object through the <Target>Subtarget and objections were raised 851// to that (see post-commit review comments for r301750). 852let RecomputePerFunction = 1 in { 853 def ForCodeSize : Predicate<"shouldOptForSize(MF)">; 854 def NotForCodeSize : Predicate<"!shouldOptForSize(MF)">; 855 // Avoid generating STRQro if it is slow, unless we're optimizing for code size. 856 def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">; 857 858 def UseBTI : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>; 859 def NotUseBTI : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>; 860 861 def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>; 862 def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>; 863 // Toggles patterns which aren't beneficial in GlobalISel when we aren't 864 // optimizing. This allows us to selectively use patterns without impacting 865 // SelectionDAG's behaviour. 866 // FIXME: One day there will probably be a nicer way to check for this, but 867 // today is not that day. 868 def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">; 869} 870 871include "AArch64InstrFormats.td" 872include "SVEInstrFormats.td" 873include "SMEInstrFormats.td" 874 875//===----------------------------------------------------------------------===// 876 877//===----------------------------------------------------------------------===// 878// Miscellaneous instructions. 879//===----------------------------------------------------------------------===// 880 881let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in { 882// We set Sched to empty list because we expect these instructions to simply get 883// removed in most cases. 884def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), 885 [(AArch64callseq_start timm:$amt1, timm:$amt2)]>, 886 Sched<[]>; 887def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), 888 [(AArch64callseq_end timm:$amt1, timm:$amt2)]>, 889 Sched<[]>; 890} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 891 892let isReMaterializable = 1, isCodeGenOnly = 1 in { 893// FIXME: The following pseudo instructions are only needed because remat 894// cannot handle multiple instructions. When that changes, they can be 895// removed, along with the AArch64Wrapper node. 896 897let AddedComplexity = 10 in 898def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr), 899 [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>, 900 Sched<[WriteLDAdr]>; 901 902// The MOVaddr instruction should match only when the add is not folded 903// into a load or store address. 904def MOVaddr 905 : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), 906 [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi), 907 tglobaladdr:$low))]>, 908 Sched<[WriteAdrAdr]>; 909def MOVaddrJT 910 : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), 911 [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi), 912 tjumptable:$low))]>, 913 Sched<[WriteAdrAdr]>; 914def MOVaddrCP 915 : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), 916 [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi), 917 tconstpool:$low))]>, 918 Sched<[WriteAdrAdr]>; 919def MOVaddrBA 920 : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), 921 [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi), 922 tblockaddress:$low))]>, 923 Sched<[WriteAdrAdr]>; 924def MOVaddrTLS 925 : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), 926 [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi), 927 tglobaltlsaddr:$low))]>, 928 Sched<[WriteAdrAdr]>; 929def MOVaddrEXT 930 : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), 931 [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi), 932 texternalsym:$low))]>, 933 Sched<[WriteAdrAdr]>; 934// Normally AArch64addlow either gets folded into a following ldr/str, 935// or together with an adrp into MOVaddr above. For cases with TLS, it 936// might appear without either of them, so allow lowering it into a plain 937// add. 938def ADDlowTLS 939 : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low), 940 [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src, 941 tglobaltlsaddr:$low))]>, 942 Sched<[WriteAdr]>; 943 944} // isReMaterializable, isCodeGenOnly 945 946def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr), 947 (LOADgot tglobaltlsaddr:$addr)>; 948 949def : Pat<(AArch64LOADgot texternalsym:$addr), 950 (LOADgot texternalsym:$addr)>; 951 952def : Pat<(AArch64LOADgot tconstpool:$addr), 953 (LOADgot tconstpool:$addr)>; 954 955// In general these get lowered into a sequence of three 4-byte instructions. 956// 32-bit jump table destination is actually only 2 instructions since we can 957// use the table itself as a PC-relative base. But optimization occurs after 958// branch relaxation so be pessimistic. 959let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch", 960 isNotDuplicable = 1 in { 961def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch), 962 (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>, 963 Sched<[]>; 964def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch), 965 (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>, 966 Sched<[]>; 967def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch), 968 (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>, 969 Sched<[]>; 970} 971 972// Space-consuming pseudo to aid testing of placement and reachability 973// algorithms. Immediate operand is the number of bytes this "instruction" 974// occupies; register operands can be used to enforce dependency and constrain 975// the scheduler. 976let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in 977def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn), 978 [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>, 979 Sched<[]>; 980 981let hasSideEffects = 1, isCodeGenOnly = 1 in { 982 def SpeculationSafeValueX 983 : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>; 984 def SpeculationSafeValueW 985 : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>; 986} 987 988// SpeculationBarrierEndBB must only be used after an unconditional control 989// flow, i.e. after a terminator for which isBarrier is True. 990let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in { 991 // This gets lowered to a pair of 4-byte instructions. 992 let Size = 8 in 993 def SpeculationBarrierISBDSBEndBB 994 : Pseudo<(outs), (ins), []>, Sched<[]>; 995 // This gets lowered to a 4-byte instruction. 996 let Size = 4 in 997 def SpeculationBarrierSBEndBB 998 : Pseudo<(outs), (ins), []>, Sched<[]>; 999} 1000 1001//===----------------------------------------------------------------------===// 1002// System instructions. 1003//===----------------------------------------------------------------------===// 1004 1005def HINT : HintI<"hint">; 1006def : InstAlias<"nop", (HINT 0b000)>; 1007def : InstAlias<"yield",(HINT 0b001)>; 1008def : InstAlias<"wfe", (HINT 0b010)>; 1009def : InstAlias<"wfi", (HINT 0b011)>; 1010def : InstAlias<"sev", (HINT 0b100)>; 1011def : InstAlias<"sevl", (HINT 0b101)>; 1012def : InstAlias<"dgh", (HINT 0b110)>; 1013def : InstAlias<"esb", (HINT 0b10000)>, Requires<[HasRAS]>; 1014def : InstAlias<"csdb", (HINT 20)>; 1015// In order to be able to write readable assembly, LLVM should accept assembly 1016// inputs that use Branch Target Indentification mnemonics, even with BTI disabled. 1017// However, in order to be compatible with other assemblers (e.g. GAS), LLVM 1018// should not emit these mnemonics unless BTI is enabled. 1019def : InstAlias<"bti", (HINT 32), 0>; 1020def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>; 1021def : InstAlias<"bti", (HINT 32)>, Requires<[HasBTI]>; 1022def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>; 1023 1024// v8.2a Statistical Profiling extension 1025def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>; 1026 1027// As far as LLVM is concerned this writes to the system's exclusive monitors. 1028let mayLoad = 1, mayStore = 1 in 1029def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">; 1030 1031// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot 1032// model patterns with sufficiently fine granularity. 1033let mayLoad = ?, mayStore = ? in { 1034def DMB : CRmSystemI<barrier_op, 0b101, "dmb", 1035 [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>; 1036 1037def DSB : CRmSystemI<barrier_op, 0b100, "dsb", 1038 [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>; 1039 1040def ISB : CRmSystemI<barrier_op, 0b110, "isb", 1041 [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>; 1042 1043def TSB : CRmSystemI<barrier_op, 0b010, "tsb", []> { 1044 let CRm = 0b0010; 1045 let Inst{12} = 0; 1046 let Predicates = [HasTRACEV8_4]; 1047} 1048 1049def DSBnXS : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> { 1050 let CRm{1-0} = 0b11; 1051 let Inst{9-8} = 0b10; 1052 let Predicates = [HasXS]; 1053} 1054 1055let Predicates = [HasWFxT] in { 1056def WFET : RegInputSystemI<0b0000, 0b000, "wfet">; 1057def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">; 1058} 1059 1060// Branch Record Buffer two-word mnemonic instructions 1061class BRBEI<bits<3> op2, string keyword> 1062 : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> { 1063 let Inst{31-8} = 0b110101010000100101110010; 1064 let Inst{7-5} = op2; 1065 let Predicates = [HasBRBE]; 1066} 1067def BRB_IALL: BRBEI<0b100, "\tiall">; 1068def BRB_INJ: BRBEI<0b101, "\tinj">; 1069 1070} 1071 1072// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ 1073def : TokenAlias<"INJ", "inj">; 1074def : TokenAlias<"IALL", "iall">; 1075 1076 1077// ARMv9.4-A Guarded Control Stack 1078class GCSNoOp<bits<3> op2, string mnemonic> 1079 : SimpleSystemI<0, (ins), mnemonic, "">, Sched<[]> { 1080 let Inst{20-8} = 0b0100001110111; 1081 let Inst{7-5} = op2; 1082 let Predicates = [HasGCS]; 1083} 1084def GCSPUSHX : GCSNoOp<0b100, "gcspushx">; 1085def GCSPOPCX : GCSNoOp<0b101, "gcspopcx">; 1086def GCSPOPX : GCSNoOp<0b110, "gcspopx">; 1087 1088class GCSRtIn<bits<3> op1, bits<3> op2, string mnemonic, 1089 list<dag> pattern = []> 1090 : RtSystemI<0, (outs), (ins GPR64:$Rt), mnemonic, "\t$Rt", pattern> { 1091 let Inst{20-19} = 0b01; 1092 let Inst{18-16} = op1; 1093 let Inst{15-8} = 0b01110111; 1094 let Inst{7-5} = op2; 1095 let Predicates = [HasGCS]; 1096} 1097 1098def GCSSS1 : GCSRtIn<0b011, 0b010, "gcsss1">; 1099def GCSPUSHM : GCSRtIn<0b011, 0b000, "gcspushm">; 1100 1101class GCSRtOut<bits<3> op1, bits<3> op2, string mnemonic, 1102 list<dag> pattern = []> 1103 : RtSystemI<1, (outs GPR64:$Rt), (ins), mnemonic, "\t$Rt", pattern> { 1104 let Inst{20-19} = 0b01; 1105 let Inst{18-16} = op1; 1106 let Inst{15-8} = 0b01110111; 1107 let Inst{7-5} = op2; 1108 let Predicates = [HasGCS]; 1109} 1110 1111def GCSSS2 : GCSRtOut<0b011, 0b011, "gcsss2">; 1112def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm">; 1113def GCSPOPM_NoOp : InstAlias<"gcspopm", (GCSPOPM XZR)>, Requires<[HasGCS]>; // Rt defaults to XZR if absent 1114 1115def GCSB_DSYNC_disable : InstAlias<"gcsb\tdsync", (HINT 19), 0>; 1116def GCSB_DSYNC : InstAlias<"gcsb\tdsync", (HINT 19), 1>, Requires<[HasGCS]>; 1117 1118def : TokenAlias<"DSYNC", "dsync">; 1119 1120let Uses = [X16], Defs = [X16], CRm = 0b0101 in { 1121 def CHKFEAT : SystemNoOperands<0b000, "hint\t#40">; 1122} 1123def : InstAlias<"chkfeat\tx16", (CHKFEAT), 0>; 1124def : InstAlias<"chkfeat\tx16", (CHKFEAT), 1>, Requires<[HasCHK]>; 1125 1126class GCSSt<string mnemonic, bits<3> op> 1127 : I<(outs), (ins GPR64:$Rt, GPR64sp:$Rn), mnemonic, "\t$Rt, $Rn", "", []>, Sched<[]> { 1128 bits<5> Rt; 1129 bits<5> Rn; 1130 let Inst{31-15} = 0b11011001000111110; 1131 let Inst{14-12} = op; 1132 let Inst{11-10} = 0b11; 1133 let Inst{9-5} = Rn; 1134 let Inst{4-0} = Rt; 1135 let Predicates = [HasGCS]; 1136} 1137def GCSSTR : GCSSt<"gcsstr", 0b000>; 1138def GCSSTTR : GCSSt<"gcssttr", 0b001>; 1139 1140 1141// ARMv8.2-A Dot Product 1142let Predicates = [HasDotProd] in { 1143defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>; 1144defm UDOT : SIMDThreeSameVectorDot<1, 0, "udot", AArch64udot>; 1145defm SDOTlane : SIMDThreeSameVectorDotIndex<0, 0, 0b10, "sdot", AArch64sdot>; 1146defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>; 1147} 1148 1149// ARMv8.6-A BFloat 1150let Predicates = [HasNEON, HasBF16] in { 1151defm BFDOT : SIMDThreeSameVectorBFDot<1, "bfdot">; 1152defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">; 1153def BFMMLA : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">; 1154def BFMLALB : SIMDBF16MLAL<0, "bfmlalb", int_aarch64_neon_bfmlalb>; 1155def BFMLALT : SIMDBF16MLAL<1, "bfmlalt", int_aarch64_neon_bfmlalt>; 1156def BFMLALBIdx : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>; 1157def BFMLALTIdx : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>; 1158def BFCVTN : SIMD_BFCVTN; 1159def BFCVTN2 : SIMD_BFCVTN2; 1160 1161// Vector-scalar BFDOT: 1162// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit 1163// register (the instruction uses a single 32-bit lane from it), so the pattern 1164// is a bit tricky. 1165def : Pat<(v2f32 (int_aarch64_neon_bfdot 1166 (v2f32 V64:$Rd), (v4bf16 V64:$Rn), 1167 (v4bf16 (bitconvert 1168 (v2i32 (AArch64duplane32 1169 (v4i32 (bitconvert 1170 (v8bf16 (insert_subvector undef, 1171 (v4bf16 V64:$Rm), 1172 (i64 0))))), 1173 VectorIndexS:$idx)))))), 1174 (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn), 1175 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), 1176 VectorIndexS:$idx)>; 1177} 1178 1179let Predicates = [HasNEONorSME, HasBF16] in { 1180def BFCVT : BF16ToSinglePrecision<"bfcvt">; 1181} 1182 1183// ARMv8.6A AArch64 matrix multiplication 1184let Predicates = [HasMatMulInt8] in { 1185def SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>; 1186def UMMLA : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>; 1187def USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>; 1188defm USDOT : SIMDThreeSameVectorDot<0, 1, "usdot", int_aarch64_neon_usdot>; 1189defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_neon_usdot>; 1190 1191// sudot lane has a pattern where usdot is expected (there is no sudot). 1192// The second operand is used in the dup operation to repeat the indexed 1193// element. 1194class BaseSIMDSUDOTIndex<bit Q, string dst_kind, string lhs_kind, 1195 string rhs_kind, RegisterOperand RegType, 1196 ValueType AccumType, ValueType InputType> 1197 : BaseSIMDThreeSameVectorDotIndex<Q, 0, 1, 0b00, "sudot", dst_kind, 1198 lhs_kind, rhs_kind, RegType, AccumType, 1199 InputType, null_frag> { 1200 let Pattern = [(set (AccumType RegType:$dst), 1201 (AccumType (int_aarch64_neon_usdot (AccumType RegType:$Rd), 1202 (InputType (bitconvert (AccumType 1203 (AArch64duplane32 (v4i32 V128:$Rm), 1204 VectorIndexS:$idx)))), 1205 (InputType RegType:$Rn))))]; 1206} 1207 1208multiclass SIMDSUDOTIndex { 1209 def v8i8 : BaseSIMDSUDOTIndex<0, ".2s", ".8b", ".4b", V64, v2i32, v8i8>; 1210 def v16i8 : BaseSIMDSUDOTIndex<1, ".4s", ".16b", ".4b", V128, v4i32, v16i8>; 1211} 1212 1213defm SUDOTlane : SIMDSUDOTIndex; 1214 1215} 1216 1217// ARMv8.2-A FP16 Fused Multiply-Add Long 1218let Predicates = [HasNEON, HasFP16FML] in { 1219defm FMLAL : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>; 1220defm FMLSL : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>; 1221defm FMLAL2 : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>; 1222defm FMLSL2 : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>; 1223defm FMLALlane : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>; 1224defm FMLSLlane : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>; 1225defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>; 1226defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>; 1227} 1228 1229// Armv8.2-A Crypto extensions 1230let Predicates = [HasSHA3] in { 1231def SHA512H : CryptoRRRTied<0b0, 0b00, "sha512h">; 1232def SHA512H2 : CryptoRRRTied<0b0, 0b01, "sha512h2">; 1233def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">; 1234def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">; 1235def RAX1 : CryptoRRR_2D<0b0,0b11, "rax1">; 1236def EOR3 : CryptoRRRR_16B<0b00, "eor3">; 1237def BCAX : CryptoRRRR_16B<0b01, "bcax">; 1238def XAR : CryptoRRRi6<"xar">; 1239 1240class SHA3_pattern<Instruction INST, Intrinsic OpNode, ValueType VecTy> 1241 : Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))), 1242 (INST (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>; 1243 1244def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))), 1245 (SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>; 1246 1247def : SHA3_pattern<SHA512H, int_aarch64_crypto_sha512h, v2i64>; 1248def : SHA3_pattern<SHA512H2, int_aarch64_crypto_sha512h2, v2i64>; 1249def : SHA3_pattern<SHA512SU1, int_aarch64_crypto_sha512su1, v2i64>; 1250 1251def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v16i8>; 1252def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>; 1253def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>; 1254def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>; 1255 1256class EOR3_pattern<ValueType VecTy> 1257 : Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)), 1258 (EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>; 1259 1260def : EOR3_pattern<v16i8>; 1261def : EOR3_pattern<v8i16>; 1262def : EOR3_pattern<v4i32>; 1263def : EOR3_pattern<v2i64>; 1264 1265class BCAX_pattern<ValueType VecTy> 1266 : Pat<(xor (VecTy V128:$Vn), (and (VecTy V128:$Vm), (vnot (VecTy V128:$Va)))), 1267 (BCAX (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>; 1268 1269def : BCAX_pattern<v16i8>; 1270def : BCAX_pattern<v8i16>; 1271def : BCAX_pattern<v4i32>; 1272def : BCAX_pattern<v2i64>; 1273 1274def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>; 1275def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>; 1276def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>; 1277def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v2i64>; 1278 1279def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v16i8>; 1280def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v8i16>; 1281def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v4i32>; 1282def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v2i64>; 1283 1284def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v16i8>; 1285def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v8i16>; 1286def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v4i32>; 1287def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v2i64>; 1288 1289def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))), 1290 (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>; 1291 1292def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))), 1293 (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>; 1294 1295def : Pat<(xor (v2i64 V128:$Vn), (or (AArch64vlshr (v2i64 V128:$Vm), (i32 63)), (AArch64vshl (v2i64 V128:$Vm), (i32 1)))), 1296 (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>; 1297 1298} // HasSHA3 1299 1300let Predicates = [HasSM4] in { 1301def SM3TT1A : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">; 1302def SM3TT1B : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">; 1303def SM3TT2A : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">; 1304def SM3TT2B : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">; 1305def SM3SS1 : CryptoRRRR_4S<0b10, "sm3ss1">; 1306def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">; 1307def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">; 1308def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">; 1309def SM4E : CryptoRRTied_4S<0b0, 0b01, "sm4e">; 1310 1311def : Pat<(v4i32 (int_aarch64_crypto_sm3ss1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))), 1312 (SM3SS1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))>; 1313 1314class SM3PARTW_pattern<Instruction INST, Intrinsic OpNode> 1315 : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))), 1316 (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))>; 1317 1318class SM3TT_pattern<Instruction INST, Intrinsic OpNode> 1319 : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (i64 VectorIndexS_timm:$imm) )), 1320 (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (VectorIndexS_timm:$imm))>; 1321 1322class SM4_pattern<Instruction INST, Intrinsic OpNode> 1323 : Pat<(v4i32 (OpNode (v4i32 V128:$Vn), (v4i32 V128:$Vm))), 1324 (INST (v4i32 V128:$Vn), (v4i32 V128:$Vm))>; 1325 1326def : SM3PARTW_pattern<SM3PARTW1, int_aarch64_crypto_sm3partw1>; 1327def : SM3PARTW_pattern<SM3PARTW2, int_aarch64_crypto_sm3partw2>; 1328 1329def : SM3TT_pattern<SM3TT1A, int_aarch64_crypto_sm3tt1a>; 1330def : SM3TT_pattern<SM3TT1B, int_aarch64_crypto_sm3tt1b>; 1331def : SM3TT_pattern<SM3TT2A, int_aarch64_crypto_sm3tt2a>; 1332def : SM3TT_pattern<SM3TT2B, int_aarch64_crypto_sm3tt2b>; 1333 1334def : SM4_pattern<SM4ENCKEY, int_aarch64_crypto_sm4ekey>; 1335def : SM4_pattern<SM4E, int_aarch64_crypto_sm4e>; 1336} // HasSM4 1337 1338let Predicates = [HasRCPC] in { 1339 // v8.3 Release Consistent Processor Consistent support, optional in v8.2. 1340 def LDAPRB : RCPCLoad<0b00, "ldaprb", GPR32>; 1341 def LDAPRH : RCPCLoad<0b01, "ldaprh", GPR32>; 1342 def LDAPRW : RCPCLoad<0b10, "ldapr", GPR32>; 1343 def LDAPRX : RCPCLoad<0b11, "ldapr", GPR64>; 1344} 1345 1346// v8.3a complex add and multiply-accumulate. No predicate here, that is done 1347// inside the multiclass as the FP16 versions need different predicates. 1348defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop, 1349 "fcmla", null_frag>; 1350defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd, 1351 "fcadd", null_frag>; 1352defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">; 1353 1354let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { 1355 def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))), 1356 (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>; 1357 def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))), 1358 (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>; 1359 def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))), 1360 (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>; 1361 def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))), 1362 (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>; 1363} 1364 1365let Predicates = [HasComplxNum, HasNEON] in { 1366 def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))), 1367 (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>; 1368 def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))), 1369 (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>; 1370 foreach Ty = [v4f32, v2f64] in { 1371 def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))), 1372 (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>; 1373 def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))), 1374 (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>; 1375 } 1376} 1377 1378multiclass FCMLA_PATS<ValueType ty, DAGOperand Reg> { 1379 def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), 1380 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>; 1381 def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), 1382 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>; 1383 def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), 1384 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>; 1385 def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), 1386 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>; 1387} 1388 1389multiclass FCMLA_LANE_PATS<ValueType ty, DAGOperand Reg, dag RHSDup> { 1390 def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), 1391 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>; 1392 def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), 1393 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>; 1394 def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), 1395 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>; 1396 def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), 1397 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>; 1398} 1399 1400 1401let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { 1402 defm : FCMLA_PATS<v4f16, V64>; 1403 defm : FCMLA_PATS<v8f16, V128>; 1404 1405 defm : FCMLA_LANE_PATS<v4f16, V64, 1406 (v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>; 1407 defm : FCMLA_LANE_PATS<v8f16, V128, 1408 (v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>; 1409} 1410let Predicates = [HasComplxNum, HasNEON] in { 1411 defm : FCMLA_PATS<v2f32, V64>; 1412 defm : FCMLA_PATS<v4f32, V128>; 1413 defm : FCMLA_PATS<v2f64, V128>; 1414 1415 defm : FCMLA_LANE_PATS<v4f32, V128, 1416 (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>; 1417} 1418 1419// v8.3a Pointer Authentication 1420// These instructions inhabit part of the hint space and so can be used for 1421// armv8 targets. Keeping the old HINT mnemonic when compiling without PA is 1422// important for compatibility with other assemblers (e.g. GAS) when building 1423// software compatible with both CPUs that do or don't implement PA. 1424let Uses = [LR], Defs = [LR] in { 1425 def PACIAZ : SystemNoOperands<0b000, "hint\t#24">; 1426 def PACIBZ : SystemNoOperands<0b010, "hint\t#26">; 1427 let isAuthenticated = 1 in { 1428 def AUTIAZ : SystemNoOperands<0b100, "hint\t#28">; 1429 def AUTIBZ : SystemNoOperands<0b110, "hint\t#30">; 1430 } 1431} 1432let Uses = [LR, SP], Defs = [LR] in { 1433 def PACIASP : SystemNoOperands<0b001, "hint\t#25">; 1434 def PACIBSP : SystemNoOperands<0b011, "hint\t#27">; 1435 let isAuthenticated = 1 in { 1436 def AUTIASP : SystemNoOperands<0b101, "hint\t#29">; 1437 def AUTIBSP : SystemNoOperands<0b111, "hint\t#31">; 1438 } 1439} 1440let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in { 1441 def PACIA1716 : SystemNoOperands<0b000, "hint\t#8">; 1442 def PACIB1716 : SystemNoOperands<0b010, "hint\t#10">; 1443 let isAuthenticated = 1 in { 1444 def AUTIA1716 : SystemNoOperands<0b100, "hint\t#12">; 1445 def AUTIB1716 : SystemNoOperands<0b110, "hint\t#14">; 1446 } 1447} 1448 1449let Uses = [LR], Defs = [LR], CRm = 0b0000 in { 1450 def XPACLRI : SystemNoOperands<0b111, "hint\t#7">; 1451} 1452 1453// In order to be able to write readable assembly, LLVM should accept assembly 1454// inputs that use pointer authentication mnemonics, even with PA disabled. 1455// However, in order to be compatible with other assemblers (e.g. GAS), LLVM 1456// should not emit these mnemonics unless PA is enabled. 1457def : InstAlias<"paciaz", (PACIAZ), 0>; 1458def : InstAlias<"pacibz", (PACIBZ), 0>; 1459def : InstAlias<"autiaz", (AUTIAZ), 0>; 1460def : InstAlias<"autibz", (AUTIBZ), 0>; 1461def : InstAlias<"paciasp", (PACIASP), 0>; 1462def : InstAlias<"pacibsp", (PACIBSP), 0>; 1463def : InstAlias<"autiasp", (AUTIASP), 0>; 1464def : InstAlias<"autibsp", (AUTIBSP), 0>; 1465def : InstAlias<"pacia1716", (PACIA1716), 0>; 1466def : InstAlias<"pacib1716", (PACIB1716), 0>; 1467def : InstAlias<"autia1716", (AUTIA1716), 0>; 1468def : InstAlias<"autib1716", (AUTIB1716), 0>; 1469def : InstAlias<"xpaclri", (XPACLRI), 0>; 1470 1471// These pointer authentication instructions require armv8.3a 1472let Predicates = [HasPAuth] in { 1473 1474 // When PA is enabled, a better mnemonic should be emitted. 1475 def : InstAlias<"paciaz", (PACIAZ), 1>; 1476 def : InstAlias<"pacibz", (PACIBZ), 1>; 1477 def : InstAlias<"autiaz", (AUTIAZ), 1>; 1478 def : InstAlias<"autibz", (AUTIBZ), 1>; 1479 def : InstAlias<"paciasp", (PACIASP), 1>; 1480 def : InstAlias<"pacibsp", (PACIBSP), 1>; 1481 def : InstAlias<"autiasp", (AUTIASP), 1>; 1482 def : InstAlias<"autibsp", (AUTIBSP), 1>; 1483 def : InstAlias<"pacia1716", (PACIA1716), 1>; 1484 def : InstAlias<"pacib1716", (PACIB1716), 1>; 1485 def : InstAlias<"autia1716", (AUTIA1716), 1>; 1486 def : InstAlias<"autib1716", (AUTIB1716), 1>; 1487 def : InstAlias<"xpaclri", (XPACLRI), 1>; 1488 1489 multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm, 1490 SDPatternOperator op> { 1491 def IA : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia"), op>; 1492 def IB : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib"), op>; 1493 def DA : SignAuthOneData<prefix, 0b10, !strconcat(asm, "da"), op>; 1494 def DB : SignAuthOneData<prefix, 0b11, !strconcat(asm, "db"), op>; 1495 def IZA : SignAuthZero<prefix_z, 0b00, !strconcat(asm, "iza"), op>; 1496 def DZA : SignAuthZero<prefix_z, 0b10, !strconcat(asm, "dza"), op>; 1497 def IZB : SignAuthZero<prefix_z, 0b01, !strconcat(asm, "izb"), op>; 1498 def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb"), op>; 1499 } 1500 1501 defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>; 1502 defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>; 1503 1504 def XPACI : ClearAuth<0, "xpaci">; 1505 def : Pat<(int_ptrauth_strip GPR64:$Rd, 0), (XPACI GPR64:$Rd)>; 1506 def : Pat<(int_ptrauth_strip GPR64:$Rd, 1), (XPACI GPR64:$Rd)>; 1507 1508 def XPACD : ClearAuth<1, "xpacd">; 1509 def : Pat<(int_ptrauth_strip GPR64:$Rd, 2), (XPACD GPR64:$Rd)>; 1510 def : Pat<(int_ptrauth_strip GPR64:$Rd, 3), (XPACD GPR64:$Rd)>; 1511 1512 def PACGA : SignAuthTwoOperand<0b1100, "pacga", int_ptrauth_sign_generic>; 1513 1514 // Combined Instructions 1515 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { 1516 def BRAA : AuthBranchTwoOperands<0, 0, "braa">; 1517 def BRAB : AuthBranchTwoOperands<0, 1, "brab">; 1518 } 1519 let isCall = 1, Defs = [LR], Uses = [SP] in { 1520 def BLRAA : AuthBranchTwoOperands<1, 0, "blraa">; 1521 def BLRAB : AuthBranchTwoOperands<1, 1, "blrab">; 1522 } 1523 1524 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { 1525 def BRAAZ : AuthOneOperand<0b000, 0, "braaz">; 1526 def BRABZ : AuthOneOperand<0b000, 1, "brabz">; 1527 } 1528 let isCall = 1, Defs = [LR], Uses = [SP] in { 1529 def BLRAAZ : AuthOneOperand<0b001, 0, "blraaz">; 1530 def BLRABZ : AuthOneOperand<0b001, 1, "blrabz">; 1531 } 1532 1533 let isReturn = 1, isTerminator = 1, isBarrier = 1 in { 1534 def RETAA : AuthReturn<0b010, 0, "retaa">; 1535 def RETAB : AuthReturn<0b010, 1, "retab">; 1536 def ERETAA : AuthReturn<0b100, 0, "eretaa">; 1537 def ERETAB : AuthReturn<0b100, 1, "eretab">; 1538 } 1539 1540 defm LDRAA : AuthLoad<0, "ldraa", simm10Scaled>; 1541 defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>; 1542 1543} 1544 1545// v8.3a floating point conversion for javascript 1546let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in 1547def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32, 1548 "fjcvtzs", 1549 [(set GPR32:$Rd, 1550 (int_aarch64_fjcvtzs FPR64:$Rn))]> { 1551 let Inst{31} = 0; 1552} // HasJS, HasFPARMv8 1553 1554// v8.4 Flag manipulation instructions 1555let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in { 1556def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> { 1557 let Inst{20-5} = 0b0000001000000000; 1558} 1559def SETF8 : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">; 1560def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">; 1561def RMIF : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif", 1562 "{\t$Rn, $imm, $mask}">; 1563} // HasFlagM 1564 1565// v8.5 flag manipulation instructions 1566let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in { 1567 1568def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> { 1569 let Inst{18-16} = 0b000; 1570 let Inst{11-8} = 0b0000; 1571 let Unpredictable{11-8} = 0b1111; 1572 let Inst{7-5} = 0b001; 1573} 1574 1575def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> { 1576 let Inst{18-16} = 0b000; 1577 let Inst{11-8} = 0b0000; 1578 let Unpredictable{11-8} = 0b1111; 1579 let Inst{7-5} = 0b010; 1580} 1581} // HasAltNZCV 1582 1583 1584// Armv8.5-A speculation barrier 1585def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> { 1586 let Inst{20-5} = 0b0001100110000111; 1587 let Unpredictable{11-8} = 0b1111; 1588 let Predicates = [HasSB]; 1589 let hasSideEffects = 1; 1590} 1591 1592def : InstAlias<"clrex", (CLREX 0xf)>; 1593def : InstAlias<"isb", (ISB 0xf)>; 1594def : InstAlias<"ssbb", (DSB 0)>; 1595def : InstAlias<"pssbb", (DSB 4)>; 1596def : InstAlias<"dfb", (DSB 0b1100)>, Requires<[HasV8_0r]>; 1597 1598def MRS : MRSI; 1599def MSR : MSRI; 1600def MSRpstateImm1 : MSRpstateImm0_1; 1601def MSRpstateImm4 : MSRpstateImm0_15; 1602 1603def : Pat<(AArch64mrs imm:$id), 1604 (MRS imm:$id)>; 1605 1606// The thread pointer (on Linux, at least, where this has been implemented) is 1607// TPIDR_EL0. 1608def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins), 1609 [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>; 1610 1611// This gets lowered into a 24-byte instruction sequence 1612let Defs = [ X9, X16, X17, NZCV ], Size = 24 in { 1613def KCFI_CHECK : Pseudo< 1614 (outs), (ins GPR64:$ptr, i32imm:$type), []>, Sched<[]>; 1615} 1616 1617let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in { 1618def HWASAN_CHECK_MEMACCESS : Pseudo< 1619 (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), 1620 [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, 1621 Sched<[]>; 1622} 1623 1624let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in { 1625def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo< 1626 (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), 1627 [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, 1628 Sched<[]>; 1629} 1630 1631// The virtual cycle counter register is CNTVCT_EL0. 1632def : Pat<(readcyclecounter), (MRS 0xdf02)>; 1633 1634// FPCR register 1635let Uses = [FPCR] in 1636def MRS_FPCR : Pseudo<(outs GPR64:$dst), (ins), 1637 [(set GPR64:$dst, (int_aarch64_get_fpcr))]>, 1638 PseudoInstExpansion<(MRS GPR64:$dst, 0xda20)>, 1639 Sched<[WriteSys]>; 1640let Defs = [FPCR] in 1641def MSR_FPCR : Pseudo<(outs), (ins GPR64:$val), 1642 [(int_aarch64_set_fpcr i64:$val)]>, 1643 PseudoInstExpansion<(MSR 0xda20, GPR64:$val)>, 1644 Sched<[WriteSys]>; 1645 1646// Generic system instructions 1647def SYSxt : SystemXtI<0, "sys">; 1648def SYSLxt : SystemLXtI<1, "sysl">; 1649 1650def : InstAlias<"sys $op1, $Cn, $Cm, $op2", 1651 (SYSxt imm0_7:$op1, sys_cr_op:$Cn, 1652 sys_cr_op:$Cm, imm0_7:$op2, XZR)>; 1653 1654 1655let Predicates = [HasTME] in { 1656 1657def TSTART : TMSystemI<0b0000, "tstart", 1658 [(set GPR64:$Rt, (int_aarch64_tstart))]>; 1659 1660def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>; 1661 1662def TCANCEL : TMSystemException<0b011, "tcancel", 1663 [(int_aarch64_tcancel timm64_0_65535:$imm)]>; 1664 1665def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> { 1666 let mayLoad = 0; 1667 let mayStore = 0; 1668} 1669} // HasTME 1670 1671//===----------------------------------------------------------------------===// 1672// Move immediate instructions. 1673//===----------------------------------------------------------------------===// 1674 1675defm MOVK : InsertImmediate<0b11, "movk">; 1676defm MOVN : MoveImmediate<0b00, "movn">; 1677 1678let PostEncoderMethod = "fixMOVZ" in 1679defm MOVZ : MoveImmediate<0b10, "movz">; 1680 1681// First group of aliases covers an implicit "lsl #0". 1682def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, timm32_0_65535:$imm, 0), 0>; 1683def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, timm32_0_65535:$imm, 0), 0>; 1684def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, timm32_0_65535:$imm, 0)>; 1685def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, timm32_0_65535:$imm, 0)>; 1686def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, timm32_0_65535:$imm, 0)>; 1687def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, timm32_0_65535:$imm, 0)>; 1688 1689// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax. 1690def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>; 1691def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>; 1692def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>; 1693def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>; 1694 1695def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>; 1696def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>; 1697def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>; 1698def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>; 1699 1700def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>; 1701def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>; 1702def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>; 1703def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>; 1704 1705def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>; 1706def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>; 1707 1708def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>; 1709def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>; 1710 1711def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>; 1712def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>; 1713 1714// Final group of aliases covers true "mov $Rd, $imm" cases. 1715multiclass movw_mov_alias<string basename,Instruction INST, RegisterClass GPR, 1716 int width, int shift> { 1717 def _asmoperand : AsmOperandClass { 1718 let Name = basename # width # "_lsl" # shift # "MovAlias"; 1719 let PredicateMethod = "is" # basename # "MovAlias<" # width # ", " 1720 # shift # ">"; 1721 let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">"; 1722 } 1723 1724 def _movimm : Operand<i32> { 1725 let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand"); 1726 } 1727 1728 def : InstAlias<"mov $Rd, $imm", 1729 (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>; 1730} 1731 1732defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>; 1733defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>; 1734 1735defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>; 1736defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>; 1737defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>; 1738defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>; 1739 1740defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>; 1741defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>; 1742 1743defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>; 1744defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>; 1745defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>; 1746defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>; 1747 1748let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1, 1749 isAsCheapAsAMove = 1 in { 1750// FIXME: The following pseudo instructions are only needed because remat 1751// cannot handle multiple instructions. When that changes, we can select 1752// directly to the real instructions and get rid of these pseudos. 1753 1754def MOVi32imm 1755 : Pseudo<(outs GPR32:$dst), (ins i32imm:$src), 1756 [(set GPR32:$dst, imm:$src)]>, 1757 Sched<[WriteImm]>; 1758def MOVi64imm 1759 : Pseudo<(outs GPR64:$dst), (ins i64imm:$src), 1760 [(set GPR64:$dst, imm:$src)]>, 1761 Sched<[WriteImm]>; 1762} // isReMaterializable, isCodeGenOnly 1763 1764// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the 1765// eventual expansion code fewer bits to worry about getting right. Marshalling 1766// the types is a little tricky though: 1767def i64imm_32bit : ImmLeaf<i64, [{ 1768 return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm); 1769}]>; 1770 1771def s64imm_32bit : ImmLeaf<i64, [{ 1772 int64_t Imm64 = static_cast<int64_t>(Imm); 1773 return Imm64 >= std::numeric_limits<int32_t>::min() && 1774 Imm64 <= std::numeric_limits<int32_t>::max(); 1775}]>; 1776 1777def trunc_imm : SDNodeXForm<imm, [{ 1778 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32); 1779}]>; 1780 1781def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">, 1782 GISDNodeXFormEquiv<trunc_imm>; 1783 1784let Predicates = [OptimizedGISelOrOtherSelector] in { 1785// The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless 1786// copies. 1787def : Pat<(i64 i64imm_32bit:$src), 1788 (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>; 1789} 1790 1791// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model). 1792def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{ 1793return CurDAG->getTargetConstant( 1794 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32); 1795}]>; 1796 1797def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{ 1798return CurDAG->getTargetConstant( 1799 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64); 1800}]>; 1801 1802 1803def : Pat<(f32 fpimm:$in), 1804 (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>; 1805def : Pat<(f64 fpimm:$in), 1806 (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>; 1807 1808 1809// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK 1810// sequences. 1811def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2, 1812 tglobaladdr:$g1, tglobaladdr:$g0), 1813 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0), 1814 tglobaladdr:$g1, 16), 1815 tglobaladdr:$g2, 32), 1816 tglobaladdr:$g3, 48)>; 1817 1818def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2, 1819 tblockaddress:$g1, tblockaddress:$g0), 1820 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0), 1821 tblockaddress:$g1, 16), 1822 tblockaddress:$g2, 32), 1823 tblockaddress:$g3, 48)>; 1824 1825def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2, 1826 tconstpool:$g1, tconstpool:$g0), 1827 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0), 1828 tconstpool:$g1, 16), 1829 tconstpool:$g2, 32), 1830 tconstpool:$g3, 48)>; 1831 1832def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2, 1833 tjumptable:$g1, tjumptable:$g0), 1834 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0), 1835 tjumptable:$g1, 16), 1836 tjumptable:$g2, 32), 1837 tjumptable:$g3, 48)>; 1838 1839 1840//===----------------------------------------------------------------------===// 1841// Arithmetic instructions. 1842//===----------------------------------------------------------------------===// 1843 1844// Add/subtract with carry. 1845defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>; 1846defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>; 1847 1848def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>; 1849def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>; 1850def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>; 1851def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>; 1852 1853// Add/subtract 1854defm ADD : AddSub<0, "add", "sub", add>; 1855defm SUB : AddSub<1, "sub", "add">; 1856 1857def : InstAlias<"mov $dst, $src", 1858 (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>; 1859def : InstAlias<"mov $dst, $src", 1860 (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>; 1861def : InstAlias<"mov $dst, $src", 1862 (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>; 1863def : InstAlias<"mov $dst, $src", 1864 (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>; 1865 1866defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">; 1867defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">; 1868 1869def copyFromSP: PatLeaf<(i64 GPR64:$src), [{ 1870 return N->getOpcode() == ISD::CopyFromReg && 1871 cast<RegisterSDNode>(N->getOperand(1))->getReg() == AArch64::SP; 1872}]>; 1873 1874// Use SUBS instead of SUB to enable CSE between SUBS and SUB. 1875def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), 1876 (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>; 1877def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm), 1878 (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>; 1879def : Pat<(sub GPR32:$Rn, GPR32:$Rm), 1880 (SUBSWrr GPR32:$Rn, GPR32:$Rm)>; 1881def : Pat<(sub GPR64:$Rn, GPR64:$Rm), 1882 (SUBSXrr GPR64:$Rn, GPR64:$Rm)>; 1883def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm), 1884 (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>; 1885def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm), 1886 (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>; 1887let AddedComplexity = 1 in { 1888def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3), 1889 (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>; 1890def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3), 1891 (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>; 1892def : Pat<(sub copyFromSP:$R2, (arith_uxtx GPR64:$R3, arith_extendlsl64:$imm)), 1893 (SUBXrx64 GPR64sp:$R2, GPR64:$R3, arith_extendlsl64:$imm)>; 1894} 1895 1896// Because of the immediate format for add/sub-imm instructions, the 1897// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). 1898// These patterns capture that transformation. 1899let AddedComplexity = 1 in { 1900def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 1901 (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 1902def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 1903 (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 1904def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 1905 (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 1906def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 1907 (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 1908} 1909 1910// Because of the immediate format for add/sub-imm instructions, the 1911// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). 1912// These patterns capture that transformation. 1913let AddedComplexity = 1 in { 1914def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 1915 (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 1916def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 1917 (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 1918def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 1919 (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 1920def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 1921 (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 1922} 1923 1924def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; 1925def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; 1926def : InstAlias<"neg $dst, $src$shift", 1927 (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; 1928def : InstAlias<"neg $dst, $src$shift", 1929 (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; 1930 1931def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; 1932def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; 1933def : InstAlias<"negs $dst, $src$shift", 1934 (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; 1935def : InstAlias<"negs $dst, $src$shift", 1936 (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; 1937 1938 1939// Unsigned/Signed divide 1940defm UDIV : Div<0, "udiv", udiv>; 1941defm SDIV : Div<1, "sdiv", sdiv>; 1942 1943def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>; 1944def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>; 1945def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>; 1946def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>; 1947 1948// Variable shift 1949defm ASRV : Shift<0b10, "asr", sra>; 1950defm LSLV : Shift<0b00, "lsl", shl>; 1951defm LSRV : Shift<0b01, "lsr", srl>; 1952defm RORV : Shift<0b11, "ror", rotr>; 1953 1954def : ShiftAlias<"asrv", ASRVWr, GPR32>; 1955def : ShiftAlias<"asrv", ASRVXr, GPR64>; 1956def : ShiftAlias<"lslv", LSLVWr, GPR32>; 1957def : ShiftAlias<"lslv", LSLVXr, GPR64>; 1958def : ShiftAlias<"lsrv", LSRVWr, GPR32>; 1959def : ShiftAlias<"lsrv", LSRVXr, GPR64>; 1960def : ShiftAlias<"rorv", RORVWr, GPR32>; 1961def : ShiftAlias<"rorv", RORVXr, GPR64>; 1962 1963// Multiply-add 1964let AddedComplexity = 5 in { 1965defm MADD : MulAccum<0, "madd">; 1966defm MSUB : MulAccum<1, "msub">; 1967 1968def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)), 1969 (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; 1970def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)), 1971 (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; 1972 1973def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))), 1974 (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; 1975def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))), 1976 (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; 1977def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)), 1978 (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; 1979def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)), 1980 (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; 1981} // AddedComplexity = 5 1982 1983let AddedComplexity = 5 in { 1984def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>; 1985def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>; 1986def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>; 1987def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>; 1988 1989def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))), 1990 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 1991def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))), 1992 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 1993def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))), 1994 (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 1995def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))), 1996 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 1997def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))), 1998 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 1999def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))), 2000 (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 2001 2002def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))), 2003 (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 2004def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))), 2005 (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 2006 2007def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))), 2008 (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2009def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))), 2010 (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2011def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))), 2012 (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 2013 (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2014 2015def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), 2016 (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2017def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), 2018 (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2019def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))), 2020 (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 2021 (MOVi32imm (trunc_imm imm:$C)), XZR)>; 2022 2023def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)), 2024 (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2025def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)), 2026 (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2027def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)), 2028 GPR64:$Ra)), 2029 (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 2030 (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2031 2032def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), 2033 (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2034def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), 2035 (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2036def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32), 2037 (s64imm_32bit:$C)))), 2038 (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 2039 (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 2040 2041def : Pat<(i64 (smullwithsignbits GPR64:$Rn, GPR64:$Rm)), 2042 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 2043def : Pat<(i64 (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm))), 2044 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2045 2046def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, GPR64:$Rm), GPR64:$Ra)), 2047 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 2048def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)), GPR64:$Ra)), 2049 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 2050 2051def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, GPR64:$Rm))), 2052 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 2053def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))), 2054 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2055 2056def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, GPR64:$Rm))), 2057 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 2058def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))), 2059 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 2060 2061def : Pat<(i64 (mul top32Zero:$Rn, top32Zero:$Rm)), 2062 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 2063def : Pat<(i64 (mul top32Zero:$Rn, (zext GPR32:$Rm))), 2064 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2065 2066def : Pat<(i64 (add (mul top32Zero:$Rn, top32Zero:$Rm), GPR64:$Ra)), 2067 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 2068def : Pat<(i64 (add (mul top32Zero:$Rn, (zext GPR32:$Rm)), GPR64:$Ra)), 2069 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 2070 2071def : Pat<(i64 (ineg (mul top32Zero:$Rn, top32Zero:$Rm))), 2072 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 2073def : Pat<(i64 (ineg (mul top32Zero:$Rn, (zext GPR32:$Rm)))), 2074 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 2075 2076def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, top32Zero:$Rm))), 2077 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 2078def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, (zext GPR32:$Rm)))), 2079 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 2080} // AddedComplexity = 5 2081 2082def : MulAccumWAlias<"mul", MADDWrrr>; 2083def : MulAccumXAlias<"mul", MADDXrrr>; 2084def : MulAccumWAlias<"mneg", MSUBWrrr>; 2085def : MulAccumXAlias<"mneg", MSUBXrrr>; 2086def : WideMulAccumAlias<"smull", SMADDLrrr>; 2087def : WideMulAccumAlias<"smnegl", SMSUBLrrr>; 2088def : WideMulAccumAlias<"umull", UMADDLrrr>; 2089def : WideMulAccumAlias<"umnegl", UMSUBLrrr>; 2090 2091// Multiply-high 2092def SMULHrr : MulHi<0b010, "smulh", mulhs>; 2093def UMULHrr : MulHi<0b110, "umulh", mulhu>; 2094 2095// CRC32 2096def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">; 2097def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">; 2098def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">; 2099def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">; 2100 2101def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">; 2102def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">; 2103def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">; 2104def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; 2105 2106// v8.1 atomic CAS 2107defm CAS : CompareAndSwap<0, 0, "">; 2108defm CASA : CompareAndSwap<1, 0, "a">; 2109defm CASL : CompareAndSwap<0, 1, "l">; 2110defm CASAL : CompareAndSwap<1, 1, "al">; 2111 2112// v8.1 atomic CASP 2113defm CASP : CompareAndSwapPair<0, 0, "">; 2114defm CASPA : CompareAndSwapPair<1, 0, "a">; 2115defm CASPL : CompareAndSwapPair<0, 1, "l">; 2116defm CASPAL : CompareAndSwapPair<1, 1, "al">; 2117 2118// v8.1 atomic SWP 2119defm SWP : Swap<0, 0, "">; 2120defm SWPA : Swap<1, 0, "a">; 2121defm SWPL : Swap<0, 1, "l">; 2122defm SWPAL : Swap<1, 1, "al">; 2123 2124// v8.1 atomic LD<OP>(register). Performs load and then ST<OP>(register) 2125defm LDADD : LDOPregister<0b000, "add", 0, 0, "">; 2126defm LDADDA : LDOPregister<0b000, "add", 1, 0, "a">; 2127defm LDADDL : LDOPregister<0b000, "add", 0, 1, "l">; 2128defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">; 2129 2130defm LDCLR : LDOPregister<0b001, "clr", 0, 0, "">; 2131defm LDCLRA : LDOPregister<0b001, "clr", 1, 0, "a">; 2132defm LDCLRL : LDOPregister<0b001, "clr", 0, 1, "l">; 2133defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">; 2134 2135defm LDEOR : LDOPregister<0b010, "eor", 0, 0, "">; 2136defm LDEORA : LDOPregister<0b010, "eor", 1, 0, "a">; 2137defm LDEORL : LDOPregister<0b010, "eor", 0, 1, "l">; 2138defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">; 2139 2140defm LDSET : LDOPregister<0b011, "set", 0, 0, "">; 2141defm LDSETA : LDOPregister<0b011, "set", 1, 0, "a">; 2142defm LDSETL : LDOPregister<0b011, "set", 0, 1, "l">; 2143defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">; 2144 2145defm LDSMAX : LDOPregister<0b100, "smax", 0, 0, "">; 2146defm LDSMAXA : LDOPregister<0b100, "smax", 1, 0, "a">; 2147defm LDSMAXL : LDOPregister<0b100, "smax", 0, 1, "l">; 2148defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">; 2149 2150defm LDSMIN : LDOPregister<0b101, "smin", 0, 0, "">; 2151defm LDSMINA : LDOPregister<0b101, "smin", 1, 0, "a">; 2152defm LDSMINL : LDOPregister<0b101, "smin", 0, 1, "l">; 2153defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">; 2154 2155defm LDUMAX : LDOPregister<0b110, "umax", 0, 0, "">; 2156defm LDUMAXA : LDOPregister<0b110, "umax", 1, 0, "a">; 2157defm LDUMAXL : LDOPregister<0b110, "umax", 0, 1, "l">; 2158defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">; 2159 2160defm LDUMIN : LDOPregister<0b111, "umin", 0, 0, "">; 2161defm LDUMINA : LDOPregister<0b111, "umin", 1, 0, "a">; 2162defm LDUMINL : LDOPregister<0b111, "umin", 0, 1, "l">; 2163defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">; 2164 2165// v8.1 atomic ST<OP>(register) as aliases to "LD<OP>(register) when Rt=xZR" 2166defm : STOPregister<"stadd","LDADD">; // STADDx 2167defm : STOPregister<"stclr","LDCLR">; // STCLRx 2168defm : STOPregister<"steor","LDEOR">; // STEORx 2169defm : STOPregister<"stset","LDSET">; // STSETx 2170defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx 2171defm : STOPregister<"stsmin","LDSMIN">;// STSMINx 2172defm : STOPregister<"stumax","LDUMAX">;// STUMAXx 2173defm : STOPregister<"stumin","LDUMIN">;// STUMINx 2174 2175// v8.5 Memory Tagging Extension 2176let Predicates = [HasMTE] in { 2177 2178def IRG : BaseTwoOperandRegReg<0b1, 0b0, 0b000100, GPR64sp, "irg", 2179 int_aarch64_irg, GPR64sp, GPR64>, Sched<[]>; 2180 2181def GMI : BaseTwoOperandRegReg<0b1, 0b0, 0b000101, GPR64, "gmi", 2182 int_aarch64_gmi, GPR64sp>, Sched<[]> { 2183 let isNotDuplicable = 1; 2184} 2185def ADDG : AddSubG<0, "addg", null_frag>; 2186def SUBG : AddSubG<1, "subg", null_frag>; 2187 2188def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>; 2189 2190def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>; 2191def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{ 2192 let Defs = [NZCV]; 2193} 2194 2195def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>; 2196 2197def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">; 2198 2199def : Pat<(int_aarch64_addg (am_indexedu6s128 GPR64sp:$Rn, uimm6s16:$imm6), imm0_15:$imm4), 2200 (ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>; 2201def : Pat<(int_aarch64_ldg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)), 2202 (LDG GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>; 2203 2204def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>; 2205 2206def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]", 2207 (outs GPR64:$Rt), (ins GPR64sp:$Rn)>; 2208def STGM : MemTagVector<0, "stgm", "\t$Rt, [$Rn]", 2209 (outs), (ins GPR64:$Rt, GPR64sp:$Rn)>; 2210def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]", 2211 (outs), (ins GPR64:$Rt, GPR64sp:$Rn)> { 2212 let Inst{23} = 0; 2213} 2214 2215defm STG : MemTagStore<0b00, "stg">; 2216defm STZG : MemTagStore<0b01, "stzg">; 2217defm ST2G : MemTagStore<0b10, "st2g">; 2218defm STZ2G : MemTagStore<0b11, "stz2g">; 2219 2220def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2221 (STGi $Rn, $Rm, $imm)>; 2222def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2223 (STZGi $Rn, $Rm, $imm)>; 2224def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2225 (ST2Gi $Rn, $Rm, $imm)>; 2226def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2227 (STZ2Gi $Rn, $Rm, $imm)>; 2228 2229defm STGP : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">; 2230def STGPpre : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">; 2231def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">; 2232 2233def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)), 2234 (STGi GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>; 2235 2236def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2), 2237 (STGPi $Rt, $Rt2, $Rn, $imm)>; 2238 2239def IRGstack 2240 : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>, 2241 Sched<[]>; 2242def TAGPstack 2243 : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>, 2244 Sched<[]>; 2245 2246// Explicit SP in the first operand prevents ShrinkWrap optimization 2247// from leaving this instruction out of the stack frame. When IRGstack 2248// is transformed into IRG, this operand is replaced with the actual 2249// register / expression for the tagged base pointer of the current function. 2250def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>; 2251 2252// Large STG to be expanded into a loop. $sz is the size, $Rn is start address. 2253// $Rn_wback is one past the end of the range. $Rm is the loop counter. 2254let isCodeGenOnly=1, mayStore=1 in { 2255def STGloop_wback 2256 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), 2257 [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, 2258 Sched<[WriteAdr, WriteST]>; 2259 2260def STZGloop_wback 2261 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), 2262 [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, 2263 Sched<[WriteAdr, WriteST]>; 2264 2265// A variant of the above where $Rn2 is an independent register not tied to the input register $Rn. 2266// Their purpose is to use a FrameIndex operand as $Rn (which of course can not be written back). 2267def STGloop 2268 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn), 2269 [], "@earlyclobber $Rn2,@earlyclobber $Rm" >, 2270 Sched<[WriteAdr, WriteST]>; 2271 2272def STZGloop 2273 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn), 2274 [], "@earlyclobber $Rn2,@earlyclobber $Rm" >, 2275 Sched<[WriteAdr, WriteST]>; 2276} 2277 2278} // Predicates = [HasMTE] 2279 2280//===----------------------------------------------------------------------===// 2281// Logical instructions. 2282//===----------------------------------------------------------------------===// 2283 2284// (immediate) 2285defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">; 2286defm AND : LogicalImm<0b00, "and", and, "bic">; 2287defm EOR : LogicalImm<0b10, "eor", xor, "eon">; 2288defm ORR : LogicalImm<0b01, "orr", or, "orn">; 2289 2290// FIXME: these aliases *are* canonical sometimes (when movz can't be 2291// used). Actually, it seems to be working right now, but putting logical_immXX 2292// here is a bit dodgy on the AsmParser side too. 2293def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR, 2294 logical_imm32:$imm), 0>; 2295def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR, 2296 logical_imm64:$imm), 0>; 2297 2298 2299// (register) 2300defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>; 2301defm BICS : LogicalRegS<0b11, 1, "bics", 2302 BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>; 2303defm AND : LogicalReg<0b00, 0, "and", and>; 2304defm BIC : LogicalReg<0b00, 1, "bic", 2305 BinOpFrag<(and node:$LHS, (not node:$RHS))>, 3>; 2306defm EON : LogicalReg<0b10, 1, "eon", 2307 BinOpFrag<(not (xor node:$LHS, node:$RHS))>>; 2308defm EOR : LogicalReg<0b10, 0, "eor", xor>; 2309defm ORN : LogicalReg<0b01, 1, "orn", 2310 BinOpFrag<(or node:$LHS, (not node:$RHS))>>; 2311defm ORR : LogicalReg<0b01, 0, "orr", or>; 2312 2313def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>; 2314def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>; 2315 2316def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>; 2317def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>; 2318 2319def : InstAlias<"mvn $Wd, $Wm$sh", 2320 (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>; 2321def : InstAlias<"mvn $Xd, $Xm$sh", 2322 (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>; 2323 2324def : InstAlias<"tst $src1, $src2", 2325 (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>; 2326def : InstAlias<"tst $src1, $src2", 2327 (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>; 2328 2329def : InstAlias<"tst $src1, $src2", 2330 (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>; 2331def : InstAlias<"tst $src1, $src2", 2332 (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>; 2333 2334def : InstAlias<"tst $src1, $src2$sh", 2335 (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>; 2336def : InstAlias<"tst $src1, $src2$sh", 2337 (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>; 2338 2339 2340def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>; 2341def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>; 2342 2343 2344//===----------------------------------------------------------------------===// 2345// One operand data processing instructions. 2346//===----------------------------------------------------------------------===// 2347 2348defm CLS : OneOperandData<0b000101, "cls">; 2349defm CLZ : OneOperandData<0b000100, "clz", ctlz>; 2350defm RBIT : OneOperandData<0b000000, "rbit", bitreverse>; 2351 2352def REV16Wr : OneWRegData<0b000001, "rev16", 2353 UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>; 2354def REV16Xr : OneXRegData<0b000001, "rev16", null_frag>; 2355 2356def : Pat<(cttz GPR32:$Rn), 2357 (CLZWr (RBITWr GPR32:$Rn))>; 2358def : Pat<(cttz GPR64:$Rn), 2359 (CLZXr (RBITXr GPR64:$Rn))>; 2360def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)), 2361 (i32 1))), 2362 (CLSWr GPR32:$Rn)>; 2363def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)), 2364 (i64 1))), 2365 (CLSXr GPR64:$Rn)>; 2366def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>; 2367def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>; 2368 2369// Unlike the other one operand instructions, the instructions with the "rev" 2370// mnemonic do *not* just different in the size bit, but actually use different 2371// opcode bits for the different sizes. 2372def REVWr : OneWRegData<0b000010, "rev", bswap>; 2373def REVXr : OneXRegData<0b000011, "rev", bswap>; 2374def REV32Xr : OneXRegData<0b000010, "rev32", 2375 UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>; 2376 2377def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>; 2378 2379// The bswap commutes with the rotr so we want a pattern for both possible 2380// orders. 2381def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>; 2382def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>; 2383 2384// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero. 2385def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>; 2386def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>; 2387 2388def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)), 2389 (and (shl GPR64:$Rn, (i64 8)), (i64 0xff00ff00ff00ff00))), 2390 (REV16Xr GPR64:$Rn)>; 2391 2392//===----------------------------------------------------------------------===// 2393// Bitfield immediate extraction instruction. 2394//===----------------------------------------------------------------------===// 2395let hasSideEffects = 0 in 2396defm EXTR : ExtractImm<"extr">; 2397def : InstAlias<"ror $dst, $src, $shift", 2398 (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>; 2399def : InstAlias<"ror $dst, $src, $shift", 2400 (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>; 2401 2402def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)), 2403 (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>; 2404def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)), 2405 (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>; 2406 2407//===----------------------------------------------------------------------===// 2408// Other bitfield immediate instructions. 2409//===----------------------------------------------------------------------===// 2410let hasSideEffects = 0 in { 2411defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">; 2412defm SBFM : BitfieldImm<0b00, "sbfm">; 2413defm UBFM : BitfieldImm<0b10, "ubfm">; 2414} 2415 2416def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{ 2417 uint64_t enc = (32 - N->getZExtValue()) & 0x1f; 2418 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2419}]>; 2420 2421def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{ 2422 uint64_t enc = 31 - N->getZExtValue(); 2423 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2424}]>; 2425 2426// min(7, 31 - shift_amt) 2427def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ 2428 uint64_t enc = 31 - N->getZExtValue(); 2429 enc = enc > 7 ? 7 : enc; 2430 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2431}]>; 2432 2433// min(15, 31 - shift_amt) 2434def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ 2435 uint64_t enc = 31 - N->getZExtValue(); 2436 enc = enc > 15 ? 15 : enc; 2437 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2438}]>; 2439 2440def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{ 2441 uint64_t enc = (64 - N->getZExtValue()) & 0x3f; 2442 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2443}]>; 2444 2445def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{ 2446 uint64_t enc = 63 - N->getZExtValue(); 2447 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2448}]>; 2449 2450// min(7, 63 - shift_amt) 2451def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ 2452 uint64_t enc = 63 - N->getZExtValue(); 2453 enc = enc > 7 ? 7 : enc; 2454 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2455}]>; 2456 2457// min(15, 63 - shift_amt) 2458def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ 2459 uint64_t enc = 63 - N->getZExtValue(); 2460 enc = enc > 15 ? 15 : enc; 2461 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2462}]>; 2463 2464// min(31, 63 - shift_amt) 2465def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{ 2466 uint64_t enc = 63 - N->getZExtValue(); 2467 enc = enc > 31 ? 31 : enc; 2468 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2469}]>; 2470 2471def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)), 2472 (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), 2473 (i64 (i32shift_b imm0_31:$imm)))>; 2474def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)), 2475 (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), 2476 (i64 (i64shift_b imm0_63:$imm)))>; 2477 2478let AddedComplexity = 10 in { 2479def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)), 2480 (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; 2481def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)), 2482 (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; 2483} 2484 2485def : InstAlias<"asr $dst, $src, $shift", 2486 (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; 2487def : InstAlias<"asr $dst, $src, $shift", 2488 (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; 2489def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; 2490def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; 2491def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; 2492def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; 2493def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; 2494 2495def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)), 2496 (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; 2497def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)), 2498 (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; 2499 2500def : InstAlias<"lsr $dst, $src, $shift", 2501 (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; 2502def : InstAlias<"lsr $dst, $src, $shift", 2503 (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; 2504def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; 2505def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; 2506def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; 2507def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; 2508def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; 2509 2510//===----------------------------------------------------------------------===// 2511// Conditional comparison instructions. 2512//===----------------------------------------------------------------------===// 2513defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>; 2514defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>; 2515 2516//===----------------------------------------------------------------------===// 2517// Conditional select instructions. 2518//===----------------------------------------------------------------------===// 2519defm CSEL : CondSelect<0, 0b00, "csel">; 2520 2521def inc : PatFrag<(ops node:$in), (add node:$in, 1)>; 2522defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>; 2523defm CSINV : CondSelectOp<1, 0b00, "csinv", not>; 2524defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>; 2525 2526def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), 2527 (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; 2528def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), 2529 (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; 2530def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), 2531 (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; 2532def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), 2533 (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; 2534def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), 2535 (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; 2536def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), 2537 (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; 2538 2539def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV), 2540 (CSINCWr WZR, WZR, (i32 imm:$cc))>; 2541def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV), 2542 (CSINCXr XZR, XZR, (i32 imm:$cc))>; 2543def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV), 2544 (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>; 2545def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV), 2546 (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>; 2547def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV), 2548 (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>; 2549def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV), 2550 (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>; 2551def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV), 2552 (CSINVWr WZR, WZR, (i32 imm:$cc))>; 2553def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV), 2554 (CSINVXr XZR, XZR, (i32 imm:$cc))>; 2555def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV), 2556 (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>; 2557def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV), 2558 (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>; 2559def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV), 2560 (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>; 2561def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV), 2562 (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>; 2563 2564def : Pat<(add GPR32:$val, (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)), 2565 (CSINCWr GPR32:$val, GPR32:$val, (i32 imm:$cc))>; 2566def : Pat<(add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))), 2567 (CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>; 2568 2569def : Pat<(or (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)), 2570 (CSINCWr GPR32:$val, WZR, imm:$cc)>; 2571def : Pat<(or (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)), 2572 (CSINCXr GPR64:$val, XZR, imm:$cc)>; 2573def : Pat<(or (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))), 2574 (CSINCXr GPR64:$val, XZR, imm:$cc)>; 2575 2576def : Pat<(and (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)), 2577 (CSELWr WZR, GPR32:$val, imm:$cc)>; 2578def : Pat<(and (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)), 2579 (CSELXr XZR, GPR64:$val, imm:$cc)>; 2580def : Pat<(and (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))), 2581 (CSELXr XZR, GPR64:$val, imm:$cc)>; 2582 2583// The inverse of the condition code from the alias instruction is what is used 2584// in the aliased instruction. The parser all ready inverts the condition code 2585// for these aliases. 2586def : InstAlias<"cset $dst, $cc", 2587 (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>; 2588def : InstAlias<"cset $dst, $cc", 2589 (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>; 2590 2591def : InstAlias<"csetm $dst, $cc", 2592 (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>; 2593def : InstAlias<"csetm $dst, $cc", 2594 (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>; 2595 2596def : InstAlias<"cinc $dst, $src, $cc", 2597 (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>; 2598def : InstAlias<"cinc $dst, $src, $cc", 2599 (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>; 2600 2601def : InstAlias<"cinv $dst, $src, $cc", 2602 (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>; 2603def : InstAlias<"cinv $dst, $src, $cc", 2604 (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>; 2605 2606def : InstAlias<"cneg $dst, $src, $cc", 2607 (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>; 2608def : InstAlias<"cneg $dst, $src, $cc", 2609 (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>; 2610 2611//===----------------------------------------------------------------------===// 2612// PC-relative instructions. 2613//===----------------------------------------------------------------------===// 2614let isReMaterializable = 1 in { 2615let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in { 2616def ADR : ADRI<0, "adr", adrlabel, 2617 [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>; 2618} // hasSideEffects = 0 2619 2620def ADRP : ADRI<1, "adrp", adrplabel, 2621 [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>; 2622} // isReMaterializable = 1 2623 2624// page address of a constant pool entry, block address 2625def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>; 2626def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>; 2627def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>; 2628def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>; 2629def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>; 2630def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>; 2631def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>; 2632 2633//===----------------------------------------------------------------------===// 2634// Unconditional branch (register) instructions. 2635//===----------------------------------------------------------------------===// 2636 2637let isReturn = 1, isTerminator = 1, isBarrier = 1 in { 2638def RET : BranchReg<0b0010, "ret", []>; 2639def DRPS : SpecialReturn<0b0101, "drps">; 2640def ERET : SpecialReturn<0b0100, "eret">; 2641} // isReturn = 1, isTerminator = 1, isBarrier = 1 2642 2643// Default to the LR register. 2644def : InstAlias<"ret", (RET LR)>; 2645 2646let isCall = 1, Defs = [LR], Uses = [SP] in { 2647 def BLR : BranchReg<0b0001, "blr", []>; 2648 def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>, 2649 Sched<[WriteBrReg]>, 2650 PseudoInstExpansion<(BLR GPR64:$Rn)>; 2651 def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>, 2652 Sched<[WriteBrReg]>; 2653 def BLR_BTI : Pseudo<(outs), (ins variable_ops), []>, 2654 Sched<[WriteBrReg]>; 2655} // isCall 2656 2657def : Pat<(AArch64call GPR64:$Rn), 2658 (BLR GPR64:$Rn)>, 2659 Requires<[NoSLSBLRMitigation]>; 2660def : Pat<(AArch64call GPR64noip:$Rn), 2661 (BLRNoIP GPR64noip:$Rn)>, 2662 Requires<[SLSBLRMitigation]>; 2663 2664def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn), 2665 (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>, 2666 Requires<[NoSLSBLRMitigation]>; 2667 2668def : Pat<(AArch64call_bti GPR64:$Rn), 2669 (BLR_BTI GPR64:$Rn)>, 2670 Requires<[NoSLSBLRMitigation]>; 2671def : Pat<(AArch64call_bti GPR64noip:$Rn), 2672 (BLR_BTI GPR64noip:$Rn)>, 2673 Requires<[SLSBLRMitigation]>; 2674 2675let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { 2676def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>; 2677} // isBranch, isTerminator, isBarrier, isIndirectBranch 2678 2679// Create a separate pseudo-instruction for codegen to use so that we don't 2680// flag lr as used in every function. It'll be restored before the RET by the 2681// epilogue if it's legitimately used. 2682def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retglue)]>, 2683 Sched<[WriteBrReg]> { 2684 let isTerminator = 1; 2685 let isBarrier = 1; 2686 let isReturn = 1; 2687} 2688 2689// This is a directive-like pseudo-instruction. The purpose is to insert an 2690// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction 2691// (which in the usual case is a BLR). 2692let hasSideEffects = 1 in 2693def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> { 2694 let AsmString = ".tlsdesccall $sym"; 2695} 2696 2697// Pseudo instruction to tell the streamer to emit a 'B' character into the 2698// augmentation string. 2699def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {} 2700 2701// Pseudo instruction to tell the streamer to emit a 'G' character into the 2702// augmentation string. 2703def EMITMTETAGGED : Pseudo<(outs), (ins), []>, Sched<[]> {} 2704 2705// FIXME: maybe the scratch register used shouldn't be fixed to X1? 2706// FIXME: can "hasSideEffects be dropped? 2707// This gets lowered to an instruction sequence which takes 16 bytes 2708let isCall = 1, Defs = [NZCV, LR, X0, X1], hasSideEffects = 1, Size = 16, 2709 isCodeGenOnly = 1 in 2710def TLSDESC_CALLSEQ 2711 : Pseudo<(outs), (ins i64imm:$sym), 2712 [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>, 2713 Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>; 2714def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym), 2715 (TLSDESC_CALLSEQ texternalsym:$sym)>; 2716 2717//===----------------------------------------------------------------------===// 2718// Conditional branch (immediate) instruction. 2719//===----------------------------------------------------------------------===// 2720def Bcc : BranchCond<0, "b">; 2721 2722// Armv8.8-A variant form which hints to the branch predictor that 2723// this branch is very likely to go the same way nearly all the time 2724// (even though it is not known at compile time _which_ way that is). 2725def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>; 2726 2727//===----------------------------------------------------------------------===// 2728// Compare-and-branch instructions. 2729//===----------------------------------------------------------------------===// 2730defm CBZ : CmpBranch<0, "cbz", AArch64cbz>; 2731defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>; 2732 2733//===----------------------------------------------------------------------===// 2734// Test-bit-and-branch instructions. 2735//===----------------------------------------------------------------------===// 2736defm TBZ : TestBranch<0, "tbz", AArch64tbz>; 2737defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>; 2738 2739//===----------------------------------------------------------------------===// 2740// Unconditional branch (immediate) instructions. 2741//===----------------------------------------------------------------------===// 2742let isBranch = 1, isTerminator = 1, isBarrier = 1 in { 2743def B : BranchImm<0, "b", [(br bb:$addr)]>; 2744} // isBranch, isTerminator, isBarrier 2745 2746let isCall = 1, Defs = [LR], Uses = [SP] in { 2747def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>; 2748} // isCall 2749def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>; 2750 2751//===----------------------------------------------------------------------===// 2752// Exception generation instructions. 2753//===----------------------------------------------------------------------===// 2754let isTrap = 1 in { 2755def BRK : ExceptionGeneration<0b001, 0b00, "brk", 2756 [(int_aarch64_break timm32_0_65535:$imm)]>; 2757} 2758def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">; 2759def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">; 2760def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>; 2761def HLT : ExceptionGeneration<0b010, 0b00, "hlt">; 2762def HVC : ExceptionGeneration<0b000, 0b10, "hvc">; 2763def SMC : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>; 2764def SVC : ExceptionGeneration<0b000, 0b01, "svc">; 2765 2766// DCPSn defaults to an immediate operand of zero if unspecified. 2767def : InstAlias<"dcps1", (DCPS1 0)>; 2768def : InstAlias<"dcps2", (DCPS2 0)>; 2769def : InstAlias<"dcps3", (DCPS3 0)>, Requires<[HasEL3]>; 2770 2771def UDF : UDFType<0, "udf">; 2772 2773//===----------------------------------------------------------------------===// 2774// Load instructions. 2775//===----------------------------------------------------------------------===// 2776 2777// Pair (indexed, offset) 2778defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">; 2779defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">; 2780defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">; 2781defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">; 2782defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">; 2783 2784defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">; 2785 2786// Pair (pre-indexed) 2787def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">; 2788def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">; 2789def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">; 2790def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">; 2791def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">; 2792 2793def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">; 2794 2795// Pair (post-indexed) 2796def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">; 2797def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">; 2798def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">; 2799def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">; 2800def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">; 2801 2802def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">; 2803 2804 2805// Pair (no allocate) 2806defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">; 2807defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">; 2808defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">; 2809defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">; 2810defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">; 2811 2812def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)), 2813 (LDPXi GPR64sp:$Rn, simm7s8:$offset)>; 2814 2815def : Pat<(AArch64ldnp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)), 2816 (LDNPQi GPR64sp:$Rn, simm7s16:$offset)>; 2817//--- 2818// (register offset) 2819//--- 2820 2821// Integer 2822defm LDRBB : Load8RO<0b00, 0, 0b01, GPR32, "ldrb", i32, zextloadi8>; 2823defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>; 2824defm LDRW : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>; 2825defm LDRX : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>; 2826 2827// Floating-point 2828defm LDRB : Load8RO<0b00, 1, 0b01, FPR8Op, "ldr", i8, load>; 2829defm LDRH : Load16RO<0b01, 1, 0b01, FPR16Op, "ldr", f16, load>; 2830defm LDRS : Load32RO<0b10, 1, 0b01, FPR32Op, "ldr", f32, load>; 2831defm LDRD : Load64RO<0b11, 1, 0b01, FPR64Op, "ldr", f64, load>; 2832defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>; 2833 2834// Load sign-extended half-word 2835defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>; 2836defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>; 2837 2838// Load sign-extended byte 2839defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>; 2840defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>; 2841 2842// Load sign-extended word 2843defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>; 2844 2845// Pre-fetch. 2846defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">; 2847 2848// For regular load, we do not have any alignment requirement. 2849// Thus, it is safe to directly map the vector loads with interesting 2850// addressing modes. 2851// FIXME: We could do the same for bitconvert to floating point vectors. 2852multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop, 2853 ValueType ScalTy, ValueType VecTy, 2854 Instruction LOADW, Instruction LOADX, 2855 SubRegIndex sub> { 2856 def : Pat<(VecTy (scalar_to_vector (ScalTy 2857 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))), 2858 (INSERT_SUBREG (VecTy (IMPLICIT_DEF)), 2859 (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset), 2860 sub)>; 2861 2862 def : Pat<(VecTy (scalar_to_vector (ScalTy 2863 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))), 2864 (INSERT_SUBREG (VecTy (IMPLICIT_DEF)), 2865 (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset), 2866 sub)>; 2867} 2868 2869let AddedComplexity = 10 in { 2870defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v8i8, LDRBroW, LDRBroX, bsub>; 2871defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v16i8, LDRBroW, LDRBroX, bsub>; 2872 2873defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>; 2874defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>; 2875 2876defm : ScalToVecROLoadPat<ro16, load, i32, v4f16, LDRHroW, LDRHroX, hsub>; 2877defm : ScalToVecROLoadPat<ro16, load, i32, v8f16, LDRHroW, LDRHroX, hsub>; 2878 2879defm : ScalToVecROLoadPat<ro32, load, i32, v2i32, LDRSroW, LDRSroX, ssub>; 2880defm : ScalToVecROLoadPat<ro32, load, i32, v4i32, LDRSroW, LDRSroX, ssub>; 2881 2882defm : ScalToVecROLoadPat<ro32, load, f32, v2f32, LDRSroW, LDRSroX, ssub>; 2883defm : ScalToVecROLoadPat<ro32, load, f32, v4f32, LDRSroW, LDRSroX, ssub>; 2884 2885defm : ScalToVecROLoadPat<ro64, load, i64, v2i64, LDRDroW, LDRDroX, dsub>; 2886 2887defm : ScalToVecROLoadPat<ro64, load, f64, v2f64, LDRDroW, LDRDroX, dsub>; 2888 2889 2890def : Pat <(v1i64 (scalar_to_vector (i64 2891 (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, 2892 ro_Wextend64:$extend))))), 2893 (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>; 2894 2895def : Pat <(v1i64 (scalar_to_vector (i64 2896 (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, 2897 ro_Xextend64:$extend))))), 2898 (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>; 2899} 2900 2901// Match all load 64 bits width whose type is compatible with FPR64 2902multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy, 2903 Instruction LOADW, Instruction LOADX> { 2904 2905 def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), 2906 (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; 2907 2908 def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), 2909 (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; 2910} 2911 2912let AddedComplexity = 10 in { 2913let Predicates = [IsLE] in { 2914 // We must do vector loads with LD1 in big-endian. 2915 defm : VecROLoadPat<ro64, v2i32, LDRDroW, LDRDroX>; 2916 defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>; 2917 defm : VecROLoadPat<ro64, v8i8, LDRDroW, LDRDroX>; 2918 defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>; 2919 defm : VecROLoadPat<ro64, v4f16, LDRDroW, LDRDroX>; 2920 defm : VecROLoadPat<ro64, v4bf16, LDRDroW, LDRDroX>; 2921} 2922 2923defm : VecROLoadPat<ro64, v1i64, LDRDroW, LDRDroX>; 2924defm : VecROLoadPat<ro64, v1f64, LDRDroW, LDRDroX>; 2925 2926// Match all load 128 bits width whose type is compatible with FPR128 2927let Predicates = [IsLE] in { 2928 // We must do vector loads with LD1 in big-endian. 2929 defm : VecROLoadPat<ro128, v2i64, LDRQroW, LDRQroX>; 2930 defm : VecROLoadPat<ro128, v2f64, LDRQroW, LDRQroX>; 2931 defm : VecROLoadPat<ro128, v4i32, LDRQroW, LDRQroX>; 2932 defm : VecROLoadPat<ro128, v4f32, LDRQroW, LDRQroX>; 2933 defm : VecROLoadPat<ro128, v8i16, LDRQroW, LDRQroX>; 2934 defm : VecROLoadPat<ro128, v8f16, LDRQroW, LDRQroX>; 2935 defm : VecROLoadPat<ro128, v8bf16, LDRQroW, LDRQroX>; 2936 defm : VecROLoadPat<ro128, v16i8, LDRQroW, LDRQroX>; 2937} 2938} // AddedComplexity = 10 2939 2940// zextload -> i64 2941multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop, 2942 Instruction INSTW, Instruction INSTX> { 2943 def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), 2944 (SUBREG_TO_REG (i64 0), 2945 (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), 2946 sub_32)>; 2947 2948 def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), 2949 (SUBREG_TO_REG (i64 0), 2950 (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), 2951 sub_32)>; 2952} 2953 2954let AddedComplexity = 10 in { 2955 defm : ExtLoadTo64ROPat<ro8, zextloadi8, LDRBBroW, LDRBBroX>; 2956 defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>; 2957 defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW, LDRWroX>; 2958 2959 // zextloadi1 -> zextloadi8 2960 defm : ExtLoadTo64ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>; 2961 2962 // extload -> zextload 2963 defm : ExtLoadTo64ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>; 2964 defm : ExtLoadTo64ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>; 2965 defm : ExtLoadTo64ROPat<ro32, extloadi32, LDRWroW, LDRWroX>; 2966 2967 // extloadi1 -> zextloadi8 2968 defm : ExtLoadTo64ROPat<ro8, extloadi1, LDRBBroW, LDRBBroX>; 2969} 2970 2971 2972// zextload -> i64 2973multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop, 2974 Instruction INSTW, Instruction INSTX> { 2975 def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), 2976 (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; 2977 2978 def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), 2979 (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; 2980 2981} 2982 2983let AddedComplexity = 10 in { 2984 // extload -> zextload 2985 defm : ExtLoadTo32ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>; 2986 defm : ExtLoadTo32ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>; 2987 defm : ExtLoadTo32ROPat<ro32, extloadi32, LDRWroW, LDRWroX>; 2988 2989 // zextloadi1 -> zextloadi8 2990 defm : ExtLoadTo32ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>; 2991} 2992 2993//--- 2994// (unsigned immediate) 2995//--- 2996defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr", 2997 [(set GPR64z:$Rt, 2998 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>; 2999defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr", 3000 [(set GPR32z:$Rt, 3001 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>; 3002defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr", 3003 [(set FPR8Op:$Rt, 3004 (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>; 3005defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr", 3006 [(set (f16 FPR16Op:$Rt), 3007 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>; 3008defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr", 3009 [(set (f32 FPR32Op:$Rt), 3010 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>; 3011defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr", 3012 [(set (f64 FPR64Op:$Rt), 3013 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>; 3014defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr", 3015 [(set (f128 FPR128Op:$Rt), 3016 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>; 3017 3018// bf16 load pattern 3019def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), 3020 (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>; 3021 3022// For regular load, we do not have any alignment requirement. 3023// Thus, it is safe to directly map the vector loads with interesting 3024// addressing modes. 3025// FIXME: We could do the same for bitconvert to floating point vectors. 3026def : Pat <(v8i8 (scalar_to_vector (i32 3027 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), 3028 (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), 3029 (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; 3030def : Pat <(v16i8 (scalar_to_vector (i32 3031 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), 3032 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3033 (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; 3034def : Pat <(v4i16 (scalar_to_vector (i32 3035 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), 3036 (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), 3037 (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; 3038def : Pat <(v8i16 (scalar_to_vector (i32 3039 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), 3040 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), 3041 (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; 3042def : Pat <(v2i32 (scalar_to_vector (i32 3043 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), 3044 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), 3045 (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; 3046def : Pat <(v4i32 (scalar_to_vector (i32 3047 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), 3048 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), 3049 (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; 3050def : Pat <(v1i64 (scalar_to_vector (i64 3051 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), 3052 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 3053def : Pat <(v2i64 (scalar_to_vector (i64 3054 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), 3055 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), 3056 (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>; 3057 3058// Match all load 64 bits width whose type is compatible with FPR64 3059let Predicates = [IsLE] in { 3060 // We must use LD1 to perform vector loads in big-endian. 3061 def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 3062 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 3063 def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 3064 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 3065 def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 3066 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 3067 def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 3068 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 3069 def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 3070 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 3071 def : Pat<(v4bf16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 3072 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 3073} 3074def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 3075 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 3076def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 3077 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 3078 3079// Match all load 128 bits width whose type is compatible with FPR128 3080let Predicates = [IsLE] in { 3081 // We must use LD1 to perform vector loads in big-endian. 3082 def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 3083 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 3084 def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 3085 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 3086 def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 3087 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 3088 def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 3089 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 3090 def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 3091 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 3092 def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 3093 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 3094 def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 3095 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 3096 def : Pat<(v8bf16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 3097 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 3098} 3099def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 3100 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 3101 3102defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh", 3103 [(set GPR32:$Rt, 3104 (zextloadi16 (am_indexed16 GPR64sp:$Rn, 3105 uimm12s2:$offset)))]>; 3106defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb", 3107 [(set GPR32:$Rt, 3108 (zextloadi8 (am_indexed8 GPR64sp:$Rn, 3109 uimm12s1:$offset)))]>; 3110// zextload -> i64 3111def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 3112 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; 3113def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), 3114 (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; 3115 3116// zextloadi1 -> zextloadi8 3117def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 3118 (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; 3119def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 3120 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; 3121 3122// extload -> zextload 3123def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), 3124 (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>; 3125def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 3126 (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; 3127def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 3128 (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; 3129def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), 3130 (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; 3131def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), 3132 (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; 3133def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 3134 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; 3135def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 3136 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; 3137 3138// load sign-extended half-word 3139defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh", 3140 [(set GPR32:$Rt, 3141 (sextloadi16 (am_indexed16 GPR64sp:$Rn, 3142 uimm12s2:$offset)))]>; 3143defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh", 3144 [(set GPR64:$Rt, 3145 (sextloadi16 (am_indexed16 GPR64sp:$Rn, 3146 uimm12s2:$offset)))]>; 3147 3148// load sign-extended byte 3149defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb", 3150 [(set GPR32:$Rt, 3151 (sextloadi8 (am_indexed8 GPR64sp:$Rn, 3152 uimm12s1:$offset)))]>; 3153defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb", 3154 [(set GPR64:$Rt, 3155 (sextloadi8 (am_indexed8 GPR64sp:$Rn, 3156 uimm12s1:$offset)))]>; 3157 3158// load sign-extended word 3159defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw", 3160 [(set GPR64:$Rt, 3161 (sextloadi32 (am_indexed32 GPR64sp:$Rn, 3162 uimm12s4:$offset)))]>; 3163 3164// load zero-extended word 3165def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), 3166 (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; 3167 3168// Pre-fetch. 3169def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm", 3170 [(AArch64Prefetch timm:$Rt, 3171 (am_indexed64 GPR64sp:$Rn, 3172 uimm12s8:$offset))]>; 3173 3174def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>; 3175 3176//--- 3177// (literal) 3178 3179def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{ 3180 if (auto *G = dyn_cast<GlobalAddressSDNode>(N)) { 3181 const DataLayout &DL = MF->getDataLayout(); 3182 Align Align = G->getGlobal()->getPointerAlignment(DL); 3183 return Align >= 4 && G->getOffset() % 4 == 0; 3184 } 3185 if (auto *C = dyn_cast<ConstantPoolSDNode>(N)) 3186 return C->getAlign() >= 4 && C->getOffset() % 4 == 0; 3187 return false; 3188}]>; 3189 3190def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr", 3191 [(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>; 3192def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr", 3193 [(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>; 3194def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr", 3195 [(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>; 3196def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr", 3197 [(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>; 3198def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr", 3199 [(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>; 3200 3201// load sign-extended word 3202def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw", 3203 [(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>; 3204 3205let AddedComplexity = 20 in { 3206def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))), 3207 (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>; 3208} 3209 3210// prefetch 3211def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>; 3212// [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>; 3213 3214//--- 3215// (unscaled immediate) 3216defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur", 3217 [(set GPR64z:$Rt, 3218 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>; 3219defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur", 3220 [(set GPR32z:$Rt, 3221 (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; 3222defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur", 3223 [(set FPR8Op:$Rt, 3224 (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; 3225defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur", 3226 [(set (f16 FPR16Op:$Rt), 3227 (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; 3228defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur", 3229 [(set (f32 FPR32Op:$Rt), 3230 (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; 3231defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur", 3232 [(set (f64 FPR64Op:$Rt), 3233 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>; 3234defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur", 3235 [(set (f128 FPR128Op:$Rt), 3236 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>; 3237 3238defm LDURHH 3239 : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh", 3240 [(set GPR32:$Rt, 3241 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; 3242defm LDURBB 3243 : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb", 3244 [(set GPR32:$Rt, 3245 (zextloadi8 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; 3246 3247// bf16 load pattern 3248def : Pat <(bf16 (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), 3249 (LDURHi GPR64sp:$Rn, simm9:$offset)>; 3250 3251// Match all load 64 bits width whose type is compatible with FPR64 3252let Predicates = [IsLE] in { 3253 def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 3254 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 3255 def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 3256 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 3257 def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 3258 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 3259 def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 3260 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 3261 def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 3262 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 3263} 3264def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 3265 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 3266def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 3267 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 3268 3269// Match all load 128 bits width whose type is compatible with FPR128 3270let Predicates = [IsLE] in { 3271 def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 3272 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 3273 def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 3274 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 3275 def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 3276 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 3277 def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 3278 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 3279 def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 3280 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 3281 def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 3282 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 3283 def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 3284 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 3285} 3286 3287// anyext -> zext 3288def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), 3289 (LDURHHi GPR64sp:$Rn, simm9:$offset)>; 3290def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 3291 (LDURBBi GPR64sp:$Rn, simm9:$offset)>; 3292def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 3293 (LDURBBi GPR64sp:$Rn, simm9:$offset)>; 3294def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))), 3295 (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; 3296def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), 3297 (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; 3298def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 3299 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; 3300def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 3301 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; 3302// unscaled zext 3303def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), 3304 (LDURHHi GPR64sp:$Rn, simm9:$offset)>; 3305def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 3306 (LDURBBi GPR64sp:$Rn, simm9:$offset)>; 3307def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 3308 (LDURBBi GPR64sp:$Rn, simm9:$offset)>; 3309def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))), 3310 (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; 3311def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), 3312 (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; 3313def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 3314 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; 3315def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 3316 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; 3317 3318 3319//--- 3320// LDR mnemonics fall back to LDUR for negative or unaligned offsets. 3321 3322// Define new assembler match classes as we want to only match these when 3323// the don't otherwise match the scaled addressing mode for LDR/STR. Don't 3324// associate a DiagnosticType either, as we want the diagnostic for the 3325// canonical form (the scaled operand) to take precedence. 3326class SImm9OffsetOperand<int Width> : AsmOperandClass { 3327 let Name = "SImm9OffsetFB" # Width; 3328 let PredicateMethod = "isSImm9OffsetFB<" # Width # ">"; 3329 let RenderMethod = "addImmOperands"; 3330} 3331 3332def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>; 3333def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>; 3334def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>; 3335def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>; 3336def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>; 3337 3338def simm9_offset_fb8 : Operand<i64> { 3339 let ParserMatchClass = SImm9OffsetFB8Operand; 3340} 3341def simm9_offset_fb16 : Operand<i64> { 3342 let ParserMatchClass = SImm9OffsetFB16Operand; 3343} 3344def simm9_offset_fb32 : Operand<i64> { 3345 let ParserMatchClass = SImm9OffsetFB32Operand; 3346} 3347def simm9_offset_fb64 : Operand<i64> { 3348 let ParserMatchClass = SImm9OffsetFB64Operand; 3349} 3350def simm9_offset_fb128 : Operand<i64> { 3351 let ParserMatchClass = SImm9OffsetFB128Operand; 3352} 3353 3354def : InstAlias<"ldr $Rt, [$Rn, $offset]", 3355 (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; 3356def : InstAlias<"ldr $Rt, [$Rn, $offset]", 3357 (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 3358def : InstAlias<"ldr $Rt, [$Rn, $offset]", 3359 (LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 3360def : InstAlias<"ldr $Rt, [$Rn, $offset]", 3361 (LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 3362def : InstAlias<"ldr $Rt, [$Rn, $offset]", 3363 (LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 3364def : InstAlias<"ldr $Rt, [$Rn, $offset]", 3365 (LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; 3366def : InstAlias<"ldr $Rt, [$Rn, $offset]", 3367 (LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>; 3368 3369// zextload -> i64 3370def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 3371 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; 3372def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), 3373 (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; 3374 3375// load sign-extended half-word 3376defm LDURSHW 3377 : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh", 3378 [(set GPR32:$Rt, 3379 (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; 3380defm LDURSHX 3381 : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh", 3382 [(set GPR64:$Rt, 3383 (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; 3384 3385// load sign-extended byte 3386defm LDURSBW 3387 : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb", 3388 [(set GPR32:$Rt, 3389 (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; 3390defm LDURSBX 3391 : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb", 3392 [(set GPR64:$Rt, 3393 (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; 3394 3395// load sign-extended word 3396defm LDURSW 3397 : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw", 3398 [(set GPR64:$Rt, 3399 (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; 3400 3401// zero and sign extending aliases from generic LDR* mnemonics to LDUR*. 3402def : InstAlias<"ldrb $Rt, [$Rn, $offset]", 3403 (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 3404def : InstAlias<"ldrh $Rt, [$Rn, $offset]", 3405 (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 3406def : InstAlias<"ldrsb $Rt, [$Rn, $offset]", 3407 (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 3408def : InstAlias<"ldrsb $Rt, [$Rn, $offset]", 3409 (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 3410def : InstAlias<"ldrsh $Rt, [$Rn, $offset]", 3411 (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 3412def : InstAlias<"ldrsh $Rt, [$Rn, $offset]", 3413 (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 3414def : InstAlias<"ldrsw $Rt, [$Rn, $offset]", 3415 (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 3416 3417// A LDR will implicitly zero the rest of the vector, so vector_insert(zeros, 3418// load, 0) can use a single load. 3419multiclass LoadInsertZeroPatterns<SDPatternOperator LoadOp, ValueType VT, ValueType HVT, ValueType SVT, 3420 ValueType ScalarVT, Instruction LoadInst, Instruction UnscaledLoadInst, 3421 ComplexPattern Addr, ComplexPattern UnscaledAddr, Operand AddrImm, 3422 SubRegIndex SubReg> { 3423 // Scaled 3424 def : Pat <(vector_insert (VT immAllZerosV), 3425 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)), 3426 (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>; 3427 // Unscaled 3428 def : Pat <(vector_insert (VT immAllZerosV), 3429 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)), 3430 (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>; 3431 3432 // Half-vector patterns 3433 def : Pat <(vector_insert (HVT immAllZerosV), 3434 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)), 3435 (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>; 3436 // Unscaled 3437 def : Pat <(vector_insert (HVT immAllZerosV), 3438 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)), 3439 (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>; 3440 3441 // SVE patterns 3442 def : Pat <(vector_insert (SVT immAllZerosV), 3443 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)), 3444 (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>; 3445 // Unscaled 3446 def : Pat <(vector_insert (SVT immAllZerosV), 3447 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)), 3448 (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>; 3449} 3450 3451defm : LoadInsertZeroPatterns<extloadi8, v16i8, v8i8, nxv16i8, i32, LDRBui, LDURBi, 3452 am_indexed8, am_unscaled8, uimm12s1, bsub>; 3453defm : LoadInsertZeroPatterns<extloadi16, v8i16, v4i16, nxv8i16, i32, LDRHui, LDURHi, 3454 am_indexed16, am_unscaled16, uimm12s2, hsub>; 3455defm : LoadInsertZeroPatterns<load, v4i32, v2i32, nxv4i32, i32, LDRSui, LDURSi, 3456 am_indexed32, am_unscaled32, uimm12s4, ssub>; 3457defm : LoadInsertZeroPatterns<load, v2i64, v1i64, nxv2i64, i64, LDRDui, LDURDi, 3458 am_indexed64, am_unscaled64, uimm12s8, dsub>; 3459defm : LoadInsertZeroPatterns<load, v8f16, v4f16, nxv8f16, f16, LDRHui, LDURHi, 3460 am_indexed16, am_unscaled16, uimm12s2, hsub>; 3461defm : LoadInsertZeroPatterns<load, v8bf16, v4bf16, nxv8bf16, bf16, LDRHui, LDURHi, 3462 am_indexed16, am_unscaled16, uimm12s2, hsub>; 3463defm : LoadInsertZeroPatterns<load, v4f32, v2f32, nxv4f32, f32, LDRSui, LDURSi, 3464 am_indexed32, am_unscaled32, uimm12s4, ssub>; 3465defm : LoadInsertZeroPatterns<load, v2f64, v1f64, nxv2f64, f64, LDRDui, LDURDi, 3466 am_indexed64, am_unscaled64, uimm12s8, dsub>; 3467 3468// Pre-fetch. 3469defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum", 3470 [(AArch64Prefetch timm:$Rt, 3471 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; 3472 3473//--- 3474// (unscaled immediate, unprivileged) 3475defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">; 3476defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">; 3477 3478defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">; 3479defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">; 3480 3481// load sign-extended half-word 3482defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">; 3483defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">; 3484 3485// load sign-extended byte 3486defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">; 3487defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">; 3488 3489// load sign-extended word 3490defm LDTRSW : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">; 3491 3492//--- 3493// (immediate pre-indexed) 3494def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">; 3495def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">; 3496def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op, "ldr">; 3497def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">; 3498def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">; 3499def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">; 3500def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">; 3501 3502// load sign-extended half-word 3503def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">; 3504def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">; 3505 3506// load sign-extended byte 3507def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">; 3508def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">; 3509 3510// load zero-extended byte 3511def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">; 3512def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">; 3513 3514// load sign-extended word 3515def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">; 3516 3517//--- 3518// (immediate post-indexed) 3519def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">; 3520def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">; 3521def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op, "ldr">; 3522def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">; 3523def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">; 3524def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">; 3525def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">; 3526 3527// load sign-extended half-word 3528def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">; 3529def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">; 3530 3531// load sign-extended byte 3532def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">; 3533def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">; 3534 3535// load zero-extended byte 3536def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">; 3537def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32z, "ldrh">; 3538 3539// load sign-extended word 3540def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">; 3541 3542//===----------------------------------------------------------------------===// 3543// Store instructions. 3544//===----------------------------------------------------------------------===// 3545 3546// Pair (indexed, offset) 3547// FIXME: Use dedicated range-checked addressing mode operand here. 3548defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">; 3549defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">; 3550defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">; 3551defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">; 3552defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">; 3553 3554// Pair (pre-indexed) 3555def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">; 3556def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">; 3557def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">; 3558def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">; 3559def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">; 3560 3561// Pair (post-indexed) 3562def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">; 3563def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">; 3564def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">; 3565def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">; 3566def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">; 3567 3568// Pair (no allocate) 3569defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">; 3570defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">; 3571defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">; 3572defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">; 3573defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">; 3574 3575def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)), 3576 (STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>; 3577 3578def : Pat<(AArch64stnp FPR128:$Rt, FPR128:$Rt2, (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)), 3579 (STNPQi FPR128:$Rt, FPR128:$Rt2, GPR64sp:$Rn, simm7s16:$offset)>; 3580 3581 3582//--- 3583// (Register offset) 3584 3585// Integer 3586defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>; 3587defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>; 3588defm STRW : Store32RO<0b10, 0, 0b00, GPR32, "str", i32, store>; 3589defm STRX : Store64RO<0b11, 0, 0b00, GPR64, "str", i64, store>; 3590 3591 3592// Floating-point 3593defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op, "str", i8, store>; 3594defm STRH : Store16RO<0b01, 1, 0b00, FPR16Op, "str", f16, store>; 3595defm STRS : Store32RO<0b10, 1, 0b00, FPR32Op, "str", f32, store>; 3596defm STRD : Store64RO<0b11, 1, 0b00, FPR64Op, "str", f64, store>; 3597defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">; 3598 3599let Predicates = [UseSTRQro], AddedComplexity = 10 in { 3600 def : Pat<(store (f128 FPR128:$Rt), 3601 (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm, 3602 ro_Wextend128:$extend)), 3603 (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>; 3604 def : Pat<(store (f128 FPR128:$Rt), 3605 (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm, 3606 ro_Xextend128:$extend)), 3607 (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Wextend128:$extend)>; 3608} 3609 3610multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop, 3611 Instruction STRW, Instruction STRX> { 3612 3613 def : Pat<(storeop GPR64:$Rt, 3614 (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), 3615 (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32), 3616 GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; 3617 3618 def : Pat<(storeop GPR64:$Rt, 3619 (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), 3620 (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32), 3621 GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; 3622} 3623 3624let AddedComplexity = 10 in { 3625 // truncstore i64 3626 defm : TruncStoreFrom64ROPat<ro8, truncstorei8, STRBBroW, STRBBroX>; 3627 defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>; 3628 defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW, STRWroX>; 3629} 3630 3631multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR, 3632 Instruction STRW, Instruction STRX> { 3633 def : Pat<(store (VecTy FPR:$Rt), 3634 (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), 3635 (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; 3636 3637 def : Pat<(store (VecTy FPR:$Rt), 3638 (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), 3639 (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; 3640} 3641 3642let AddedComplexity = 10 in { 3643// Match all store 64 bits width whose type is compatible with FPR64 3644let Predicates = [IsLE] in { 3645 // We must use ST1 to store vectors in big-endian. 3646 defm : VecROStorePat<ro64, v2i32, FPR64, STRDroW, STRDroX>; 3647 defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>; 3648 defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>; 3649 defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>; 3650 defm : VecROStorePat<ro64, v4f16, FPR64, STRDroW, STRDroX>; 3651 defm : VecROStorePat<ro64, v4bf16, FPR64, STRDroW, STRDroX>; 3652} 3653 3654defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>; 3655defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>; 3656 3657// Match all store 128 bits width whose type is compatible with FPR128 3658let Predicates = [IsLE, UseSTRQro] in { 3659 // We must use ST1 to store vectors in big-endian. 3660 defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>; 3661 defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>; 3662 defm : VecROStorePat<ro128, v4i32, FPR128, STRQroW, STRQroX>; 3663 defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>; 3664 defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>; 3665 defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>; 3666 defm : VecROStorePat<ro128, v8f16, FPR128, STRQroW, STRQroX>; 3667 defm : VecROStorePat<ro128, v8bf16, FPR128, STRQroW, STRQroX>; 3668} 3669} // AddedComplexity = 10 3670 3671// Match stores from lane 0 to the appropriate subreg's store. 3672multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop, 3673 ValueType VecTy, ValueType STy, 3674 SubRegIndex SubRegIdx, 3675 Instruction STRW, Instruction STRX> { 3676 3677 def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)), 3678 (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), 3679 (STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx), 3680 GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; 3681 3682 def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)), 3683 (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), 3684 (STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx), 3685 GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; 3686} 3687 3688let AddedComplexity = 19 in { 3689 defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>; 3690 defm : VecROStoreLane0Pat<ro16, store, v8f16, f16, hsub, STRHroW, STRHroX>; 3691 defm : VecROStoreLane0Pat<ro32, store, v4i32, i32, ssub, STRSroW, STRSroX>; 3692 defm : VecROStoreLane0Pat<ro32, store, v4f32, f32, ssub, STRSroW, STRSroX>; 3693 defm : VecROStoreLane0Pat<ro64, store, v2i64, i64, dsub, STRDroW, STRDroX>; 3694 defm : VecROStoreLane0Pat<ro64, store, v2f64, f64, dsub, STRDroW, STRDroX>; 3695} 3696 3697//--- 3698// (unsigned immediate) 3699defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str", 3700 [(store GPR64z:$Rt, 3701 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; 3702defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str", 3703 [(store GPR32z:$Rt, 3704 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>; 3705defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str", 3706 [(store FPR8Op:$Rt, 3707 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>; 3708defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str", 3709 [(store (f16 FPR16Op:$Rt), 3710 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>; 3711defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str", 3712 [(store (f32 FPR32Op:$Rt), 3713 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>; 3714defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str", 3715 [(store (f64 FPR64Op:$Rt), 3716 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; 3717defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>; 3718 3719defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh", 3720 [(truncstorei16 GPR32z:$Rt, 3721 (am_indexed16 GPR64sp:$Rn, 3722 uimm12s2:$offset))]>; 3723defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1, "strb", 3724 [(truncstorei8 GPR32z:$Rt, 3725 (am_indexed8 GPR64sp:$Rn, 3726 uimm12s1:$offset))]>; 3727 3728// bf16 store pattern 3729def : Pat<(store (bf16 FPR16Op:$Rt), 3730 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), 3731 (STRHui FPR16:$Rt, GPR64sp:$Rn, uimm12s2:$offset)>; 3732 3733let AddedComplexity = 10 in { 3734 3735// Match all store 64 bits width whose type is compatible with FPR64 3736def : Pat<(store (v1i64 FPR64:$Rt), 3737 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3738 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3739def : Pat<(store (v1f64 FPR64:$Rt), 3740 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3741 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3742 3743let Predicates = [IsLE] in { 3744 // We must use ST1 to store vectors in big-endian. 3745 def : Pat<(store (v2f32 FPR64:$Rt), 3746 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3747 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3748 def : Pat<(store (v8i8 FPR64:$Rt), 3749 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3750 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3751 def : Pat<(store (v4i16 FPR64:$Rt), 3752 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3753 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3754 def : Pat<(store (v2i32 FPR64:$Rt), 3755 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3756 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3757 def : Pat<(store (v4f16 FPR64:$Rt), 3758 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3759 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3760 def : Pat<(store (v4bf16 FPR64:$Rt), 3761 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3762 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3763} 3764 3765// Match all store 128 bits width whose type is compatible with FPR128 3766def : Pat<(store (f128 FPR128:$Rt), 3767 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3768 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3769 3770let Predicates = [IsLE] in { 3771 // We must use ST1 to store vectors in big-endian. 3772 def : Pat<(store (v4f32 FPR128:$Rt), 3773 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3774 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3775 def : Pat<(store (v2f64 FPR128:$Rt), 3776 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3777 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3778 def : Pat<(store (v16i8 FPR128:$Rt), 3779 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3780 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3781 def : Pat<(store (v8i16 FPR128:$Rt), 3782 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3783 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3784 def : Pat<(store (v4i32 FPR128:$Rt), 3785 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3786 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3787 def : Pat<(store (v2i64 FPR128:$Rt), 3788 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3789 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3790 def : Pat<(store (v8f16 FPR128:$Rt), 3791 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3792 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3793 def : Pat<(store (v8bf16 FPR128:$Rt), 3794 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3795 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3796} 3797 3798// truncstore i64 3799def : Pat<(truncstorei32 GPR64:$Rt, 3800 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)), 3801 (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>; 3802def : Pat<(truncstorei16 GPR64:$Rt, 3803 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), 3804 (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>; 3805def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)), 3806 (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>; 3807 3808} // AddedComplexity = 10 3809 3810// Match stores from lane 0 to the appropriate subreg's store. 3811multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop, 3812 ValueType VTy, ValueType STy, 3813 SubRegIndex SubRegIdx, Operand IndexType, 3814 Instruction STR> { 3815 def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), 0)), 3816 (UIAddrMode GPR64sp:$Rn, IndexType:$offset)), 3817 (STR (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx), 3818 GPR64sp:$Rn, IndexType:$offset)>; 3819} 3820 3821let AddedComplexity = 19 in { 3822 defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, hsub, uimm12s2, STRHui>; 3823 defm : VecStoreLane0Pat<am_indexed16, store, v8f16, f16, hsub, uimm12s2, STRHui>; 3824 defm : VecStoreLane0Pat<am_indexed32, store, v4i32, i32, ssub, uimm12s4, STRSui>; 3825 defm : VecStoreLane0Pat<am_indexed32, store, v4f32, f32, ssub, uimm12s4, STRSui>; 3826 defm : VecStoreLane0Pat<am_indexed64, store, v2i64, i64, dsub, uimm12s8, STRDui>; 3827 defm : VecStoreLane0Pat<am_indexed64, store, v2f64, f64, dsub, uimm12s8, STRDui>; 3828} 3829 3830//--- 3831// (unscaled immediate) 3832defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur", 3833 [(store GPR64z:$Rt, 3834 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; 3835defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur", 3836 [(store GPR32z:$Rt, 3837 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>; 3838defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur", 3839 [(store FPR8Op:$Rt, 3840 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>; 3841defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur", 3842 [(store (f16 FPR16Op:$Rt), 3843 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>; 3844defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur", 3845 [(store (f32 FPR32Op:$Rt), 3846 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>; 3847defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur", 3848 [(store (f64 FPR64Op:$Rt), 3849 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; 3850defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur", 3851 [(store (f128 FPR128Op:$Rt), 3852 (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>; 3853defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh", 3854 [(truncstorei16 GPR32z:$Rt, 3855 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>; 3856defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb", 3857 [(truncstorei8 GPR32z:$Rt, 3858 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>; 3859 3860// Armv8.4 Weaker Release Consistency enhancements 3861// LDAPR & STLR with Immediate Offset instructions 3862let Predicates = [HasRCPC_IMMO] in { 3863defm STLURB : BaseStoreUnscaleV84<"stlurb", 0b00, 0b00, GPR32>; 3864defm STLURH : BaseStoreUnscaleV84<"stlurh", 0b01, 0b00, GPR32>; 3865defm STLURW : BaseStoreUnscaleV84<"stlur", 0b10, 0b00, GPR32>; 3866defm STLURX : BaseStoreUnscaleV84<"stlur", 0b11, 0b00, GPR64>; 3867defm LDAPURB : BaseLoadUnscaleV84<"ldapurb", 0b00, 0b01, GPR32>; 3868defm LDAPURSBW : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b11, GPR32>; 3869defm LDAPURSBX : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b10, GPR64>; 3870defm LDAPURH : BaseLoadUnscaleV84<"ldapurh", 0b01, 0b01, GPR32>; 3871defm LDAPURSHW : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b11, GPR32>; 3872defm LDAPURSHX : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b10, GPR64>; 3873defm LDAPUR : BaseLoadUnscaleV84<"ldapur", 0b10, 0b01, GPR32>; 3874defm LDAPURSW : BaseLoadUnscaleV84<"ldapursw", 0b10, 0b10, GPR64>; 3875defm LDAPURX : BaseLoadUnscaleV84<"ldapur", 0b11, 0b01, GPR64>; 3876} 3877 3878// Match all store 64 bits width whose type is compatible with FPR64 3879def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 3880 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 3881def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 3882 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 3883 3884let AddedComplexity = 10 in { 3885 3886let Predicates = [IsLE] in { 3887 // We must use ST1 to store vectors in big-endian. 3888 def : Pat<(store (v2f32 FPR64:$Rt), 3889 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 3890 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 3891 def : Pat<(store (v8i8 FPR64:$Rt), 3892 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 3893 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 3894 def : Pat<(store (v4i16 FPR64:$Rt), 3895 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 3896 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 3897 def : Pat<(store (v2i32 FPR64:$Rt), 3898 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 3899 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 3900 def : Pat<(store (v4f16 FPR64:$Rt), 3901 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 3902 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 3903 def : Pat<(store (v4bf16 FPR64:$Rt), 3904 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 3905 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 3906} 3907 3908// Match all store 128 bits width whose type is compatible with FPR128 3909def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 3910 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 3911 3912let Predicates = [IsLE] in { 3913 // We must use ST1 to store vectors in big-endian. 3914 def : Pat<(store (v4f32 FPR128:$Rt), 3915 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 3916 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 3917 def : Pat<(store (v2f64 FPR128:$Rt), 3918 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 3919 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 3920 def : Pat<(store (v16i8 FPR128:$Rt), 3921 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 3922 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 3923 def : Pat<(store (v8i16 FPR128:$Rt), 3924 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 3925 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 3926 def : Pat<(store (v4i32 FPR128:$Rt), 3927 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 3928 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 3929 def : Pat<(store (v2i64 FPR128:$Rt), 3930 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 3931 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 3932 def : Pat<(store (v2f64 FPR128:$Rt), 3933 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 3934 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 3935 def : Pat<(store (v8f16 FPR128:$Rt), 3936 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 3937 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 3938 def : Pat<(store (v8bf16 FPR128:$Rt), 3939 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 3940 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 3941} 3942 3943} // AddedComplexity = 10 3944 3945// unscaled i64 truncating stores 3946def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)), 3947 (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; 3948def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), 3949 (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; 3950def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)), 3951 (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; 3952 3953// Match stores from lane 0 to the appropriate subreg's store. 3954multiclass VecStoreULane0Pat<SDPatternOperator StoreOp, 3955 ValueType VTy, ValueType STy, 3956 SubRegIndex SubRegIdx, Instruction STR> { 3957 defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegIdx, simm9, STR>; 3958} 3959 3960let AddedComplexity = 19 in { 3961 defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, hsub, STURHi>; 3962 defm : VecStoreULane0Pat<store, v8f16, f16, hsub, STURHi>; 3963 defm : VecStoreULane0Pat<store, v4i32, i32, ssub, STURSi>; 3964 defm : VecStoreULane0Pat<store, v4f32, f32, ssub, STURSi>; 3965 defm : VecStoreULane0Pat<store, v2i64, i64, dsub, STURDi>; 3966 defm : VecStoreULane0Pat<store, v2f64, f64, dsub, STURDi>; 3967} 3968 3969//--- 3970// STR mnemonics fall back to STUR for negative or unaligned offsets. 3971def : InstAlias<"str $Rt, [$Rn, $offset]", 3972 (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; 3973def : InstAlias<"str $Rt, [$Rn, $offset]", 3974 (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 3975def : InstAlias<"str $Rt, [$Rn, $offset]", 3976 (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 3977def : InstAlias<"str $Rt, [$Rn, $offset]", 3978 (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 3979def : InstAlias<"str $Rt, [$Rn, $offset]", 3980 (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 3981def : InstAlias<"str $Rt, [$Rn, $offset]", 3982 (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; 3983def : InstAlias<"str $Rt, [$Rn, $offset]", 3984 (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>; 3985 3986def : InstAlias<"strb $Rt, [$Rn, $offset]", 3987 (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 3988def : InstAlias<"strh $Rt, [$Rn, $offset]", 3989 (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 3990 3991//--- 3992// (unscaled immediate, unprivileged) 3993defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">; 3994defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">; 3995 3996defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">; 3997defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">; 3998 3999//--- 4000// (immediate pre-indexed) 4001def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str", pre_store, i32>; 4002def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str", pre_store, i64>; 4003def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, i8>; 4004def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str", pre_store, f16>; 4005def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str", pre_store, f32>; 4006def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str", pre_store, f64>; 4007def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>; 4008 4009def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8, i32>; 4010def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>; 4011 4012// truncstore i64 4013def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 4014 (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 4015 simm9:$off)>; 4016def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 4017 (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 4018 simm9:$off)>; 4019def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 4020 (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 4021 simm9:$off)>; 4022 4023def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4024 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4025def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4026 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4027def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4028 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4029def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4030 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4031def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4032 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4033def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4034 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4035def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4036 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4037 4038def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4039 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4040def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4041 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4042def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4043 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4044def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4045 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4046def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4047 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4048def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4049 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4050def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4051 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4052 4053//--- 4054// (immediate post-indexed) 4055def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z, "str", post_store, i32>; 4056def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z, "str", post_store, i64>; 4057def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, i8>; 4058def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op, "str", post_store, f16>; 4059def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op, "str", post_store, f32>; 4060def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op, "str", post_store, f64>; 4061def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>; 4062 4063def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8, i32>; 4064def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>; 4065 4066// truncstore i64 4067def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 4068 (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 4069 simm9:$off)>; 4070def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 4071 (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 4072 simm9:$off)>; 4073def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 4074 (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 4075 simm9:$off)>; 4076 4077def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off), 4078 (STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>; 4079 4080def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4081 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4082def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4083 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4084def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4085 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4086def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4087 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4088def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4089 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4090def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4091 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4092def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4093 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4094def : Pat<(post_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 4095 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 4096 4097def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4098 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4099def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4100 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4101def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4102 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4103def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4104 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4105def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4106 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4107def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4108 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4109def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4110 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4111def : Pat<(post_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 4112 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 4113 4114//===----------------------------------------------------------------------===// 4115// Load/store exclusive instructions. 4116//===----------------------------------------------------------------------===// 4117 4118def LDARW : LoadAcquire <0b10, 1, 1, 0, 1, GPR32, "ldar">; 4119def LDARX : LoadAcquire <0b11, 1, 1, 0, 1, GPR64, "ldar">; 4120def LDARB : LoadAcquire <0b00, 1, 1, 0, 1, GPR32, "ldarb">; 4121def LDARH : LoadAcquire <0b01, 1, 1, 0, 1, GPR32, "ldarh">; 4122 4123def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">; 4124def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">; 4125def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">; 4126def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">; 4127 4128def LDXRW : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">; 4129def LDXRX : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">; 4130def LDXRB : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">; 4131def LDXRH : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">; 4132 4133def STLRW : StoreRelease <0b10, 1, 0, 0, 1, GPR32, "stlr">; 4134def STLRX : StoreRelease <0b11, 1, 0, 0, 1, GPR64, "stlr">; 4135def STLRB : StoreRelease <0b00, 1, 0, 0, 1, GPR32, "stlrb">; 4136def STLRH : StoreRelease <0b01, 1, 0, 0, 1, GPR32, "stlrh">; 4137 4138/* 4139Aliases for when offset=0. Note that in contrast to LoadAcquire which has a $Rn 4140of type GPR64sp0, we deliberately choose to make $Rn of type GPR64sp and add an 4141alias for the case of immediate #0. This is because new STLR versions (from 4142LRCPC3 extension) do have a non-zero immediate value, so GPR64sp0 is not 4143appropriate anymore (it parses and discards the optional zero). This is not the 4144case for LoadAcquire because the new LRCPC3 LDAR instructions are post-indexed, 4145and the immediate values are not inside the [] brackets and thus not accepted 4146by GPR64sp0 parser. 4147*/ 4148def STLRW0 : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRW GPR32: $Rt, GPR64sp:$Rn)>; 4149def STLRX0 : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRX GPR64: $Rt, GPR64sp:$Rn)>; 4150def STLRB0 : InstAlias<"stlrb\t$Rt, [$Rn, #0]", (STLRB GPR32: $Rt, GPR64sp:$Rn)>; 4151def STLRH0 : InstAlias<"stlrh\t$Rt, [$Rn, #0]", (STLRH GPR32: $Rt, GPR64sp:$Rn)>; 4152 4153def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">; 4154def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">; 4155def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">; 4156def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">; 4157 4158def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">; 4159def STXRX : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">; 4160def STXRB : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">; 4161def STXRH : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">; 4162 4163def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">; 4164def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">; 4165 4166def LDXPW : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">; 4167def LDXPX : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">; 4168 4169def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">; 4170def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">; 4171 4172def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">; 4173def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">; 4174 4175let Predicates = [HasLOR] in { 4176 // v8.1a "Limited Order Region" extension load-acquire instructions 4177 def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">; 4178 def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">; 4179 def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">; 4180 def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">; 4181 4182 // v8.1a "Limited Order Region" extension store-release instructions 4183 def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">; 4184 def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">; 4185 def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">; 4186 def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">; 4187 4188 // Aliases for when offset=0 4189 def STLLRW0 : InstAlias<"stllr\t$Rt, [$Rn, #0]", (STLLRW GPR32: $Rt, GPR64sp:$Rn)>; 4190 def STLLRX0 : InstAlias<"stllr\t$Rt, [$Rn, #0]", (STLLRX GPR64: $Rt, GPR64sp:$Rn)>; 4191 def STLLRB0 : InstAlias<"stllrb\t$Rt, [$Rn, #0]", (STLLRB GPR32: $Rt, GPR64sp:$Rn)>; 4192 def STLLRH0 : InstAlias<"stllrh\t$Rt, [$Rn, #0]", (STLLRH GPR32: $Rt, GPR64sp:$Rn)>; 4193} 4194 4195//===----------------------------------------------------------------------===// 4196// Scaled floating point to integer conversion instructions. 4197//===----------------------------------------------------------------------===// 4198 4199defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>; 4200defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>; 4201defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>; 4202defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>; 4203defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>; 4204defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>; 4205defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>; 4206defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>; 4207defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>; 4208defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>; 4209defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>; 4210defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>; 4211 4212// AArch64's FCVT instructions saturate when out of range. 4213multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> { 4214 let Predicates = [HasFullFP16] in { 4215 def : Pat<(i32 (to_int_sat f16:$Rn, i32)), 4216 (!cast<Instruction>(INST # UWHr) f16:$Rn)>; 4217 def : Pat<(i64 (to_int_sat f16:$Rn, i64)), 4218 (!cast<Instruction>(INST # UXHr) f16:$Rn)>; 4219 } 4220 def : Pat<(i32 (to_int_sat f32:$Rn, i32)), 4221 (!cast<Instruction>(INST # UWSr) f32:$Rn)>; 4222 def : Pat<(i64 (to_int_sat f32:$Rn, i64)), 4223 (!cast<Instruction>(INST # UXSr) f32:$Rn)>; 4224 def : Pat<(i32 (to_int_sat f64:$Rn, i32)), 4225 (!cast<Instruction>(INST # UWDr) f64:$Rn)>; 4226 def : Pat<(i64 (to_int_sat f64:$Rn, i64)), 4227 (!cast<Instruction>(INST # UXDr) f64:$Rn)>; 4228 4229 let Predicates = [HasFullFP16] in { 4230 def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)), 4231 (!cast<Instruction>(INST # SWHri) $Rn, $scale)>; 4232 def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)), 4233 (!cast<Instruction>(INST # SXHri) $Rn, $scale)>; 4234 } 4235 def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)), 4236 (!cast<Instruction>(INST # SWSri) $Rn, $scale)>; 4237 def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)), 4238 (!cast<Instruction>(INST # SXSri) $Rn, $scale)>; 4239 def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)), 4240 (!cast<Instruction>(INST # SWDri) $Rn, $scale)>; 4241 def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)), 4242 (!cast<Instruction>(INST # SXDri) $Rn, $scale)>; 4243} 4244 4245defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">; 4246defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">; 4247 4248multiclass FPToIntegerIntPats<Intrinsic round, string INST> { 4249 let Predicates = [HasFullFP16] in { 4250 def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>; 4251 def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>; 4252 } 4253 def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>; 4254 def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>; 4255 def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>; 4256 def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>; 4257 4258 let Predicates = [HasFullFP16] in { 4259 def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))), 4260 (!cast<Instruction>(INST # SWHri) $Rn, $scale)>; 4261 def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))), 4262 (!cast<Instruction>(INST # SXHri) $Rn, $scale)>; 4263 } 4264 def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))), 4265 (!cast<Instruction>(INST # SWSri) $Rn, $scale)>; 4266 def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))), 4267 (!cast<Instruction>(INST # SXSri) $Rn, $scale)>; 4268 def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))), 4269 (!cast<Instruction>(INST # SWDri) $Rn, $scale)>; 4270 def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))), 4271 (!cast<Instruction>(INST # SXDri) $Rn, $scale)>; 4272} 4273 4274defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">; 4275defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">; 4276 4277multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> { 4278 def : Pat<(i32 (to_int (round f32:$Rn))), 4279 (!cast<Instruction>(INST # UWSr) f32:$Rn)>; 4280 def : Pat<(i64 (to_int (round f32:$Rn))), 4281 (!cast<Instruction>(INST # UXSr) f32:$Rn)>; 4282 def : Pat<(i32 (to_int (round f64:$Rn))), 4283 (!cast<Instruction>(INST # UWDr) f64:$Rn)>; 4284 def : Pat<(i64 (to_int (round f64:$Rn))), 4285 (!cast<Instruction>(INST # UXDr) f64:$Rn)>; 4286 4287 // These instructions saturate like fp_to_[su]int_sat. 4288 let Predicates = [HasFullFP16] in { 4289 def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)), 4290 (!cast<Instruction>(INST # UWHr) f16:$Rn)>; 4291 def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)), 4292 (!cast<Instruction>(INST # UXHr) f16:$Rn)>; 4293 } 4294 def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)), 4295 (!cast<Instruction>(INST # UWSr) f32:$Rn)>; 4296 def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)), 4297 (!cast<Instruction>(INST # UXSr) f32:$Rn)>; 4298 def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)), 4299 (!cast<Instruction>(INST # UWDr) f64:$Rn)>; 4300 def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)), 4301 (!cast<Instruction>(INST # UXDr) f64:$Rn)>; 4302} 4303 4304defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil, "FCVTPS">; 4305defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil, "FCVTPU">; 4306defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">; 4307defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">; 4308defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">; 4309defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">; 4310defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">; 4311defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">; 4312 4313 4314 4315let Predicates = [HasFullFP16] in { 4316 def : Pat<(i32 (any_lround f16:$Rn)), 4317 (FCVTASUWHr f16:$Rn)>; 4318 def : Pat<(i64 (any_lround f16:$Rn)), 4319 (FCVTASUXHr f16:$Rn)>; 4320 def : Pat<(i64 (any_llround f16:$Rn)), 4321 (FCVTASUXHr f16:$Rn)>; 4322} 4323def : Pat<(i32 (any_lround f32:$Rn)), 4324 (FCVTASUWSr f32:$Rn)>; 4325def : Pat<(i32 (any_lround f64:$Rn)), 4326 (FCVTASUWDr f64:$Rn)>; 4327def : Pat<(i64 (any_lround f32:$Rn)), 4328 (FCVTASUXSr f32:$Rn)>; 4329def : Pat<(i64 (any_lround f64:$Rn)), 4330 (FCVTASUXDr f64:$Rn)>; 4331def : Pat<(i64 (any_llround f32:$Rn)), 4332 (FCVTASUXSr f32:$Rn)>; 4333def : Pat<(i64 (any_llround f64:$Rn)), 4334 (FCVTASUXDr f64:$Rn)>; 4335 4336//===----------------------------------------------------------------------===// 4337// Scaled integer to floating point conversion instructions. 4338//===----------------------------------------------------------------------===// 4339 4340defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>; 4341defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>; 4342 4343//===----------------------------------------------------------------------===// 4344// Unscaled integer to floating point conversion instruction. 4345//===----------------------------------------------------------------------===// 4346 4347defm FMOV : UnscaledConversion<"fmov">; 4348 4349// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable 4350let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in { 4351def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>, 4352 Sched<[WriteF]>; 4353def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>, 4354 Sched<[WriteF]>; 4355def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>, 4356 Sched<[WriteF]>; 4357} 4358// Similarly add aliases 4359def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>, 4360 Requires<[HasFullFP16]>; 4361def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>; 4362def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>; 4363 4364// Pattern for FP16 immediates 4365let Predicates = [HasFullFP16] in { 4366 def : Pat<(f16 fpimm:$in), 4367 (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)))>; 4368} 4369 4370//===----------------------------------------------------------------------===// 4371// Floating point conversion instruction. 4372//===----------------------------------------------------------------------===// 4373 4374defm FCVT : FPConversion<"fcvt">; 4375 4376//===----------------------------------------------------------------------===// 4377// Floating point single operand instructions. 4378//===----------------------------------------------------------------------===// 4379 4380defm FABS : SingleOperandFPDataNoException<0b0001, "fabs", fabs>; 4381defm FMOV : SingleOperandFPDataNoException<0b0000, "fmov">; 4382defm FNEG : SingleOperandFPDataNoException<0b0010, "fneg", fneg>; 4383defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>; 4384defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>; 4385defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>; 4386defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>; 4387defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>; 4388 4389defm FRINTX : SingleOperandFPData<0b1110, "frintx", any_frint>; 4390defm FRINTZ : SingleOperandFPData<0b1011, "frintz", any_ftrunc>; 4391 4392let SchedRW = [WriteFDiv] in { 4393defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", any_fsqrt>; 4394} 4395 4396let Predicates = [HasFRInt3264] in { 4397 defm FRINT32Z : FRIntNNT<0b00, "frint32z", int_aarch64_frint32z>; 4398 defm FRINT64Z : FRIntNNT<0b10, "frint64z", int_aarch64_frint64z>; 4399 defm FRINT32X : FRIntNNT<0b01, "frint32x", int_aarch64_frint32x>; 4400 defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>; 4401} // HasFRInt3264 4402 4403// Emitting strict_lrint as two instructions is valid as any exceptions that 4404// occur will happen in exactly one of the instructions (e.g. if the input is 4405// not an integer the inexact exception will happen in the FRINTX but not then 4406// in the FCVTZS as the output of FRINTX is an integer). 4407let Predicates = [HasFullFP16] in { 4408 def : Pat<(i32 (any_lrint f16:$Rn)), 4409 (FCVTZSUWHr (FRINTXHr f16:$Rn))>; 4410 def : Pat<(i64 (any_lrint f16:$Rn)), 4411 (FCVTZSUXHr (FRINTXHr f16:$Rn))>; 4412 def : Pat<(i64 (any_llrint f16:$Rn)), 4413 (FCVTZSUXHr (FRINTXHr f16:$Rn))>; 4414} 4415def : Pat<(i32 (any_lrint f32:$Rn)), 4416 (FCVTZSUWSr (FRINTXSr f32:$Rn))>; 4417def : Pat<(i32 (any_lrint f64:$Rn)), 4418 (FCVTZSUWDr (FRINTXDr f64:$Rn))>; 4419def : Pat<(i64 (any_lrint f32:$Rn)), 4420 (FCVTZSUXSr (FRINTXSr f32:$Rn))>; 4421def : Pat<(i64 (any_lrint f64:$Rn)), 4422 (FCVTZSUXDr (FRINTXDr f64:$Rn))>; 4423def : Pat<(i64 (any_llrint f32:$Rn)), 4424 (FCVTZSUXSr (FRINTXSr f32:$Rn))>; 4425def : Pat<(i64 (any_llrint f64:$Rn)), 4426 (FCVTZSUXDr (FRINTXDr f64:$Rn))>; 4427 4428//===----------------------------------------------------------------------===// 4429// Floating point two operand instructions. 4430//===----------------------------------------------------------------------===// 4431 4432defm FADD : TwoOperandFPData<0b0010, "fadd", any_fadd>; 4433let SchedRW = [WriteFDiv] in { 4434defm FDIV : TwoOperandFPData<0b0001, "fdiv", any_fdiv>; 4435} 4436defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", any_fmaxnum>; 4437defm FMAX : TwoOperandFPData<0b0100, "fmax", any_fmaximum>; 4438defm FMINNM : TwoOperandFPData<0b0111, "fminnm", any_fminnum>; 4439defm FMIN : TwoOperandFPData<0b0101, "fmin", any_fminimum>; 4440let SchedRW = [WriteFMul] in { 4441defm FMUL : TwoOperandFPData<0b0000, "fmul", any_fmul>; 4442defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", any_fmul>; 4443} 4444defm FSUB : TwoOperandFPData<0b0011, "fsub", any_fsub>; 4445 4446multiclass FMULScalarFromIndexedLane0Patterns<string inst, 4447 string inst_f16_suffix, 4448 string inst_f32_suffix, 4449 string inst_f64_suffix, 4450 SDPatternOperator OpNode, 4451 list<Predicate> preds = []> { 4452 let Predicates = !listconcat(preds, [HasFullFP16]) in { 4453 def : Pat<(f16 (OpNode (f16 FPR16:$Rn), 4454 (f16 (vector_extract (v8f16 V128:$Rm), (i64 0))))), 4455 (!cast<Instruction>(inst # inst_f16_suffix) 4456 FPR16:$Rn, (EXTRACT_SUBREG V128:$Rm, hsub))>; 4457 } 4458 let Predicates = preds in { 4459 def : Pat<(f32 (OpNode (f32 FPR32:$Rn), 4460 (f32 (vector_extract (v4f32 V128:$Rm), (i64 0))))), 4461 (!cast<Instruction>(inst # inst_f32_suffix) 4462 FPR32:$Rn, (EXTRACT_SUBREG V128:$Rm, ssub))>; 4463 def : Pat<(f64 (OpNode (f64 FPR64:$Rn), 4464 (f64 (vector_extract (v2f64 V128:$Rm), (i64 0))))), 4465 (!cast<Instruction>(inst # inst_f64_suffix) 4466 FPR64:$Rn, (EXTRACT_SUBREG V128:$Rm, dsub))>; 4467 } 4468} 4469 4470defm : FMULScalarFromIndexedLane0Patterns<"FMUL", "Hrr", "Srr", "Drr", 4471 any_fmul>; 4472 4473// Match reassociated forms of FNMUL. 4474def : Pat<(fmul (fneg FPR16:$a), (f16 FPR16:$b)), 4475 (FNMULHrr FPR16:$a, FPR16:$b)>, 4476 Requires<[HasFullFP16]>; 4477def : Pat<(fmul (fneg FPR32:$a), (f32 FPR32:$b)), 4478 (FNMULSrr FPR32:$a, FPR32:$b)>; 4479def : Pat<(fmul (fneg FPR64:$a), (f64 FPR64:$b)), 4480 (FNMULDrr FPR64:$a, FPR64:$b)>; 4481 4482def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 4483 (FMAXDrr FPR64:$Rn, FPR64:$Rm)>; 4484def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 4485 (FMINDrr FPR64:$Rn, FPR64:$Rm)>; 4486def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 4487 (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>; 4488def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 4489 (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>; 4490 4491//===----------------------------------------------------------------------===// 4492// Floating point three operand instructions. 4493//===----------------------------------------------------------------------===// 4494 4495defm FMADD : ThreeOperandFPData<0, 0, "fmadd", any_fma>; 4496defm FMSUB : ThreeOperandFPData<0, 1, "fmsub", 4497 TriOpFrag<(any_fma node:$LHS, (fneg node:$MHS), node:$RHS)> >; 4498defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd", 4499 TriOpFrag<(fneg (any_fma node:$LHS, node:$MHS, node:$RHS))> >; 4500defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub", 4501 TriOpFrag<(any_fma node:$LHS, node:$MHS, (fneg node:$RHS))> >; 4502 4503// The following def pats catch the case where the LHS of an FMA is negated. 4504// The TriOpFrag above catches the case where the middle operand is negated. 4505 4506// N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike 4507// the NEON variant. 4508 4509// Here we handle first -(a + b*c) for FNMADD: 4510 4511let Predicates = [HasNEON, HasFullFP16] in 4512def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)), 4513 (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>; 4514 4515def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)), 4516 (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; 4517 4518def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)), 4519 (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; 4520 4521// Now it's time for "(-a) + (-b)*c" 4522 4523let Predicates = [HasNEON, HasFullFP16] in 4524def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))), 4525 (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>; 4526 4527def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))), 4528 (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; 4529 4530def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))), 4531 (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; 4532 4533//===----------------------------------------------------------------------===// 4534// Floating point comparison instructions. 4535//===----------------------------------------------------------------------===// 4536 4537defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>; 4538defm FCMP : FPComparison<0, "fcmp", AArch64any_fcmp>; 4539 4540//===----------------------------------------------------------------------===// 4541// Floating point conditional comparison instructions. 4542//===----------------------------------------------------------------------===// 4543 4544defm FCCMPE : FPCondComparison<1, "fccmpe">; 4545defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>; 4546 4547//===----------------------------------------------------------------------===// 4548// Floating point conditional select instruction. 4549//===----------------------------------------------------------------------===// 4550 4551defm FCSEL : FPCondSelect<"fcsel">; 4552 4553let Predicates = [HasFullFP16] in 4554def : Pat<(bf16 (AArch64csel (bf16 FPR16:$Rn), (bf16 FPR16:$Rm), (i32 imm:$cond), NZCV)), 4555 (FCSELHrrr FPR16:$Rn, FPR16:$Rm, imm:$cond)>; 4556 4557// CSEL instructions providing f128 types need to be handled by a 4558// pseudo-instruction since the eventual code will need to introduce basic 4559// blocks and control flow. 4560def F128CSEL : Pseudo<(outs FPR128:$Rd), 4561 (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond), 4562 [(set (f128 FPR128:$Rd), 4563 (AArch64csel FPR128:$Rn, FPR128:$Rm, 4564 (i32 imm:$cond), NZCV))]> { 4565 let Uses = [NZCV]; 4566 let usesCustomInserter = 1; 4567 let hasNoSchedulingInfo = 1; 4568} 4569 4570//===----------------------------------------------------------------------===// 4571// Instructions used for emitting unwind opcodes on ARM64 Windows. 4572//===----------------------------------------------------------------------===// 4573let isPseudo = 1 in { 4574 def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>; 4575 def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>; 4576 def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>; 4577 def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; 4578 def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; 4579 def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; 4580 def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; 4581 def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; 4582 def SEH_SaveFReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; 4583 def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; 4584 def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; 4585 def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>; 4586 def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>; 4587 def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>; 4588 def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>; 4589 def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>; 4590 def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>; 4591 def SEH_PACSignLR : Pseudo<(outs), (ins), []>, Sched<[]>; 4592} 4593 4594// Pseudo instructions for Windows EH 4595//===----------------------------------------------------------------------===// 4596let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1, 4597 isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in { 4598 def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>; 4599 let usesCustomInserter = 1 in 4600 def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>, 4601 Sched<[]>; 4602} 4603 4604// Pseudo instructions for homogeneous prolog/epilog 4605let isPseudo = 1 in { 4606 // Save CSRs in order, {FPOffset} 4607 def HOM_Prolog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>; 4608 // Restore CSRs in order 4609 def HOM_Epilog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>; 4610} 4611 4612//===----------------------------------------------------------------------===// 4613// Floating point immediate move. 4614//===----------------------------------------------------------------------===// 4615 4616let isReMaterializable = 1, isAsCheapAsAMove = 1 in { 4617defm FMOV : FPMoveImmediate<"fmov">; 4618} 4619 4620//===----------------------------------------------------------------------===// 4621// Advanced SIMD two vector instructions. 4622//===----------------------------------------------------------------------===// 4623 4624defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", 4625 AArch64uabd>; 4626// Match UABDL in log2-shuffle patterns. 4627def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)), 4628 (zext (v8i8 V64:$opB))))), 4629 (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; 4630def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), 4631 (v8i16 (add (sub (zext (v8i8 V64:$opA)), 4632 (zext (v8i8 V64:$opB))), 4633 (AArch64vashr v8i16:$src, (i32 15))))), 4634 (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; 4635def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))), 4636 (zext (extract_high_v16i8 (v16i8 V128:$opB)))))), 4637 (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; 4638def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), 4639 (v8i16 (add (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))), 4640 (zext (extract_high_v16i8 (v16i8 V128:$opB)))), 4641 (AArch64vashr v8i16:$src, (i32 15))))), 4642 (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; 4643def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)), 4644 (zext (v4i16 V64:$opB))))), 4645 (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>; 4646def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 (v8i16 V128:$opA))), 4647 (zext (extract_high_v8i16 (v8i16 V128:$opB)))))), 4648 (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>; 4649def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)), 4650 (zext (v2i32 V64:$opB))))), 4651 (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>; 4652def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 (v4i32 V128:$opA))), 4653 (zext (extract_high_v4i32 (v4i32 V128:$opB)))))), 4654 (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>; 4655 4656defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>; 4657defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>; 4658defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>; 4659defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>; 4660defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>; 4661defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>; 4662defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>; 4663defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>; 4664defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>; 4665defm FABS : SIMDTwoVectorFPNoException<0, 1, 0b01111, "fabs", fabs>; 4666 4667def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))), 4668 (CMLTv8i8rz V64:$Rn)>; 4669def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))), 4670 (CMLTv4i16rz V64:$Rn)>; 4671def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))), 4672 (CMLTv2i32rz V64:$Rn)>; 4673def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))), 4674 (CMLTv16i8rz V128:$Rn)>; 4675def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))), 4676 (CMLTv8i16rz V128:$Rn)>; 4677def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))), 4678 (CMLTv4i32rz V128:$Rn)>; 4679def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))), 4680 (CMLTv2i64rz V128:$Rn)>; 4681 4682defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; 4683defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>; 4684defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; 4685defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>; 4686defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; 4687defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>; 4688defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>; 4689defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">; 4690def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))), 4691 (FCVTLv4i16 V64:$Rn)>; 4692def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn), 4693 (i64 4)))), 4694 (FCVTLv8i16 V128:$Rn)>; 4695def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))), 4696 (FCVTLv2i32 V64:$Rn)>; 4697def : Pat<(v2f64 (any_fpextend (v2f32 (extract_high_v4f32 (v4f32 V128:$Rn))))), 4698 (FCVTLv4i32 V128:$Rn)>; 4699def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))), 4700 (FCVTLv4i16 V64:$Rn)>; 4701def : Pat<(v4f32 (any_fpextend (v4f16 (extract_high_v8f16 (v8f16 V128:$Rn))))), 4702 (FCVTLv8i16 V128:$Rn)>; 4703 4704defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>; 4705defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>; 4706defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>; 4707defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>; 4708defm FCVTN : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">; 4709def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))), 4710 (FCVTNv4i16 V128:$Rn)>; 4711def : Pat<(concat_vectors V64:$Rd, 4712 (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))), 4713 (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; 4714def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))), 4715 (FCVTNv2i32 V128:$Rn)>; 4716def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))), 4717 (FCVTNv4i16 V128:$Rn)>; 4718def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))), 4719 (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; 4720def : Pat<(concat_vectors V64:$Rd, (v4f16 (any_fpround (v4f32 V128:$Rn)))), 4721 (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; 4722defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>; 4723defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>; 4724defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn", 4725 int_aarch64_neon_fcvtxn>; 4726defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>; 4727defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>; 4728 4729// AArch64's FCVT instructions saturate when out of range. 4730multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> { 4731 let Predicates = [HasFullFP16] in { 4732 def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)), 4733 (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>; 4734 def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)), 4735 (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>; 4736 } 4737 def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)), 4738 (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>; 4739 def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)), 4740 (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>; 4741 def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)), 4742 (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>; 4743} 4744defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, "FCVTZS">; 4745defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, "FCVTZU">; 4746 4747def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>; 4748def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>; 4749def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>; 4750def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>; 4751def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>; 4752 4753def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>; 4754def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>; 4755def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>; 4756def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>; 4757def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>; 4758 4759defm FNEG : SIMDTwoVectorFPNoException<1, 1, 0b01111, "fneg", fneg>; 4760defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>; 4761defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>; 4762defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>; 4763defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>; 4764defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>; 4765defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>; 4766defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>; 4767defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>; 4768 4769let Predicates = [HasFRInt3264] in { 4770 defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>; 4771 defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>; 4772 defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>; 4773 defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>; 4774} // HasFRInt3264 4775 4776defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>; 4777defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>; 4778defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg", 4779 UnOpFrag<(sub immAllZerosV, node:$LHS)> >; 4780defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>; 4781// Aliases for MVN -> NOT. 4782def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}", 4783 (NOTv8i8 V64:$Vd, V64:$Vn)>; 4784def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}", 4785 (NOTv16i8 V128:$Vd, V128:$Vn)>; 4786 4787def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; 4788def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; 4789def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; 4790def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; 4791def : Pat<(vnot (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>; 4792def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; 4793 4794defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>; 4795defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>; 4796defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>; 4797defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>; 4798defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp", 4799 BinOpFrag<(add node:$LHS, (AArch64saddlp node:$RHS))> >; 4800defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", AArch64saddlp>; 4801defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>; 4802defm SHLL : SIMDVectorLShiftLongBySizeBHS; 4803defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; 4804defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; 4805defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>; 4806defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>; 4807defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>; 4808defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp", 4809 BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >; 4810defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>; 4811defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>; 4812defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>; 4813defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>; 4814defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>; 4815defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>; 4816defm XTN : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>; 4817 4818def : Pat<(v4f16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>; 4819def : Pat<(v4f16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>; 4820def : Pat<(v4bf16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>; 4821def : Pat<(v4bf16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>; 4822def : Pat<(v8f16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>; 4823def : Pat<(v8f16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>; 4824def : Pat<(v8bf16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>; 4825def : Pat<(v8bf16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>; 4826def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>; 4827def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>; 4828 4829// Patterns for vector long shift (by element width). These need to match all 4830// three of zext, sext and anyext so it's easier to pull the patterns out of the 4831// definition. 4832multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> { 4833 def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)), 4834 (SHLLv8i8 V64:$Rn)>; 4835 def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 (v16i8 V128:$Rn)))), (i32 8)), 4836 (SHLLv16i8 V128:$Rn)>; 4837 def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)), 4838 (SHLLv4i16 V64:$Rn)>; 4839 def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 (v8i16 V128:$Rn)))), (i32 16)), 4840 (SHLLv8i16 V128:$Rn)>; 4841 def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)), 4842 (SHLLv2i32 V64:$Rn)>; 4843 def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 (v4i32 V128:$Rn)))), (i32 32)), 4844 (SHLLv4i32 V128:$Rn)>; 4845} 4846 4847defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>; 4848defm : SIMDVectorLShiftLongBySizeBHSPats<zext>; 4849defm : SIMDVectorLShiftLongBySizeBHSPats<sext>; 4850 4851// Constant vector values, used in the S/UQXTN patterns below. 4852def VImmFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>; 4853def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>; 4854def VImm7F: PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>; 4855def VImm80: PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>; 4856def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>; 4857def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>; 4858 4859// trunc(umin(X, 255)) -> UQXTRN v8i8 4860def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))), 4861 (UQXTNv8i8 V128:$Vn)>; 4862// trunc(umin(X, 65535)) -> UQXTRN v4i16 4863def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))), 4864 (UQXTNv4i16 V128:$Vn)>; 4865// trunc(smin(smax(X, -128), 128)) -> SQXTRN 4866// with reversed min/max 4867def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)), 4868 (v8i16 VImm7F)))), 4869 (SQXTNv8i8 V128:$Vn)>; 4870def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)), 4871 (v8i16 VImm80)))), 4872 (SQXTNv8i8 V128:$Vn)>; 4873// trunc(smin(smax(X, -32768), 32767)) -> SQXTRN 4874// with reversed min/max 4875def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)), 4876 (v4i32 VImm7FFF)))), 4877 (SQXTNv4i16 V128:$Vn)>; 4878def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)), 4879 (v4i32 VImm8000)))), 4880 (SQXTNv4i16 V128:$Vn)>; 4881 4882// concat_vectors(Vd, trunc(smin(smax Vm, -128), 127) ~> SQXTN2(Vd, Vn) 4883// with reversed min/max 4884def : Pat<(v16i8 (concat_vectors 4885 (v8i8 V64:$Vd), 4886 (v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)), 4887 (v8i16 VImm7F)))))), 4888 (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; 4889def : Pat<(v16i8 (concat_vectors 4890 (v8i8 V64:$Vd), 4891 (v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)), 4892 (v8i16 VImm80)))))), 4893 (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; 4894 4895// concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767) ~> SQXTN2(Vd, Vn) 4896// with reversed min/max 4897def : Pat<(v8i16 (concat_vectors 4898 (v4i16 V64:$Vd), 4899 (v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)), 4900 (v4i32 VImm7FFF)))))), 4901 (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; 4902def : Pat<(v8i16 (concat_vectors 4903 (v4i16 V64:$Vd), 4904 (v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)), 4905 (v4i32 VImm8000)))))), 4906 (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; 4907 4908//===----------------------------------------------------------------------===// 4909// Advanced SIMD three vector instructions. 4910//===----------------------------------------------------------------------===// 4911 4912defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>; 4913defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", AArch64addp>; 4914defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>; 4915defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>; 4916defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>; 4917defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>; 4918defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>; 4919defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>; 4920foreach VT = [ v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64 ] in { 4921def : Pat<(vnot (AArch64cmeqz VT:$Rn)), (!cast<Instruction>("CMTST"#VT) VT:$Rn, VT:$Rn)>; 4922} 4923defm FABD : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>; 4924let Predicates = [HasNEON] in { 4925foreach VT = [ v2f32, v4f32, v2f64 ] in 4926def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>; 4927} 4928let Predicates = [HasNEON, HasFullFP16] in { 4929foreach VT = [ v4f16, v8f16 ] in 4930def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>; 4931} 4932defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",AArch64facge>; 4933defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",AArch64facgt>; 4934defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp", AArch64faddp>; 4935defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", any_fadd>; 4936defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; 4937defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; 4938defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; 4939defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", any_fdiv>; 4940defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; 4941defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", any_fmaxnum>; 4942defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>; 4943defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", any_fmaximum>; 4944defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>; 4945defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>; 4946defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>; 4947defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>; 4948 4949// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the 4950// instruction expects the addend first, while the fma intrinsic puts it last. 4951defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla", 4952 TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >; 4953defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls", 4954 TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; 4955 4956defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>; 4957defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", any_fmul>; 4958defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>; 4959defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>; 4960defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", any_fsub>; 4961 4962// MLA and MLS are generated in MachineCombine 4963defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>; 4964defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>; 4965 4966defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>; 4967defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>; 4968defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba", 4969 TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >; 4970defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>; 4971defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", avgfloors>; 4972defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>; 4973defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>; 4974defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>; 4975defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>; 4976defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>; 4977defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>; 4978defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>; 4979defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>; 4980defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>; 4981defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>; 4982defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>; 4983defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", avgceils>; 4984defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>; 4985defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>; 4986defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>; 4987defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba", 4988 TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >; 4989defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>; 4990defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", avgflooru>; 4991defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>; 4992defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>; 4993defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>; 4994defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>; 4995defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>; 4996defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>; 4997defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>; 4998defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>; 4999defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>; 5000defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", avgceilu>; 5001defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>; 5002defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>; 5003defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah", 5004 int_aarch64_neon_sqrdmlah>; 5005defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh", 5006 int_aarch64_neon_sqrdmlsh>; 5007 5008// Extra saturate patterns, other than the intrinsics matches above 5009defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>; 5010defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>; 5011defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>; 5012defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>; 5013 5014defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>; 5015defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic", 5016 BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >; 5017defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>; 5018defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn", 5019 BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >; 5020defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>; 5021 5022// Pseudo bitwise select pattern BSP. 5023// It is expanded into BSL/BIT/BIF after register allocation. 5024defm BSP : SIMDLogicalThreeVectorPseudo<TriOpFrag<(or (and node:$LHS, node:$MHS), 5025 (and (vnot node:$LHS), node:$RHS))>>; 5026defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">; 5027defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>; 5028defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">; 5029 5030def : Pat<(AArch64bsp (v8i8 V64:$Rd), V64:$Rn, V64:$Rm), 5031 (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; 5032def : Pat<(AArch64bsp (v4i16 V64:$Rd), V64:$Rn, V64:$Rm), 5033 (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; 5034def : Pat<(AArch64bsp (v2i32 V64:$Rd), V64:$Rn, V64:$Rm), 5035 (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; 5036def : Pat<(AArch64bsp (v1i64 V64:$Rd), V64:$Rn, V64:$Rm), 5037 (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; 5038 5039def : Pat<(AArch64bsp (v16i8 V128:$Rd), V128:$Rn, V128:$Rm), 5040 (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; 5041def : Pat<(AArch64bsp (v8i16 V128:$Rd), V128:$Rn, V128:$Rm), 5042 (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; 5043def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm), 5044 (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; 5045def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm), 5046 (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; 5047 5048def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}", 5049 (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>; 5050def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}", 5051 (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; 5052def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}", 5053 (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; 5054def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}", 5055 (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; 5056 5057def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}", 5058 (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>; 5059def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}", 5060 (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; 5061def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}", 5062 (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; 5063def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}", 5064 (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; 5065 5066def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" # 5067 "|cmls.8b\t$dst, $src1, $src2}", 5068 (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; 5069def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" # 5070 "|cmls.16b\t$dst, $src1, $src2}", 5071 (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; 5072def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" # 5073 "|cmls.4h\t$dst, $src1, $src2}", 5074 (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; 5075def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" # 5076 "|cmls.8h\t$dst, $src1, $src2}", 5077 (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; 5078def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" # 5079 "|cmls.2s\t$dst, $src1, $src2}", 5080 (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; 5081def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" # 5082 "|cmls.4s\t$dst, $src1, $src2}", 5083 (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; 5084def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" # 5085 "|cmls.2d\t$dst, $src1, $src2}", 5086 (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; 5087 5088def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" # 5089 "|cmlo.8b\t$dst, $src1, $src2}", 5090 (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; 5091def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" # 5092 "|cmlo.16b\t$dst, $src1, $src2}", 5093 (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; 5094def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" # 5095 "|cmlo.4h\t$dst, $src1, $src2}", 5096 (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; 5097def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" # 5098 "|cmlo.8h\t$dst, $src1, $src2}", 5099 (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; 5100def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" # 5101 "|cmlo.2s\t$dst, $src1, $src2}", 5102 (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; 5103def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" # 5104 "|cmlo.4s\t$dst, $src1, $src2}", 5105 (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; 5106def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" # 5107 "|cmlo.2d\t$dst, $src1, $src2}", 5108 (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; 5109 5110def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" # 5111 "|cmle.8b\t$dst, $src1, $src2}", 5112 (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; 5113def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" # 5114 "|cmle.16b\t$dst, $src1, $src2}", 5115 (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; 5116def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" # 5117 "|cmle.4h\t$dst, $src1, $src2}", 5118 (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; 5119def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" # 5120 "|cmle.8h\t$dst, $src1, $src2}", 5121 (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; 5122def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" # 5123 "|cmle.2s\t$dst, $src1, $src2}", 5124 (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; 5125def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" # 5126 "|cmle.4s\t$dst, $src1, $src2}", 5127 (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; 5128def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" # 5129 "|cmle.2d\t$dst, $src1, $src2}", 5130 (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; 5131 5132def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" # 5133 "|cmlt.8b\t$dst, $src1, $src2}", 5134 (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; 5135def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" # 5136 "|cmlt.16b\t$dst, $src1, $src2}", 5137 (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; 5138def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" # 5139 "|cmlt.4h\t$dst, $src1, $src2}", 5140 (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; 5141def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" # 5142 "|cmlt.8h\t$dst, $src1, $src2}", 5143 (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; 5144def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" # 5145 "|cmlt.2s\t$dst, $src1, $src2}", 5146 (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; 5147def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" # 5148 "|cmlt.4s\t$dst, $src1, $src2}", 5149 (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; 5150def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" # 5151 "|cmlt.2d\t$dst, $src1, $src2}", 5152 (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; 5153 5154let Predicates = [HasNEON, HasFullFP16] in { 5155def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" # 5156 "|fcmle.4h\t$dst, $src1, $src2}", 5157 (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; 5158def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" # 5159 "|fcmle.8h\t$dst, $src1, $src2}", 5160 (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; 5161} 5162def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" # 5163 "|fcmle.2s\t$dst, $src1, $src2}", 5164 (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; 5165def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" # 5166 "|fcmle.4s\t$dst, $src1, $src2}", 5167 (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; 5168def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" # 5169 "|fcmle.2d\t$dst, $src1, $src2}", 5170 (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; 5171 5172let Predicates = [HasNEON, HasFullFP16] in { 5173def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" # 5174 "|fcmlt.4h\t$dst, $src1, $src2}", 5175 (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; 5176def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" # 5177 "|fcmlt.8h\t$dst, $src1, $src2}", 5178 (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; 5179} 5180def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" # 5181 "|fcmlt.2s\t$dst, $src1, $src2}", 5182 (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; 5183def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" # 5184 "|fcmlt.4s\t$dst, $src1, $src2}", 5185 (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; 5186def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" # 5187 "|fcmlt.2d\t$dst, $src1, $src2}", 5188 (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; 5189 5190let Predicates = [HasNEON, HasFullFP16] in { 5191def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" # 5192 "|facle.4h\t$dst, $src1, $src2}", 5193 (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; 5194def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" # 5195 "|facle.8h\t$dst, $src1, $src2}", 5196 (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; 5197} 5198def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" # 5199 "|facle.2s\t$dst, $src1, $src2}", 5200 (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; 5201def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" # 5202 "|facle.4s\t$dst, $src1, $src2}", 5203 (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; 5204def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" # 5205 "|facle.2d\t$dst, $src1, $src2}", 5206 (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; 5207 5208let Predicates = [HasNEON, HasFullFP16] in { 5209def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" # 5210 "|faclt.4h\t$dst, $src1, $src2}", 5211 (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; 5212def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" # 5213 "|faclt.8h\t$dst, $src1, $src2}", 5214 (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; 5215} 5216def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" # 5217 "|faclt.2s\t$dst, $src1, $src2}", 5218 (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; 5219def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" # 5220 "|faclt.4s\t$dst, $src1, $src2}", 5221 (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; 5222def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" # 5223 "|faclt.2d\t$dst, $src1, $src2}", 5224 (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; 5225 5226//===----------------------------------------------------------------------===// 5227// Advanced SIMD three scalar instructions. 5228//===----------------------------------------------------------------------===// 5229 5230defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>; 5231defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>; 5232defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>; 5233defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>; 5234defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>; 5235defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>; 5236defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>; 5237defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>; 5238def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 5239 (FABD64 FPR64:$Rn, FPR64:$Rm)>; 5240let Predicates = [HasNEON, HasFullFP16] in { 5241def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>; 5242} 5243let Predicates = [HasNEON] in { 5244def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>; 5245def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>; 5246} 5247defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge", 5248 int_aarch64_neon_facge>; 5249defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt", 5250 int_aarch64_neon_facgt>; 5251defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; 5252defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; 5253defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; 5254defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorSME>; 5255defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorSME>; 5256defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorSME>; 5257defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>; 5258defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>; 5259defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>; 5260defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>; 5261defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>; 5262defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>; 5263defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>; 5264defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>; 5265defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>; 5266defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>; 5267defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>; 5268defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>; 5269defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>; 5270defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>; 5271defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>; 5272let Predicates = [HasRDM] in { 5273 defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">; 5274 defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">; 5275 def : Pat<(i32 (int_aarch64_neon_sqrdmlah (i32 FPR32:$Rd), (i32 FPR32:$Rn), 5276 (i32 FPR32:$Rm))), 5277 (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; 5278 def : Pat<(i32 (int_aarch64_neon_sqrdmlsh (i32 FPR32:$Rd), (i32 FPR32:$Rn), 5279 (i32 FPR32:$Rm))), 5280 (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; 5281} 5282 5283defm : FMULScalarFromIndexedLane0Patterns<"FMULX", "16", "32", "64", 5284 int_aarch64_neon_fmulx, 5285 [HasNEONorSME]>; 5286 5287def : InstAlias<"cmls $dst, $src1, $src2", 5288 (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5289def : InstAlias<"cmle $dst, $src1, $src2", 5290 (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5291def : InstAlias<"cmlo $dst, $src1, $src2", 5292 (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5293def : InstAlias<"cmlt $dst, $src1, $src2", 5294 (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5295def : InstAlias<"fcmle $dst, $src1, $src2", 5296 (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; 5297def : InstAlias<"fcmle $dst, $src1, $src2", 5298 (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5299def : InstAlias<"fcmlt $dst, $src1, $src2", 5300 (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; 5301def : InstAlias<"fcmlt $dst, $src1, $src2", 5302 (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5303def : InstAlias<"facle $dst, $src1, $src2", 5304 (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; 5305def : InstAlias<"facle $dst, $src1, $src2", 5306 (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5307def : InstAlias<"faclt $dst, $src1, $src2", 5308 (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; 5309def : InstAlias<"faclt $dst, $src1, $src2", 5310 (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5311 5312//===----------------------------------------------------------------------===// 5313// Advanced SIMD three scalar instructions (mixed operands). 5314//===----------------------------------------------------------------------===// 5315defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull", 5316 int_aarch64_neon_sqdmulls_scalar>; 5317defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">; 5318defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">; 5319 5320def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd), 5321 (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), 5322 (i32 FPR32:$Rm))))), 5323 (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; 5324def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd), 5325 (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), 5326 (i32 FPR32:$Rm))))), 5327 (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; 5328 5329//===----------------------------------------------------------------------===// 5330// Advanced SIMD two scalar instructions. 5331//===----------------------------------------------------------------------===// 5332 5333defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", abs, [HasNoCSSC]>; 5334defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>; 5335defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>; 5336defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>; 5337defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>; 5338defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>; 5339defm FCMEQ : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; 5340defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>; 5341defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; 5342defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>; 5343defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; 5344defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">; 5345defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">; 5346defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">; 5347defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">; 5348defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">; 5349defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">; 5350defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">; 5351defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">; 5352def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">; 5353defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">; 5354defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">; 5355defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe", HasNEONorSME>; 5356defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx", HasNEONorSME>; 5357defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte", HasNEONorSME>; 5358defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg", 5359 UnOpFrag<(sub immAllZerosV, node:$LHS)> >; 5360defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>; 5361defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; 5362defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; 5363defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>; 5364defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>; 5365defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd", 5366 int_aarch64_neon_suqadd>; 5367defm UCVTF : SIMDFPTwoScalarCVT< 1, 0, 0b11101, "ucvtf", AArch64uitof>; 5368defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>; 5369defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", 5370 int_aarch64_neon_usqadd>; 5371 5372def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))), 5373 (CMLTv1i64rz V64:$Rn)>; 5374 5375def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))), 5376 (FCVTASv1i64 FPR64:$Rn)>; 5377def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))), 5378 (FCVTAUv1i64 FPR64:$Rn)>; 5379def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))), 5380 (FCVTMSv1i64 FPR64:$Rn)>; 5381def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))), 5382 (FCVTMUv1i64 FPR64:$Rn)>; 5383def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))), 5384 (FCVTNSv1i64 FPR64:$Rn)>; 5385def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))), 5386 (FCVTNUv1i64 FPR64:$Rn)>; 5387def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))), 5388 (FCVTPSv1i64 FPR64:$Rn)>; 5389def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))), 5390 (FCVTPUv1i64 FPR64:$Rn)>; 5391def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))), 5392 (FCVTZSv1i64 FPR64:$Rn)>; 5393def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))), 5394 (FCVTZUv1i64 FPR64:$Rn)>; 5395 5396def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))), 5397 (FRECPEv1f16 FPR16:$Rn)>; 5398def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))), 5399 (FRECPEv1i32 FPR32:$Rn)>; 5400def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))), 5401 (FRECPEv1i64 FPR64:$Rn)>; 5402def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))), 5403 (FRECPEv1i64 FPR64:$Rn)>; 5404 5405def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))), 5406 (FRECPEv1i32 FPR32:$Rn)>; 5407def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))), 5408 (FRECPEv2f32 V64:$Rn)>; 5409def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))), 5410 (FRECPEv4f32 FPR128:$Rn)>; 5411def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))), 5412 (FRECPEv1i64 FPR64:$Rn)>; 5413def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))), 5414 (FRECPEv1i64 FPR64:$Rn)>; 5415def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))), 5416 (FRECPEv2f64 FPR128:$Rn)>; 5417 5418def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))), 5419 (FRECPS32 FPR32:$Rn, FPR32:$Rm)>; 5420def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))), 5421 (FRECPSv2f32 V64:$Rn, V64:$Rm)>; 5422def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))), 5423 (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>; 5424def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))), 5425 (FRECPS64 FPR64:$Rn, FPR64:$Rm)>; 5426def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))), 5427 (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>; 5428 5429def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))), 5430 (FRECPXv1f16 FPR16:$Rn)>; 5431def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))), 5432 (FRECPXv1i32 FPR32:$Rn)>; 5433def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))), 5434 (FRECPXv1i64 FPR64:$Rn)>; 5435 5436def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))), 5437 (FRSQRTEv1f16 FPR16:$Rn)>; 5438def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))), 5439 (FRSQRTEv1i32 FPR32:$Rn)>; 5440def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))), 5441 (FRSQRTEv1i64 FPR64:$Rn)>; 5442def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))), 5443 (FRSQRTEv1i64 FPR64:$Rn)>; 5444 5445def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))), 5446 (FRSQRTEv1i32 FPR32:$Rn)>; 5447def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))), 5448 (FRSQRTEv2f32 V64:$Rn)>; 5449def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))), 5450 (FRSQRTEv4f32 FPR128:$Rn)>; 5451def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))), 5452 (FRSQRTEv1i64 FPR64:$Rn)>; 5453def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))), 5454 (FRSQRTEv1i64 FPR64:$Rn)>; 5455def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))), 5456 (FRSQRTEv2f64 FPR128:$Rn)>; 5457 5458def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))), 5459 (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>; 5460def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))), 5461 (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>; 5462def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))), 5463 (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>; 5464def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))), 5465 (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>; 5466def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))), 5467 (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>; 5468 5469// Some float -> int -> float conversion patterns for which we want to keep the 5470// int values in FP registers using the corresponding NEON instructions to 5471// avoid more costly int <-> fp register transfers. 5472let Predicates = [HasNEON] in { 5473def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))), 5474 (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>; 5475def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))), 5476 (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>; 5477def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))), 5478 (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>; 5479def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))), 5480 (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>; 5481 5482let Predicates = [HasFullFP16] in { 5483def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))), 5484 (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>; 5485def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))), 5486 (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>; 5487} 5488// If an integer is about to be converted to a floating point value, 5489// just load it on the floating point unit. 5490// Here are the patterns for 8 and 16-bits to float. 5491// 8-bits -> float. 5492multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy, 5493 SDPatternOperator loadop, Instruction UCVTF, 5494 ROAddrMode ro, Instruction LDRW, Instruction LDRX, 5495 SubRegIndex sub> { 5496 def : Pat<(DstTy (uint_to_fp (SrcTy 5497 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, 5498 ro.Wext:$extend))))), 5499 (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)), 5500 (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), 5501 sub))>; 5502 5503 def : Pat<(DstTy (uint_to_fp (SrcTy 5504 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, 5505 ro.Wext:$extend))))), 5506 (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)), 5507 (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), 5508 sub))>; 5509} 5510 5511defm : UIntToFPROLoadPat<f32, i32, zextloadi8, 5512 UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>; 5513def : Pat <(f32 (uint_to_fp (i32 5514 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), 5515 (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), 5516 (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>; 5517def : Pat <(f32 (uint_to_fp (i32 5518 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))), 5519 (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), 5520 (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>; 5521// 16-bits -> float. 5522defm : UIntToFPROLoadPat<f32, i32, zextloadi16, 5523 UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>; 5524def : Pat <(f32 (uint_to_fp (i32 5525 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), 5526 (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), 5527 (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>; 5528def : Pat <(f32 (uint_to_fp (i32 5529 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), 5530 (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), 5531 (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>; 5532// 32-bits are handled in target specific dag combine: 5533// performIntToFpCombine. 5534// 64-bits integer to 32-bits floating point, not possible with 5535// UCVTF on floating point registers (both source and destination 5536// must have the same size). 5537 5538// Here are the patterns for 8, 16, 32, and 64-bits to double. 5539// 8-bits -> double. 5540defm : UIntToFPROLoadPat<f64, i32, zextloadi8, 5541 UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>; 5542def : Pat <(f64 (uint_to_fp (i32 5543 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), 5544 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 5545 (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>; 5546def : Pat <(f64 (uint_to_fp (i32 5547 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))), 5548 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 5549 (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>; 5550// 16-bits -> double. 5551defm : UIntToFPROLoadPat<f64, i32, zextloadi16, 5552 UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>; 5553def : Pat <(f64 (uint_to_fp (i32 5554 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), 5555 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 5556 (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>; 5557def : Pat <(f64 (uint_to_fp (i32 5558 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), 5559 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 5560 (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>; 5561// 32-bits -> double. 5562defm : UIntToFPROLoadPat<f64, i32, load, 5563 UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>; 5564def : Pat <(f64 (uint_to_fp (i32 5565 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), 5566 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 5567 (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>; 5568def : Pat <(f64 (uint_to_fp (i32 5569 (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))), 5570 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 5571 (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>; 5572// 64-bits -> double are handled in target specific dag combine: 5573// performIntToFpCombine. 5574} // let Predicates = [HasNEON] 5575 5576//===----------------------------------------------------------------------===// 5577// Advanced SIMD three different-sized vector instructions. 5578//===----------------------------------------------------------------------===// 5579 5580defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>; 5581defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>; 5582defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>; 5583defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>; 5584defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull", AArch64pmull>; 5585defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", 5586 AArch64sabd>; 5587defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", 5588 AArch64sabd>; 5589defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl", 5590 BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>; 5591defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw", 5592 BinOpFrag<(add node:$LHS, (sext node:$RHS))>>; 5593defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal", 5594 TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>; 5595defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl", 5596 TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>; 5597defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", AArch64smull>; 5598defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal", 5599 int_aarch64_neon_sqadd>; 5600defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl", 5601 int_aarch64_neon_sqsub>; 5602defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull", 5603 int_aarch64_neon_sqdmull>; 5604defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl", 5605 BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>; 5606defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw", 5607 BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>; 5608defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", 5609 AArch64uabd>; 5610defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl", 5611 BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>; 5612defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw", 5613 BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>; 5614defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal", 5615 TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>; 5616defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl", 5617 TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>; 5618defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>; 5619defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl", 5620 BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>; 5621defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw", 5622 BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>; 5623 5624// Additional patterns for [SU]ML[AS]L 5625multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperator vecopnode, 5626 Instruction INST8B, Instruction INST4H, Instruction INST2S> { 5627 def : Pat<(v4i16 (opnode 5628 V64:$Ra, 5629 (v4i16 (extract_subvector 5630 (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)), 5631 (i64 0))))), 5632 (EXTRACT_SUBREG (v8i16 (INST8B 5633 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub), 5634 V64:$Rn, V64:$Rm)), dsub)>; 5635 def : Pat<(v2i32 (opnode 5636 V64:$Ra, 5637 (v2i32 (extract_subvector 5638 (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)), 5639 (i64 0))))), 5640 (EXTRACT_SUBREG (v4i32 (INST4H 5641 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub), 5642 V64:$Rn, V64:$Rm)), dsub)>; 5643 def : Pat<(v1i64 (opnode 5644 V64:$Ra, 5645 (v1i64 (extract_subvector 5646 (vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)), 5647 (i64 0))))), 5648 (EXTRACT_SUBREG (v2i64 (INST2S 5649 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub), 5650 V64:$Rn, V64:$Rm)), dsub)>; 5651} 5652 5653defm : Neon_mul_acc_widen_patterns<add, AArch64umull, 5654 UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>; 5655defm : Neon_mul_acc_widen_patterns<add, AArch64smull, 5656 SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>; 5657defm : Neon_mul_acc_widen_patterns<sub, AArch64umull, 5658 UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>; 5659defm : Neon_mul_acc_widen_patterns<sub, AArch64smull, 5660 SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>; 5661 5662 5663multiclass Neon_addl_extract_patterns<SDPatternOperator opnode, SDPatternOperator ext, string Inst> { 5664 def : Pat<(v4i16 (opnode (extract_subvector (ext (v8i8 V64:$Rn)), (i64 0)), 5665 (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))), 5666 (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Lv8i8_v8i16") V64:$Rn, V64:$Rm)), dsub)>; 5667 def : Pat<(v2i32 (opnode (extract_subvector (ext (v4i16 V64:$Rn)), (i64 0)), 5668 (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))), 5669 (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Lv4i16_v4i32") V64:$Rn, V64:$Rm)), dsub)>; 5670 def : Pat<(v1i64 (opnode (extract_subvector (ext (v2i32 V64:$Rn)), (i64 0)), 5671 (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))), 5672 (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Lv2i32_v2i64") V64:$Rn, V64:$Rm)), dsub)>; 5673 5674 def : Pat<(v4i16 (opnode (v4i16 V64:$Rn), 5675 (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))), 5676 (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Wv8i8_v8i16") (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>; 5677 def : Pat<(v2i32 (opnode (v2i32 V64:$Rn), 5678 (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))), 5679 (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Wv4i16_v4i32") (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>; 5680 def : Pat<(v1i64 (opnode (v1i64 V64:$Rn), 5681 (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))), 5682 (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Wv2i32_v2i64") (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>; 5683} 5684 5685defm : Neon_addl_extract_patterns<add, zanyext, "UADD">; 5686defm : Neon_addl_extract_patterns<add, sext, "SADD">; 5687defm : Neon_addl_extract_patterns<sub, zanyext, "USUB">; 5688defm : Neon_addl_extract_patterns<sub, sext, "SSUB">; 5689 5690// CodeGen patterns for addhn and subhn instructions, which can actually be 5691// written in LLVM IR without too much difficulty. 5692 5693// Prioritize ADDHN and SUBHN over UZP2. 5694let AddedComplexity = 10 in { 5695 5696// ADDHN 5697def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))), 5698 (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; 5699def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), 5700 (i32 16))))), 5701 (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; 5702def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), 5703 (i32 32))))), 5704 (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; 5705def : Pat<(concat_vectors (v8i8 V64:$Rd), 5706 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), 5707 (i32 8))))), 5708 (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 5709 V128:$Rn, V128:$Rm)>; 5710def : Pat<(concat_vectors (v4i16 V64:$Rd), 5711 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), 5712 (i32 16))))), 5713 (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 5714 V128:$Rn, V128:$Rm)>; 5715def : Pat<(concat_vectors (v2i32 V64:$Rd), 5716 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), 5717 (i32 32))))), 5718 (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 5719 V128:$Rn, V128:$Rm)>; 5720 5721// SUBHN 5722def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))), 5723 (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; 5724def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), 5725 (i32 16))))), 5726 (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; 5727def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), 5728 (i32 32))))), 5729 (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; 5730def : Pat<(concat_vectors (v8i8 V64:$Rd), 5731 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), 5732 (i32 8))))), 5733 (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 5734 V128:$Rn, V128:$Rm)>; 5735def : Pat<(concat_vectors (v4i16 V64:$Rd), 5736 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), 5737 (i32 16))))), 5738 (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 5739 V128:$Rn, V128:$Rm)>; 5740def : Pat<(concat_vectors (v2i32 V64:$Rd), 5741 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), 5742 (i32 32))))), 5743 (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 5744 V128:$Rn, V128:$Rm)>; 5745 5746} // AddedComplexity = 10 5747 5748//---------------------------------------------------------------------------- 5749// AdvSIMD bitwise extract from vector instruction. 5750//---------------------------------------------------------------------------- 5751 5752defm EXT : SIMDBitwiseExtract<"ext">; 5753 5754def AdjustExtImm : SDNodeXForm<imm, [{ 5755 return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32); 5756}]>; 5757multiclass ExtPat<ValueType VT64, ValueType VT128, int N> { 5758 def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), 5759 (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; 5760 def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), 5761 (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; 5762 // We use EXT to handle extract_subvector to copy the upper 64-bits of a 5763 // 128-bit vector. 5764 def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))), 5765 (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; 5766 // A 64-bit EXT of two halves of the same 128-bit register can be done as a 5767 // single 128-bit EXT. 5768 def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)), 5769 (extract_subvector V128:$Rn, (i64 N)), 5770 (i32 imm:$imm))), 5771 (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>; 5772 // A 64-bit EXT of the high half of a 128-bit register can be done using a 5773 // 128-bit EXT of the whole register with an adjustment to the immediate. The 5774 // top half of the other operand will be unset, but that doesn't matter as it 5775 // will not be used. 5776 def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)), 5777 V64:$Rm, 5778 (i32 imm:$imm))), 5779 (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, 5780 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), 5781 (AdjustExtImm imm:$imm)), dsub)>; 5782} 5783 5784defm : ExtPat<v8i8, v16i8, 8>; 5785defm : ExtPat<v4i16, v8i16, 4>; 5786defm : ExtPat<v4f16, v8f16, 4>; 5787defm : ExtPat<v4bf16, v8bf16, 4>; 5788defm : ExtPat<v2i32, v4i32, 2>; 5789defm : ExtPat<v2f32, v4f32, 2>; 5790defm : ExtPat<v1i64, v2i64, 1>; 5791defm : ExtPat<v1f64, v2f64, 1>; 5792 5793//---------------------------------------------------------------------------- 5794// AdvSIMD zip vector 5795//---------------------------------------------------------------------------- 5796 5797defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>; 5798defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>; 5799defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>; 5800defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>; 5801defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>; 5802defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>; 5803 5804def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (v8i16 V128:$Vn))), 5805 (v8i8 (trunc (v8i16 V128:$Vm))))), 5806 (UZP1v16i8 V128:$Vn, V128:$Vm)>; 5807def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (v4i32 V128:$Vn))), 5808 (v4i16 (trunc (v4i32 V128:$Vm))))), 5809 (UZP1v8i16 V128:$Vn, V128:$Vm)>; 5810def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))), 5811 (v2i32 (trunc (v2i64 V128:$Vm))))), 5812 (UZP1v4i32 V128:$Vn, V128:$Vm)>; 5813 5814def : Pat<(v16i8 (concat_vectors 5815 (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))), 5816 (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))), 5817 (UZP2v16i8 V128:$Vn, V128:$Vm)>; 5818def : Pat<(v8i16 (concat_vectors 5819 (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))), 5820 (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))), 5821 (UZP2v8i16 V128:$Vn, V128:$Vm)>; 5822def : Pat<(v4i32 (concat_vectors 5823 (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))), 5824 (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))), 5825 (UZP2v4i32 V128:$Vn, V128:$Vm)>; 5826 5827//---------------------------------------------------------------------------- 5828// AdvSIMD TBL/TBX instructions 5829//---------------------------------------------------------------------------- 5830 5831defm TBL : SIMDTableLookup< 0, "tbl">; 5832defm TBX : SIMDTableLookupTied<1, "tbx">; 5833 5834def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), 5835 (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>; 5836def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))), 5837 (TBLv16i8One V128:$Ri, V128:$Rn)>; 5838 5839def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd), 5840 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), 5841 (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>; 5842def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd), 5843 (v16i8 V128:$Ri), (v16i8 V128:$Rn))), 5844 (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>; 5845 5846 5847//---------------------------------------------------------------------------- 5848// AdvSIMD scalar DUP instruction 5849//---------------------------------------------------------------------------- 5850 5851defm DUP : SIMDScalarDUP<"mov">; 5852 5853//---------------------------------------------------------------------------- 5854// AdvSIMD scalar pairwise instructions 5855//---------------------------------------------------------------------------- 5856 5857defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">; 5858defm FADDP : SIMDFPPairwiseScalar<0, 0b01101, "faddp">; 5859defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">; 5860defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">; 5861defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">; 5862defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">; 5863 5864// Only the lower half of the result of the inner FADDP is used in the patterns 5865// below, so the second operand does not matter. Re-use the first input 5866// operand, so no additional dependencies need to be introduced. 5867let Predicates = [HasFullFP16] in { 5868def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))), 5869 (FADDPv2i16p 5870 (EXTRACT_SUBREG 5871 (FADDPv8f16 (FADDPv8f16 V128:$Rn, V128:$Rn), V128:$Rn), 5872 dsub))>; 5873def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))), 5874 (FADDPv2i16p (FADDPv4f16 V64:$Rn, V64:$Rn))>; 5875} 5876def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))), 5877 (FADDPv2i32p 5878 (EXTRACT_SUBREG 5879 (FADDPv4f32 V128:$Rn, V128:$Rn), 5880 dsub))>; 5881def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))), 5882 (FADDPv2i32p V64:$Rn)>; 5883def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))), 5884 (FADDPv2i64p V128:$Rn)>; 5885 5886def : Pat<(v2i64 (AArch64saddv V128:$Rn)), 5887 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; 5888def : Pat<(v2i64 (AArch64uaddv V128:$Rn)), 5889 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; 5890def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))), 5891 (FADDPv2i32p V64:$Rn)>; 5892def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))), 5893 (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>; 5894def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))), 5895 (FADDPv2i64p V128:$Rn)>; 5896def : Pat<(f32 (AArch64fmaxnmv (v2f32 V64:$Rn))), 5897 (FMAXNMPv2i32p V64:$Rn)>; 5898def : Pat<(f64 (AArch64fmaxnmv (v2f64 V128:$Rn))), 5899 (FMAXNMPv2i64p V128:$Rn)>; 5900def : Pat<(f32 (AArch64fmaxv (v2f32 V64:$Rn))), 5901 (FMAXPv2i32p V64:$Rn)>; 5902def : Pat<(f64 (AArch64fmaxv (v2f64 V128:$Rn))), 5903 (FMAXPv2i64p V128:$Rn)>; 5904def : Pat<(f32 (AArch64fminnmv (v2f32 V64:$Rn))), 5905 (FMINNMPv2i32p V64:$Rn)>; 5906def : Pat<(f64 (AArch64fminnmv (v2f64 V128:$Rn))), 5907 (FMINNMPv2i64p V128:$Rn)>; 5908def : Pat<(f32 (AArch64fminv (v2f32 V64:$Rn))), 5909 (FMINPv2i32p V64:$Rn)>; 5910def : Pat<(f64 (AArch64fminv (v2f64 V128:$Rn))), 5911 (FMINPv2i64p V128:$Rn)>; 5912 5913//---------------------------------------------------------------------------- 5914// AdvSIMD INS/DUP instructions 5915//---------------------------------------------------------------------------- 5916 5917def DUPv8i8gpr : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>; 5918def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>; 5919def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>; 5920def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>; 5921def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>; 5922def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>; 5923def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>; 5924 5925def DUPv2i64lane : SIMDDup64FromElement; 5926def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>; 5927def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>; 5928def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>; 5929def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>; 5930def DUPv8i8lane : SIMDDup8FromElement <0, ".8b", v8i8, V64>; 5931def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>; 5932 5933// DUP from a 64-bit register to a 64-bit register is just a copy 5934def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))), 5935 (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>; 5936def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))), 5937 (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>; 5938 5939def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))), 5940 (v2f32 (DUPv2i32lane 5941 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), 5942 (i64 0)))>; 5943def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))), 5944 (v4f32 (DUPv4i32lane 5945 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), 5946 (i64 0)))>; 5947def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))), 5948 (v2f64 (DUPv2i64lane 5949 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub), 5950 (i64 0)))>; 5951def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))), 5952 (v4f16 (DUPv4i16lane 5953 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), 5954 (i64 0)))>; 5955def : Pat<(v4bf16 (AArch64dup (bf16 FPR16:$Rn))), 5956 (v4bf16 (DUPv4i16lane 5957 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), 5958 (i64 0)))>; 5959def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))), 5960 (v8f16 (DUPv8i16lane 5961 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), 5962 (i64 0)))>; 5963def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))), 5964 (v8bf16 (DUPv8i16lane 5965 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), 5966 (i64 0)))>; 5967 5968def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)), 5969 (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>; 5970def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)), 5971 (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>; 5972 5973def : Pat<(v4bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)), 5974 (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>; 5975def : Pat<(v8bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)), 5976 (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>; 5977 5978def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), 5979 (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>; 5980def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), 5981 (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>; 5982def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)), 5983 (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>; 5984 5985// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane 5986// instruction even if the types don't match: we just have to remap the lane 5987// carefully. N.b. this trick only applies to truncations. 5988def VecIndex_x2 : SDNodeXForm<imm, [{ 5989 return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64); 5990}]>; 5991def VecIndex_x4 : SDNodeXForm<imm, [{ 5992 return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64); 5993}]>; 5994def VecIndex_x8 : SDNodeXForm<imm, [{ 5995 return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64); 5996}]>; 5997 5998multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT, 5999 ValueType Src128VT, ValueType ScalVT, 6000 Instruction DUP, SDNodeXForm IdxXFORM> { 6001 def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn), 6002 imm:$idx)))), 6003 (DUP V128:$Rn, (IdxXFORM imm:$idx))>; 6004 6005 def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn), 6006 imm:$idx)))), 6007 (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>; 6008} 6009 6010defm : DUPWithTruncPats<v8i8, v4i16, v8i16, i32, DUPv8i8lane, VecIndex_x2>; 6011defm : DUPWithTruncPats<v8i8, v2i32, v4i32, i32, DUPv8i8lane, VecIndex_x4>; 6012defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>; 6013 6014defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>; 6015defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>; 6016defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>; 6017 6018multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP, 6019 SDNodeXForm IdxXFORM> { 6020 def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn), 6021 imm:$idx))))), 6022 (DUP V128:$Rn, (IdxXFORM imm:$idx))>; 6023 6024 def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn), 6025 imm:$idx))))), 6026 (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>; 6027} 6028 6029defm : DUPWithTrunci64Pats<v8i8, DUPv8i8lane, VecIndex_x8>; 6030defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>; 6031defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>; 6032 6033defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>; 6034defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>; 6035defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>; 6036 6037// SMOV and UMOV definitions, with some extra patterns for convenience 6038defm SMOV : SMov; 6039defm UMOV : UMov; 6040 6041def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8), 6042 (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>; 6043def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8), 6044 (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>; 6045def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), 6046 (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>; 6047def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), 6048 (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>; 6049def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), 6050 (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>; 6051def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))), 6052 (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>; 6053 6054def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn), 6055 VectorIndexB:$idx)))), i8), 6056 (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>; 6057def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn), 6058 VectorIndexH:$idx)))), i16), 6059 (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>; 6060 6061// Extracting i8 or i16 elements will have the zero-extend transformed to 6062// an 'and' mask by type legalization since neither i8 nor i16 are legal types 6063// for AArch64. Match these patterns here since UMOV already zeroes out the high 6064// bits of the destination register. 6065def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), 6066 (i32 0xff)), 6067 (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>; 6068def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), 6069 (i32 0xffff)), 6070 (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>; 6071 6072def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn), 6073 VectorIndexB:$idx)))), (i64 0xff))), 6074 (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx)), sub_32)>; 6075def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn), 6076 VectorIndexH:$idx)))), (i64 0xffff))), 6077 (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx)), sub_32)>; 6078 6079defm INS : SIMDIns; 6080 6081def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)), 6082 (SUBREG_TO_REG (i32 0), 6083 (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; 6084def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)), 6085 (SUBREG_TO_REG (i32 0), 6086 (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; 6087 6088// The top bits will be zero from the FMOVWSr 6089def : Pat<(v8i8 (bitconvert (i64 (zext GPR32:$Rn)))), 6090 (SUBREG_TO_REG (i32 0), (f32 (FMOVWSr GPR32:$Rn)), ssub)>; 6091 6092def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)), 6093 (SUBREG_TO_REG (i32 0), 6094 (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; 6095def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)), 6096 (SUBREG_TO_REG (i32 0), 6097 (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; 6098 6099def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))), 6100 (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; 6101def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))), 6102 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; 6103 6104def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))), 6105 (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; 6106def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))), 6107 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; 6108 6109def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))), 6110 (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), 6111 (i32 FPR32:$Rn), ssub))>; 6112def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))), 6113 (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), 6114 (i32 FPR32:$Rn), ssub))>; 6115 6116def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))), 6117 (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), 6118 (i64 FPR64:$Rn), dsub))>; 6119 6120def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))), 6121 (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; 6122def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))), 6123 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; 6124 6125def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))), 6126 (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; 6127def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))), 6128 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; 6129 6130def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))), 6131 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; 6132def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))), 6133 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; 6134 6135def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))), 6136 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>; 6137 6138def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn), 6139 (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))), 6140 (EXTRACT_SUBREG 6141 (INSvi16lane 6142 (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)), 6143 VectorIndexS:$imm, 6144 (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), 6145 (i64 0)), 6146 dsub)>; 6147 6148def : Pat<(vector_insert (v8f16 V128:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)), 6149 (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>; 6150def : Pat<(vector_insert (v4f16 V64:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)), 6151 (EXTRACT_SUBREG (INSvi16gpr (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexH:$imm, WZR), dsub)>; 6152def : Pat<(vector_insert (v4f32 V128:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)), 6153 (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>; 6154def : Pat<(vector_insert (v2f32 V64:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)), 6155 (EXTRACT_SUBREG (INSvi32gpr (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexS:$imm, WZR), dsub)>; 6156def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0), (i64 VectorIndexD:$imm)), 6157 (INSvi64gpr V128:$Rn, VectorIndexS:$imm, XZR)>; 6158 6159def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn), 6160 (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))), 6161 (INSvi16lane 6162 V128:$Rn, VectorIndexH:$imm, 6163 (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), 6164 (i64 0))>; 6165 6166def : Pat<(v4bf16 (vector_insert (v4bf16 V64:$Rn), 6167 (bf16 FPR16:$Rm), (i64 VectorIndexS:$imm))), 6168 (EXTRACT_SUBREG 6169 (INSvi16lane 6170 (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), V64:$Rn, dsub)), 6171 VectorIndexS:$imm, 6172 (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), 6173 (i64 0)), 6174 dsub)>; 6175 6176def : Pat<(v8bf16 (vector_insert (v8bf16 V128:$Rn), 6177 (bf16 FPR16:$Rm), (i64 VectorIndexH:$imm))), 6178 (INSvi16lane 6179 V128:$Rn, VectorIndexH:$imm, 6180 (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), 6181 (i64 0))>; 6182 6183def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn), 6184 (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), 6185 (EXTRACT_SUBREG 6186 (INSvi32lane 6187 (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), 6188 VectorIndexS:$imm, 6189 (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), 6190 (i64 0)), 6191 dsub)>; 6192def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn), 6193 (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), 6194 (INSvi32lane 6195 V128:$Rn, VectorIndexS:$imm, 6196 (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), 6197 (i64 0))>; 6198def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn), 6199 (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))), 6200 (INSvi64lane 6201 V128:$Rn, VectorIndexD:$imm, 6202 (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)), 6203 (i64 0))>; 6204 6205def : Pat<(v2i32 (vector_insert (v2i32 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexS:$imm))), 6206 (EXTRACT_SUBREG 6207 (INSvi32gpr (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub)), 6208 VectorIndexS:$imm, GPR32:$Rm), 6209 dsub)>; 6210def : Pat<(v4i16 (vector_insert (v4i16 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexH:$imm))), 6211 (EXTRACT_SUBREG 6212 (INSvi16gpr (v8i16 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub)), 6213 VectorIndexH:$imm, GPR32:$Rm), 6214 dsub)>; 6215def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexB:$imm))), 6216 (EXTRACT_SUBREG 6217 (INSvi8gpr (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), V64:$Rn, dsub)), 6218 VectorIndexB:$imm, GPR32:$Rm), 6219 dsub)>; 6220 6221// Copy an element at a constant index in one vector into a constant indexed 6222// element of another. 6223// FIXME refactor to a shared class/dev parameterized on vector type, vector 6224// index type and INS extension 6225def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane 6226 (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs), 6227 VectorIndexB:$idx2)), 6228 (v16i8 (INSvi8lane 6229 V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2) 6230 )>; 6231def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane 6232 (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs), 6233 VectorIndexH:$idx2)), 6234 (v8i16 (INSvi16lane 6235 V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2) 6236 )>; 6237def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane 6238 (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs), 6239 VectorIndexS:$idx2)), 6240 (v4i32 (INSvi32lane 6241 V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2) 6242 )>; 6243def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane 6244 (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs), 6245 VectorIndexD:$idx2)), 6246 (v2i64 (INSvi64lane 6247 V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2) 6248 )>; 6249 6250multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, 6251 ValueType VTScal, Instruction INS> { 6252 def : Pat<(VT128 (vector_insert V128:$src, 6253 (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)), 6254 imm:$Immd)), 6255 (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>; 6256 6257 def : Pat<(VT128 (vector_insert V128:$src, 6258 (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)), 6259 imm:$Immd)), 6260 (INS V128:$src, imm:$Immd, 6261 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>; 6262 6263 def : Pat<(VT64 (vector_insert V64:$src, 6264 (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)), 6265 imm:$Immd)), 6266 (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), 6267 imm:$Immd, V128:$Rn, imm:$Immn), 6268 dsub)>; 6269 6270 def : Pat<(VT64 (vector_insert V64:$src, 6271 (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)), 6272 imm:$Immd)), 6273 (EXTRACT_SUBREG 6274 (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd, 6275 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn), 6276 dsub)>; 6277} 6278 6279defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>; 6280defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>; 6281defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>; 6282defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>; 6283 6284defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>; 6285defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>; 6286defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>; 6287defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi64lane>; 6288 6289// Insert from bitcast 6290// vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0) 6291def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)), 6292 (INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0)>; 6293def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)), 6294 (EXTRACT_SUBREG 6295 (INSvi32lane (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$src, dsub)), 6296 imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0), 6297 dsub)>; 6298def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), imm:$Immd)), 6299 (INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>; 6300 6301// bitcast of an extract 6302// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane)) 6303def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))), 6304 (EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>; 6305def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, 0)))), 6306 (EXTRACT_SUBREG V128:$src, ssub)>; 6307def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))), 6308 (EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>; 6309def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, 0)))), 6310 (EXTRACT_SUBREG V128:$src, dsub)>; 6311 6312// Floating point vector extractions are codegen'd as either a sequence of 6313// subregister extractions, or a MOV (aka DUP here) if 6314// the lane number is anything other than zero. 6315def : Pat<(vector_extract (v2f64 V128:$Rn), 0), 6316 (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>; 6317def : Pat<(vector_extract (v4f32 V128:$Rn), 0), 6318 (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>; 6319def : Pat<(vector_extract (v8f16 V128:$Rn), 0), 6320 (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>; 6321def : Pat<(vector_extract (v8bf16 V128:$Rn), 0), 6322 (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>; 6323 6324 6325def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx), 6326 (f64 (DUPi64 V128:$Rn, VectorIndexD:$idx))>; 6327def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx), 6328 (f32 (DUPi32 V128:$Rn, VectorIndexS:$idx))>; 6329def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx), 6330 (f16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>; 6331def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx), 6332 (bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>; 6333 6334// All concat_vectors operations are canonicalised to act on i64 vectors for 6335// AArch64. In the general case we need an instruction, which had just as well be 6336// INS. 6337class ConcatPat<ValueType DstTy, ValueType SrcTy> 6338 : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)), 6339 (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1, 6340 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>; 6341 6342def : ConcatPat<v2i64, v1i64>; 6343def : ConcatPat<v2f64, v1f64>; 6344def : ConcatPat<v4i32, v2i32>; 6345def : ConcatPat<v4f32, v2f32>; 6346def : ConcatPat<v8i16, v4i16>; 6347def : ConcatPat<v8f16, v4f16>; 6348def : ConcatPat<v8bf16, v4bf16>; 6349def : ConcatPat<v16i8, v8i8>; 6350 6351// If the high lanes are undef, though, we can just ignore them: 6352class ConcatUndefPat<ValueType DstTy, ValueType SrcTy> 6353 : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)), 6354 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>; 6355 6356def : ConcatUndefPat<v2i64, v1i64>; 6357def : ConcatUndefPat<v2f64, v1f64>; 6358def : ConcatUndefPat<v4i32, v2i32>; 6359def : ConcatUndefPat<v4f32, v2f32>; 6360def : ConcatUndefPat<v8i16, v4i16>; 6361def : ConcatUndefPat<v16i8, v8i8>; 6362 6363//---------------------------------------------------------------------------- 6364// AdvSIMD across lanes instructions 6365//---------------------------------------------------------------------------- 6366 6367defm ADDV : SIMDAcrossLanesBHS<0, 0b11011, "addv">; 6368defm SMAXV : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">; 6369defm SMINV : SIMDAcrossLanesBHS<0, 0b11010, "sminv">; 6370defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">; 6371defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">; 6372defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">; 6373defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">; 6374defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", AArch64fmaxnmv>; 6375defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", AArch64fmaxv>; 6376defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", AArch64fminnmv>; 6377defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", AArch64fminv>; 6378 6379multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp> { 6380 // Patterns for addv(addlp(x)) ==> addlv 6381 def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef, 6382 (v4i16 (AArch64uaddv (v4i16 (addlp (v8i8 V64:$op))))), 6383 (i64 0))), (i64 0))), 6384 (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), 6385 (!cast<Instruction>(Opc#"v8i8v") V64:$op), hsub), ssub)>; 6386 def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (addlp (v16i8 V128:$op))))), (i64 0))), 6387 (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), 6388 (!cast<Instruction>(Opc#"v16i8v") V128:$op), hsub), ssub)>; 6389 def : Pat<(v4i32 (AArch64uaddv (v4i32 (addlp (v8i16 V128:$op))))), 6390 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v8i16v") V128:$op), ssub)>; 6391 6392 // Patterns for addp(addlp(x))) ==> addlv 6393 def : Pat<(v2i32 (AArch64uaddv (v2i32 (addlp (v4i16 V64:$op))))), 6394 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i16v") V64:$op), ssub)>; 6395 def : Pat<(v2i64 (AArch64uaddv (v2i64 (addlp (v4i32 V128:$op))))), 6396 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i32v") V128:$op), dsub)>; 6397} 6398 6399defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>; 6400defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>; 6401 6402// Patterns for uaddlv(uaddlp(x)) ==> uaddlv 6403def : Pat<(i64 (int_aarch64_neon_uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))), 6404 (i64 (EXTRACT_SUBREG 6405 (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub)), 6406 dsub))>; 6407 6408def : Pat<(i32 (int_aarch64_neon_uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))), 6409 (i32 (EXTRACT_SUBREG 6410 (v8i16 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub)), 6411 ssub))>; 6412 6413// Patterns for across-vector intrinsics, that have a node equivalent, that 6414// returns a vector (with only the low lane defined) instead of a scalar. 6415// In effect, opNode is the same as (scalar_to_vector (IntNode)). 6416multiclass SIMDAcrossLanesIntrinsic<string baseOpc, 6417 SDPatternOperator opNode> { 6418// If a lane instruction caught the vector_extract around opNode, we can 6419// directly match the latter to the instruction. 6420def : Pat<(v8i8 (opNode V64:$Rn)), 6421 (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), 6422 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>; 6423def : Pat<(v16i8 (opNode V128:$Rn)), 6424 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6425 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>; 6426def : Pat<(v4i16 (opNode V64:$Rn)), 6427 (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), 6428 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>; 6429def : Pat<(v8i16 (opNode V128:$Rn)), 6430 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), 6431 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>; 6432def : Pat<(v4i32 (opNode V128:$Rn)), 6433 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), 6434 (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>; 6435 6436 6437// If none did, fallback to the explicit patterns, consuming the vector_extract. 6438def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)), 6439 (i64 0)), (i64 0))), 6440 (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), 6441 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), 6442 bsub), ssub)>; 6443def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))), 6444 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6445 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), 6446 bsub), ssub)>; 6447def : Pat<(i32 (vector_extract (insert_subvector undef, 6448 (v4i16 (opNode V64:$Rn)), (i64 0)), (i64 0))), 6449 (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), 6450 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), 6451 hsub), ssub)>; 6452def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))), 6453 (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), 6454 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), 6455 hsub), ssub)>; 6456def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))), 6457 (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), 6458 (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), 6459 ssub), ssub)>; 6460 6461} 6462 6463multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, 6464 SDPatternOperator opNode> 6465 : SIMDAcrossLanesIntrinsic<baseOpc, opNode> { 6466// If there is a sign extension after this intrinsic, consume it as smov already 6467// performed it 6468def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef, 6469 (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), i8)), 6470 (i32 (SMOVvi8to32 6471 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6472 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), 6473 (i64 0)))>; 6474def : Pat<(i32 (sext_inreg (i32 (vector_extract 6475 (opNode (v16i8 V128:$Rn)), (i64 0))), i8)), 6476 (i32 (SMOVvi8to32 6477 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6478 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), 6479 (i64 0)))>; 6480def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef, 6481 (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), i16)), 6482 (i32 (SMOVvi16to32 6483 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6484 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), 6485 (i64 0)))>; 6486def : Pat<(i32 (sext_inreg (i32 (vector_extract 6487 (opNode (v8i16 V128:$Rn)), (i64 0))), i16)), 6488 (i32 (SMOVvi16to32 6489 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6490 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), 6491 (i64 0)))>; 6492} 6493 6494multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, 6495 SDPatternOperator opNode> 6496 : SIMDAcrossLanesIntrinsic<baseOpc, opNode> { 6497// If there is a masking operation keeping only what has been actually 6498// generated, consume it. 6499def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, 6500 (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), maski8_or_more)), 6501 (i32 (EXTRACT_SUBREG 6502 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6503 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), 6504 ssub))>; 6505def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))), 6506 maski8_or_more)), 6507 (i32 (EXTRACT_SUBREG 6508 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6509 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), 6510 ssub))>; 6511def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, 6512 (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), maski16_or_more)), 6513 (i32 (EXTRACT_SUBREG 6514 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6515 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), 6516 ssub))>; 6517def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))), 6518 maski16_or_more)), 6519 (i32 (EXTRACT_SUBREG 6520 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6521 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), 6522 ssub))>; 6523} 6524 6525defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>; 6526// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm 6527def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))), 6528 (ADDPv2i32 V64:$Rn, V64:$Rn)>; 6529 6530defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>; 6531// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm 6532def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))), 6533 (ADDPv2i32 V64:$Rn, V64:$Rn)>; 6534 6535defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>; 6536def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))), 6537 (SMAXPv2i32 V64:$Rn, V64:$Rn)>; 6538 6539defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>; 6540def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))), 6541 (SMINPv2i32 V64:$Rn, V64:$Rn)>; 6542 6543defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>; 6544def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))), 6545 (UMAXPv2i32 V64:$Rn, V64:$Rn)>; 6546 6547defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>; 6548def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))), 6549 (UMINPv2i32 V64:$Rn, V64:$Rn)>; 6550 6551multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> { 6552 def : Pat<(i32 (intOp (v8i8 V64:$Rn))), 6553 (i32 (SMOVvi16to32 6554 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6555 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), 6556 (i64 0)))>; 6557def : Pat<(i32 (intOp (v16i8 V128:$Rn))), 6558 (i32 (SMOVvi16to32 6559 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6560 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), 6561 (i64 0)))>; 6562 6563def : Pat<(i32 (intOp (v4i16 V64:$Rn))), 6564 (i32 (EXTRACT_SUBREG 6565 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6566 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), 6567 ssub))>; 6568def : Pat<(i32 (intOp (v8i16 V128:$Rn))), 6569 (i32 (EXTRACT_SUBREG 6570 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6571 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), 6572 ssub))>; 6573 6574def : Pat<(i64 (intOp (v4i32 V128:$Rn))), 6575 (i64 (EXTRACT_SUBREG 6576 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6577 (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), 6578 dsub))>; 6579} 6580 6581multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc, 6582 Intrinsic intOp> { 6583 def : Pat<(i32 (intOp (v8i8 V64:$Rn))), 6584 (i32 (EXTRACT_SUBREG 6585 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6586 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), 6587 ssub))>; 6588def : Pat<(i32 (intOp (v16i8 V128:$Rn))), 6589 (i32 (EXTRACT_SUBREG 6590 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6591 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), 6592 ssub))>; 6593 6594def : Pat<(i32 (intOp (v4i16 V64:$Rn))), 6595 (i32 (EXTRACT_SUBREG 6596 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6597 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), 6598 ssub))>; 6599def : Pat<(i32 (intOp (v8i16 V128:$Rn))), 6600 (i32 (EXTRACT_SUBREG 6601 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6602 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), 6603 ssub))>; 6604 6605def : Pat<(i64 (intOp (v4i32 V128:$Rn))), 6606 (i64 (EXTRACT_SUBREG 6607 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6608 (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), 6609 dsub))>; 6610} 6611 6612defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>; 6613defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>; 6614 6615// The vaddlv_s32 intrinsic gets mapped to SADDLP. 6616def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))), 6617 (i64 (EXTRACT_SUBREG 6618 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6619 (SADDLPv2i32_v1i64 V64:$Rn), dsub), 6620 dsub))>; 6621// The vaddlv_u32 intrinsic gets mapped to UADDLP. 6622def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))), 6623 (i64 (EXTRACT_SUBREG 6624 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6625 (UADDLPv2i32_v1i64 V64:$Rn), dsub), 6626 dsub))>; 6627 6628//------------------------------------------------------------------------------ 6629// AdvSIMD modified immediate instructions 6630//------------------------------------------------------------------------------ 6631 6632// AdvSIMD BIC 6633defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>; 6634// AdvSIMD ORR 6635defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>; 6636 6637def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>; 6638def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>; 6639def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>; 6640def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>; 6641 6642def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>; 6643def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>; 6644def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>; 6645def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>; 6646 6647def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>; 6648def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>; 6649def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>; 6650def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>; 6651 6652def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>; 6653def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>; 6654def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>; 6655def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>; 6656 6657// AdvSIMD FMOV 6658def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8, 6659 "fmov", ".2d", 6660 [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; 6661def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8, 6662 "fmov", ".2s", 6663 [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; 6664def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8, 6665 "fmov", ".4s", 6666 [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; 6667let Predicates = [HasNEON, HasFullFP16] in { 6668def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8, 6669 "fmov", ".4h", 6670 [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; 6671def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8, 6672 "fmov", ".8h", 6673 [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; 6674} // Predicates = [HasNEON, HasFullFP16] 6675 6676// AdvSIMD MOVI 6677 6678// EDIT byte mask: scalar 6679let isReMaterializable = 1, isAsCheapAsAMove = 1 in 6680def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi", 6681 [(set FPR64:$Rd, simdimmtype10:$imm8)]>; 6682// The movi_edit node has the immediate value already encoded, so we use 6683// a plain imm0_255 here. 6684def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)), 6685 (MOVID imm0_255:$shift)>; 6686 6687// EDIT byte mask: 2d 6688 6689// The movi_edit node has the immediate value already encoded, so we use 6690// a plain imm0_255 in the pattern 6691let isReMaterializable = 1, isAsCheapAsAMove = 1 in 6692def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128, 6693 simdimmtype10, 6694 "movi", ".2d", 6695 [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>; 6696 6697def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>; 6698def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>; 6699def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>; 6700def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>; 6701def : Pat<(v2f64 immAllZerosV), (MOVIv2d_ns (i32 0))>; 6702def : Pat<(v4f32 immAllZerosV), (MOVIv2d_ns (i32 0))>; 6703def : Pat<(v8f16 immAllZerosV), (MOVIv2d_ns (i32 0))>; 6704def : Pat<(v8bf16 immAllZerosV), (MOVIv2d_ns (i32 0))>; 6705 6706def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>; 6707def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>; 6708def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>; 6709def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>; 6710 6711// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the 6712// extract is free and this gives better MachineCSE results. 6713def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>; 6714def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>; 6715def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>; 6716def : Pat<(v8i8 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>; 6717def : Pat<(v1f64 immAllZerosV), (MOVID (i32 0))>; 6718def : Pat<(v2f32 immAllZerosV), (MOVID (i32 0))>; 6719def : Pat<(v4f16 immAllZerosV), (MOVID (i32 0))>; 6720def : Pat<(v4bf16 immAllZerosV), (MOVID (i32 0))>; 6721 6722def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>; 6723def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>; 6724def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>; 6725def : Pat<(v8i8 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>; 6726 6727// EDIT per word & halfword: 2s, 4h, 4s, & 8h 6728let isReMaterializable = 1, isAsCheapAsAMove = 1 in 6729defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">; 6730 6731let Predicates = [HasNEON] in { 6732 // Using the MOVI to materialize fp constants. 6733 def : Pat<(f32 fpimm32SIMDModImmType4:$in), 6734 (EXTRACT_SUBREG (MOVIv2i32 (fpimm32SIMDModImmType4XForm f32:$in), 6735 (i32 24)), 6736 ssub)>; 6737} 6738 6739def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 6740def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 6741def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 6742def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 6743 6744def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 6745def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 6746def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 6747def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 6748 6749def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), 6750 (MOVIv2i32 imm0_255:$imm8, imm:$shift)>; 6751def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), 6752 (MOVIv4i32 imm0_255:$imm8, imm:$shift)>; 6753def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), 6754 (MOVIv4i16 imm0_255:$imm8, imm:$shift)>; 6755def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), 6756 (MOVIv8i16 imm0_255:$imm8, imm:$shift)>; 6757 6758let isReMaterializable = 1, isAsCheapAsAMove = 1 in { 6759// EDIT per word: 2s & 4s with MSL shifter 6760def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s", 6761 [(set (v2i32 V64:$Rd), 6762 (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 6763def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", 6764 [(set (v4i32 V128:$Rd), 6765 (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 6766 6767// Per byte: 8b & 16b 6768def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255, 6769 "movi", ".8b", 6770 [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>; 6771 6772def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255, 6773 "movi", ".16b", 6774 [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>; 6775} 6776 6777// AdvSIMD MVNI 6778 6779// EDIT per word & halfword: 2s, 4h, 4s, & 8h 6780let isReMaterializable = 1, isAsCheapAsAMove = 1 in 6781defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">; 6782 6783def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 6784def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 6785def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 6786def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 6787 6788def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 6789def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 6790def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 6791def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 6792 6793def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 6794 (MVNIv2i32 imm0_255:$imm8, imm:$shift)>; 6795def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 6796 (MVNIv4i32 imm0_255:$imm8, imm:$shift)>; 6797def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 6798 (MVNIv4i16 imm0_255:$imm8, imm:$shift)>; 6799def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 6800 (MVNIv8i16 imm0_255:$imm8, imm:$shift)>; 6801 6802// EDIT per word: 2s & 4s with MSL shifter 6803let isReMaterializable = 1, isAsCheapAsAMove = 1 in { 6804def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s", 6805 [(set (v2i32 V64:$Rd), 6806 (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 6807def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s", 6808 [(set (v4i32 V128:$Rd), 6809 (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 6810} 6811 6812//---------------------------------------------------------------------------- 6813// AdvSIMD indexed element 6814//---------------------------------------------------------------------------- 6815 6816let hasSideEffects = 0 in { 6817 defm FMLA : SIMDFPIndexedTied<0, 0b0001, "fmla">; 6818 defm FMLS : SIMDFPIndexedTied<0, 0b0101, "fmls">; 6819} 6820 6821// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the 6822// instruction expects the addend first, while the intrinsic expects it last. 6823 6824// On the other hand, there are quite a few valid combinatorial options due to 6825// the commutativity of multiplication and the fact that (-x) * y = x * (-y). 6826defm : SIMDFPIndexedTiedPatterns<"FMLA", 6827 TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>; 6828defm : SIMDFPIndexedTiedPatterns<"FMLA", 6829 TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>; 6830 6831defm : SIMDFPIndexedTiedPatterns<"FMLS", 6832 TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; 6833defm : SIMDFPIndexedTiedPatterns<"FMLS", 6834 TriOpFrag<(any_fma node:$RHS, (fneg node:$MHS), node:$LHS)> >; 6835defm : SIMDFPIndexedTiedPatterns<"FMLS", 6836 TriOpFrag<(any_fma (fneg node:$RHS), node:$MHS, node:$LHS)> >; 6837defm : SIMDFPIndexedTiedPatterns<"FMLS", 6838 TriOpFrag<(any_fma (fneg node:$MHS), node:$RHS, node:$LHS)> >; 6839 6840multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> { 6841 // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit 6842 // and DUP scalar. 6843 def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), 6844 (AArch64duplane32 (v4f32 (fneg V128:$Rm)), 6845 VectorIndexS:$idx))), 6846 (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; 6847 def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), 6848 (v2f32 (AArch64duplane32 6849 (v4f32 (insert_subvector undef, 6850 (v2f32 (fneg V64:$Rm)), 6851 (i64 0))), 6852 VectorIndexS:$idx)))), 6853 (FMLSv2i32_indexed V64:$Rd, V64:$Rn, 6854 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), 6855 VectorIndexS:$idx)>; 6856 def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), 6857 (AArch64dup (f32 (fneg FPR32Op:$Rm))))), 6858 (FMLSv2i32_indexed V64:$Rd, V64:$Rn, 6859 (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; 6860 6861 // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit 6862 // and DUP scalar. 6863 def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), 6864 (AArch64duplane32 (v4f32 (fneg V128:$Rm)), 6865 VectorIndexS:$idx))), 6866 (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm, 6867 VectorIndexS:$idx)>; 6868 def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), 6869 (v4f32 (AArch64duplane32 6870 (v4f32 (insert_subvector undef, 6871 (v2f32 (fneg V64:$Rm)), 6872 (i64 0))), 6873 VectorIndexS:$idx)))), 6874 (FMLSv4i32_indexed V128:$Rd, V128:$Rn, 6875 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), 6876 VectorIndexS:$idx)>; 6877 def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), 6878 (AArch64dup (f32 (fneg FPR32Op:$Rm))))), 6879 (FMLSv4i32_indexed V128:$Rd, V128:$Rn, 6880 (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; 6881 6882 // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar 6883 // (DUPLANE from 64-bit would be trivial). 6884 def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), 6885 (AArch64duplane64 (v2f64 (fneg V128:$Rm)), 6886 VectorIndexD:$idx))), 6887 (FMLSv2i64_indexed 6888 V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; 6889 def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), 6890 (AArch64dup (f64 (fneg FPR64Op:$Rm))))), 6891 (FMLSv2i64_indexed V128:$Rd, V128:$Rn, 6892 (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; 6893 6894 // 2 variants for 32-bit scalar version: extract from .2s or from .4s 6895 def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), 6896 (vector_extract (v4f32 (fneg V128:$Rm)), 6897 VectorIndexS:$idx))), 6898 (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, 6899 V128:$Rm, VectorIndexS:$idx)>; 6900 def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), 6901 (vector_extract (v4f32 (insert_subvector undef, 6902 (v2f32 (fneg V64:$Rm)), 6903 (i64 0))), 6904 VectorIndexS:$idx))), 6905 (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, 6906 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; 6907 6908 // 1 variant for 64-bit scalar version: extract from .1d or from .2d 6909 def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn), 6910 (vector_extract (v2f64 (fneg V128:$Rm)), 6911 VectorIndexS:$idx))), 6912 (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn, 6913 V128:$Rm, VectorIndexS:$idx)>; 6914} 6915 6916defm : FMLSIndexedAfterNegPatterns< 6917 TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >; 6918defm : FMLSIndexedAfterNegPatterns< 6919 TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)> >; 6920 6921defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>; 6922defm FMUL : SIMDFPIndexed<0, 0b1001, "fmul", any_fmul>; 6923 6924def : Pat<(v2f32 (any_fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))), 6925 (FMULv2i32_indexed V64:$Rn, 6926 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), 6927 (i64 0))>; 6928def : Pat<(v4f32 (any_fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))), 6929 (FMULv4i32_indexed V128:$Rn, 6930 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), 6931 (i64 0))>; 6932def : Pat<(v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))), 6933 (FMULv2i64_indexed V128:$Rn, 6934 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub), 6935 (i64 0))>; 6936 6937defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>; 6938defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>; 6939 6940defm SQDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqdmulh_lane, 6941 int_aarch64_neon_sqdmulh_laneq>; 6942defm SQRDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqrdmulh_lane, 6943 int_aarch64_neon_sqrdmulh_laneq>; 6944 6945// Generated by MachineCombine 6946defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>; 6947defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>; 6948 6949defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>; 6950defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal", 6951 TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>; 6952defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl", 6953 TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>; 6954defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>; 6955defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", 6956 int_aarch64_neon_sqadd>; 6957defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", 6958 int_aarch64_neon_sqsub>; 6959defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah", 6960 int_aarch64_neon_sqrdmlah>; 6961defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh", 6962 int_aarch64_neon_sqrdmlsh>; 6963defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>; 6964defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", 6965 TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>; 6966defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl", 6967 TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>; 6968defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", AArch64umull>; 6969 6970// A scalar sqdmull with the second operand being a vector lane can be 6971// handled directly with the indexed instruction encoding. 6972def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), 6973 (vector_extract (v4i32 V128:$Vm), 6974 VectorIndexS:$idx)), 6975 (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>; 6976 6977//---------------------------------------------------------------------------- 6978// AdvSIMD scalar shift instructions 6979//---------------------------------------------------------------------------- 6980defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">; 6981defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">; 6982defm SCVTF : SIMDFPScalarRShift<0, 0b11100, "scvtf">; 6983defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf">; 6984// Codegen patterns for the above. We don't put these directly on the 6985// instructions because TableGen's type inference can't handle the truth. 6986// Having the same base pattern for fp <--> int totally freaks it out. 6987def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm), 6988 (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>; 6989def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm), 6990 (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>; 6991def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)), 6992 (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; 6993def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)), 6994 (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; 6995def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn), 6996 vecshiftR64:$imm)), 6997 (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; 6998def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn), 6999 vecshiftR64:$imm)), 7000 (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; 7001def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm), 7002 (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>; 7003def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), 7004 (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; 7005def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn), 7006 vecshiftR64:$imm)), 7007 (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; 7008def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), 7009 (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; 7010def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn), 7011 vecshiftR64:$imm)), 7012 (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; 7013def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm), 7014 (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>; 7015 7016// Patterns for FP16 Intrinsics - requires reg copy to/from as i16s not supported. 7017 7018def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)), 7019 (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>; 7020def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)), 7021 (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>; 7022def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)), 7023 (SCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>; 7024def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp 7025 (and FPR32:$Rn, (i32 65535)), 7026 vecshiftR16:$imm)), 7027 (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>; 7028def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)), 7029 (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>; 7030def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)), 7031 (UCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>; 7032def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)), 7033 (i32 (INSERT_SUBREG 7034 (i32 (IMPLICIT_DEF)), 7035 (FCVTZSh FPR16:$Rn, vecshiftR32:$imm), 7036 hsub))>; 7037def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)), 7038 (i64 (INSERT_SUBREG 7039 (i64 (IMPLICIT_DEF)), 7040 (FCVTZSh FPR16:$Rn, vecshiftR64:$imm), 7041 hsub))>; 7042def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)), 7043 (i32 (INSERT_SUBREG 7044 (i32 (IMPLICIT_DEF)), 7045 (FCVTZUh FPR16:$Rn, vecshiftR32:$imm), 7046 hsub))>; 7047def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)), 7048 (i64 (INSERT_SUBREG 7049 (i64 (IMPLICIT_DEF)), 7050 (FCVTZUh FPR16:$Rn, vecshiftR64:$imm), 7051 hsub))>; 7052def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))), 7053 (i32 (INSERT_SUBREG 7054 (i32 (IMPLICIT_DEF)), 7055 (FACGE16 FPR16:$Rn, FPR16:$Rm), 7056 hsub))>; 7057def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))), 7058 (i32 (INSERT_SUBREG 7059 (i32 (IMPLICIT_DEF)), 7060 (FACGT16 FPR16:$Rn, FPR16:$Rm), 7061 hsub))>; 7062 7063defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>; 7064defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">; 7065defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn", 7066 int_aarch64_neon_sqrshrn>; 7067defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun", 7068 int_aarch64_neon_sqrshrun>; 7069defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; 7070defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; 7071defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn", 7072 int_aarch64_neon_sqshrn>; 7073defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun", 7074 int_aarch64_neon_sqshrun>; 7075defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">; 7076defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>; 7077defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra", 7078 TriOpFrag<(add node:$LHS, 7079 (AArch64srshri node:$MHS, node:$RHS))>>; 7080defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", AArch64vashr>; 7081defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra", 7082 TriOpFrag<(add_and_or_is_add node:$LHS, 7083 (AArch64vashr node:$MHS, node:$RHS))>>; 7084defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn", 7085 int_aarch64_neon_uqrshrn>; 7086defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; 7087defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn", 7088 int_aarch64_neon_uqshrn>; 7089defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>; 7090defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra", 7091 TriOpFrag<(add node:$LHS, 7092 (AArch64urshri node:$MHS, node:$RHS))>>; 7093defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", AArch64vlshr>; 7094defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra", 7095 TriOpFrag<(add_and_or_is_add node:$LHS, 7096 (AArch64vlshr node:$MHS, node:$RHS))>>; 7097 7098//---------------------------------------------------------------------------- 7099// AdvSIMD vector shift instructions 7100//---------------------------------------------------------------------------- 7101defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>; 7102defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>; 7103defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf", 7104 int_aarch64_neon_vcvtfxs2fp>; 7105defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", 7106 BinOpFrag<(trunc (AArch64roundingvlshr node:$LHS, node:$RHS))>>; 7107defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>; 7108 7109// X << 1 ==> X + X 7110class SHLToADDPat<ValueType ty, RegisterClass regtype> 7111 : Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))), 7112 (!cast<Instruction>("ADD"#ty) regtype:$Rn, regtype:$Rn)>; 7113 7114def : SHLToADDPat<v16i8, FPR128>; 7115def : SHLToADDPat<v8i16, FPR128>; 7116def : SHLToADDPat<v4i32, FPR128>; 7117def : SHLToADDPat<v2i64, FPR128>; 7118def : SHLToADDPat<v8i8, FPR64>; 7119def : SHLToADDPat<v4i16, FPR64>; 7120def : SHLToADDPat<v2i32, FPR64>; 7121 7122defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn", 7123 BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>; 7124defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>; 7125def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), 7126 (i32 vecshiftL64:$imm))), 7127 (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>; 7128defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn", 7129 int_aarch64_neon_sqrshrn>; 7130defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun", 7131 int_aarch64_neon_sqrshrun>; 7132defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; 7133defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; 7134defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn", 7135 int_aarch64_neon_sqshrn>; 7136defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun", 7137 int_aarch64_neon_sqshrun>; 7138defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>; 7139def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), 7140 (i32 vecshiftR64:$imm))), 7141 (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>; 7142defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>; 7143defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra", 7144 TriOpFrag<(add node:$LHS, 7145 (AArch64srshri node:$MHS, node:$RHS))> >; 7146defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll", 7147 BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>; 7148 7149defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>; 7150defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra", 7151 TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>; 7152defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf", 7153 int_aarch64_neon_vcvtfxu2fp>; 7154defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn", 7155 int_aarch64_neon_uqrshrn>; 7156defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; 7157defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn", 7158 int_aarch64_neon_uqshrn>; 7159defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>; 7160defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra", 7161 TriOpFrag<(add node:$LHS, 7162 (AArch64urshri node:$MHS, node:$RHS))> >; 7163defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll", 7164 BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>; 7165defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>; 7166defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra", 7167 TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >; 7168 7169// RADDHN patterns for when RSHRN shifts by half the size of the vector element 7170def : Pat<(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))), 7171 (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>; 7172def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))), 7173 (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>; 7174let AddedComplexity = 5 in 7175def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))), 7176 (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>; 7177 7178// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element 7179def : Pat<(v16i8 (concat_vectors 7180 (v8i8 V64:$Vd), 7181 (v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))))), 7182 (RADDHNv8i16_v16i8 7183 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, 7184 (v8i16 (MOVIv2d_ns (i32 0))))>; 7185def : Pat<(v8i16 (concat_vectors 7186 (v4i16 V64:$Vd), 7187 (v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))))), 7188 (RADDHNv4i32_v8i16 7189 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, 7190 (v4i32 (MOVIv2d_ns (i32 0))))>; 7191let AddedComplexity = 5 in 7192def : Pat<(v4i32 (concat_vectors 7193 (v2i32 V64:$Vd), 7194 (v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))))), 7195 (RADDHNv2i64_v4i32 7196 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, 7197 (v2i64 (MOVIv2d_ns (i32 0))))>; 7198 7199// SHRN patterns for when a logical right shift was used instead of arithmetic 7200// (the immediate guarantees no sign bits actually end up in the result so it 7201// doesn't matter). 7202def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))), 7203 (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>; 7204def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))), 7205 (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>; 7206def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))), 7207 (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>; 7208 7209def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd), 7210 (trunc (AArch64vlshr (v8i16 V128:$Rn), 7211 vecshiftR16Narrow:$imm)))), 7212 (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 7213 V128:$Rn, vecshiftR16Narrow:$imm)>; 7214def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd), 7215 (trunc (AArch64vlshr (v4i32 V128:$Rn), 7216 vecshiftR32Narrow:$imm)))), 7217 (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 7218 V128:$Rn, vecshiftR32Narrow:$imm)>; 7219def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd), 7220 (trunc (AArch64vlshr (v2i64 V128:$Rn), 7221 vecshiftR64Narrow:$imm)))), 7222 (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 7223 V128:$Rn, vecshiftR32Narrow:$imm)>; 7224 7225// Vector sign and zero extensions are implemented with SSHLL and USSHLL. 7226// Anyexts are implemented as zexts. 7227def : Pat<(v8i16 (sext (v8i8 V64:$Rn))), (SSHLLv8i8_shift V64:$Rn, (i32 0))>; 7228def : Pat<(v8i16 (zext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>; 7229def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>; 7230def : Pat<(v4i32 (sext (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>; 7231def : Pat<(v4i32 (zext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>; 7232def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>; 7233def : Pat<(v2i64 (sext (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>; 7234def : Pat<(v2i64 (zext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; 7235def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; 7236// Also match an extend from the upper half of a 128 bit source register. 7237def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), 7238 (USHLLv16i8_shift V128:$Rn, (i32 0))>; 7239def : Pat<(v8i16 (zext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), 7240 (USHLLv16i8_shift V128:$Rn, (i32 0))>; 7241def : Pat<(v8i16 (sext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), 7242 (SSHLLv16i8_shift V128:$Rn, (i32 0))>; 7243def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), 7244 (USHLLv8i16_shift V128:$Rn, (i32 0))>; 7245def : Pat<(v4i32 (zext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), 7246 (USHLLv8i16_shift V128:$Rn, (i32 0))>; 7247def : Pat<(v4i32 (sext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), 7248 (SSHLLv8i16_shift V128:$Rn, (i32 0))>; 7249def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), 7250 (USHLLv4i32_shift V128:$Rn, (i32 0))>; 7251def : Pat<(v2i64 (zext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), 7252 (USHLLv4i32_shift V128:$Rn, (i32 0))>; 7253def : Pat<(v2i64 (sext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), 7254 (SSHLLv4i32_shift V128:$Rn, (i32 0))>; 7255 7256// Vector shift sxtl aliases 7257def : InstAlias<"sxtl.8h $dst, $src1", 7258 (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 7259def : InstAlias<"sxtl $dst.8h, $src1.8b", 7260 (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 7261def : InstAlias<"sxtl.4s $dst, $src1", 7262 (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 7263def : InstAlias<"sxtl $dst.4s, $src1.4h", 7264 (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 7265def : InstAlias<"sxtl.2d $dst, $src1", 7266 (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; 7267def : InstAlias<"sxtl $dst.2d, $src1.2s", 7268 (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; 7269 7270// Vector shift sxtl2 aliases 7271def : InstAlias<"sxtl2.8h $dst, $src1", 7272 (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; 7273def : InstAlias<"sxtl2 $dst.8h, $src1.16b", 7274 (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; 7275def : InstAlias<"sxtl2.4s $dst, $src1", 7276 (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; 7277def : InstAlias<"sxtl2 $dst.4s, $src1.8h", 7278 (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; 7279def : InstAlias<"sxtl2.2d $dst, $src1", 7280 (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; 7281def : InstAlias<"sxtl2 $dst.2d, $src1.4s", 7282 (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; 7283 7284// Vector shift uxtl aliases 7285def : InstAlias<"uxtl.8h $dst, $src1", 7286 (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 7287def : InstAlias<"uxtl $dst.8h, $src1.8b", 7288 (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 7289def : InstAlias<"uxtl.4s $dst, $src1", 7290 (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 7291def : InstAlias<"uxtl $dst.4s, $src1.4h", 7292 (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 7293def : InstAlias<"uxtl.2d $dst, $src1", 7294 (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; 7295def : InstAlias<"uxtl $dst.2d, $src1.2s", 7296 (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; 7297 7298// Vector shift uxtl2 aliases 7299def : InstAlias<"uxtl2.8h $dst, $src1", 7300 (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; 7301def : InstAlias<"uxtl2 $dst.8h, $src1.16b", 7302 (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; 7303def : InstAlias<"uxtl2.4s $dst, $src1", 7304 (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; 7305def : InstAlias<"uxtl2 $dst.4s, $src1.8h", 7306 (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; 7307def : InstAlias<"uxtl2.2d $dst, $src1", 7308 (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; 7309def : InstAlias<"uxtl2 $dst.2d, $src1.4s", 7310 (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; 7311 7312// If an integer is about to be converted to a floating point value, 7313// just load it on the floating point unit. 7314// These patterns are more complex because floating point loads do not 7315// support sign extension. 7316// The sign extension has to be explicitly added and is only supported for 7317// one step: byte-to-half, half-to-word, word-to-doubleword. 7318// SCVTF GPR -> FPR is 9 cycles. 7319// SCVTF FPR -> FPR is 4 cyclces. 7320// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles. 7321// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR 7322// and still being faster. 7323// However, this is not good for code size. 7324// 8-bits -> float. 2 sizes step-up. 7325class SExtLoadi8CVTf32Pat<dag addrmode, dag INST> 7326 : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))), 7327 (SCVTFv1i32 (f32 (EXTRACT_SUBREG 7328 (SSHLLv4i16_shift 7329 (f64 7330 (EXTRACT_SUBREG 7331 (SSHLLv8i8_shift 7332 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 7333 INST, 7334 bsub), 7335 0), 7336 dsub)), 7337 0), 7338 ssub)))>, 7339 Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>; 7340 7341def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext), 7342 (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>; 7343def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext), 7344 (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>; 7345def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset), 7346 (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>; 7347def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset), 7348 (LDURBi GPR64sp:$Rn, simm9:$offset)>; 7349 7350// 16-bits -> float. 1 size step-up. 7351class SExtLoadi16CVTf32Pat<dag addrmode, dag INST> 7352 : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))), 7353 (SCVTFv1i32 (f32 (EXTRACT_SUBREG 7354 (SSHLLv4i16_shift 7355 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 7356 INST, 7357 hsub), 7358 0), 7359 ssub)))>, 7360 Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>; 7361 7362def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext), 7363 (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>; 7364def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext), 7365 (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>; 7366def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), 7367 (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>; 7368def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset), 7369 (LDURHi GPR64sp:$Rn, simm9:$offset)>; 7370 7371// 32-bits to 32-bits are handled in target specific dag combine: 7372// performIntToFpCombine. 7373// 64-bits integer to 32-bits floating point, not possible with 7374// SCVTF on floating point registers (both source and destination 7375// must have the same size). 7376 7377// Here are the patterns for 8, 16, 32, and 64-bits to double. 7378// 8-bits -> double. 3 size step-up: give up. 7379// 16-bits -> double. 2 size step. 7380class SExtLoadi16CVTf64Pat<dag addrmode, dag INST> 7381 : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))), 7382 (SCVTFv1i64 (f64 (EXTRACT_SUBREG 7383 (SSHLLv2i32_shift 7384 (f64 7385 (EXTRACT_SUBREG 7386 (SSHLLv4i16_shift 7387 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 7388 INST, 7389 hsub), 7390 0), 7391 dsub)), 7392 0), 7393 dsub)))>, 7394 Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>; 7395 7396def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext), 7397 (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>; 7398def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext), 7399 (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>; 7400def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), 7401 (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>; 7402def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset), 7403 (LDURHi GPR64sp:$Rn, simm9:$offset)>; 7404// 32-bits -> double. 1 size step-up. 7405class SExtLoadi32CVTf64Pat<dag addrmode, dag INST> 7406 : Pat <(f64 (sint_to_fp (i32 (load addrmode)))), 7407 (SCVTFv1i64 (f64 (EXTRACT_SUBREG 7408 (SSHLLv2i32_shift 7409 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 7410 INST, 7411 ssub), 7412 0), 7413 dsub)))>, 7414 Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>; 7415 7416def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext), 7417 (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>; 7418def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext), 7419 (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>; 7420def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), 7421 (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>; 7422def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset), 7423 (LDURSi GPR64sp:$Rn, simm9:$offset)>; 7424 7425// 64-bits -> double are handled in target specific dag combine: 7426// performIntToFpCombine. 7427 7428 7429//---------------------------------------------------------------------------- 7430// AdvSIMD Load-Store Structure 7431//---------------------------------------------------------------------------- 7432defm LD1 : SIMDLd1Multiple<"ld1">; 7433defm LD2 : SIMDLd2Multiple<"ld2">; 7434defm LD3 : SIMDLd3Multiple<"ld3">; 7435defm LD4 : SIMDLd4Multiple<"ld4">; 7436 7437defm ST1 : SIMDSt1Multiple<"st1">; 7438defm ST2 : SIMDSt2Multiple<"st2">; 7439defm ST3 : SIMDSt3Multiple<"st3">; 7440defm ST4 : SIMDSt4Multiple<"st4">; 7441 7442class Ld1Pat<ValueType ty, Instruction INST> 7443 : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>; 7444 7445def : Ld1Pat<v16i8, LD1Onev16b>; 7446def : Ld1Pat<v8i16, LD1Onev8h>; 7447def : Ld1Pat<v4i32, LD1Onev4s>; 7448def : Ld1Pat<v2i64, LD1Onev2d>; 7449def : Ld1Pat<v8i8, LD1Onev8b>; 7450def : Ld1Pat<v4i16, LD1Onev4h>; 7451def : Ld1Pat<v2i32, LD1Onev2s>; 7452def : Ld1Pat<v1i64, LD1Onev1d>; 7453 7454class St1Pat<ValueType ty, Instruction INST> 7455 : Pat<(store ty:$Vt, GPR64sp:$Rn), 7456 (INST ty:$Vt, GPR64sp:$Rn)>; 7457 7458def : St1Pat<v16i8, ST1Onev16b>; 7459def : St1Pat<v8i16, ST1Onev8h>; 7460def : St1Pat<v4i32, ST1Onev4s>; 7461def : St1Pat<v2i64, ST1Onev2d>; 7462def : St1Pat<v8i8, ST1Onev8b>; 7463def : St1Pat<v4i16, ST1Onev4h>; 7464def : St1Pat<v2i32, ST1Onev2s>; 7465def : St1Pat<v1i64, ST1Onev1d>; 7466 7467//--- 7468// Single-element 7469//--- 7470 7471defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>; 7472defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>; 7473defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>; 7474defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>; 7475let mayLoad = 1, hasSideEffects = 0 in { 7476defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>; 7477defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>; 7478defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>; 7479defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned, GPR64pi8>; 7480defm LD2 : SIMDLdSingleBTied<1, 0b000, "ld2", VecListTwob, GPR64pi2>; 7481defm LD2 : SIMDLdSingleHTied<1, 0b010, 0, "ld2", VecListTwoh, GPR64pi4>; 7482defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos, GPR64pi8>; 7483defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod, GPR64pi16>; 7484defm LD3 : SIMDLdSingleBTied<0, 0b001, "ld3", VecListThreeb, GPR64pi3>; 7485defm LD3 : SIMDLdSingleHTied<0, 0b011, 0, "ld3", VecListThreeh, GPR64pi6>; 7486defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>; 7487defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>; 7488defm LD4 : SIMDLdSingleBTied<1, 0b001, "ld4", VecListFourb, GPR64pi4>; 7489defm LD4 : SIMDLdSingleHTied<1, 0b011, 0, "ld4", VecListFourh, GPR64pi8>; 7490defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours, GPR64pi16>; 7491defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>; 7492} 7493 7494def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), 7495 (LD1Rv8b GPR64sp:$Rn)>; 7496def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), 7497 (LD1Rv16b GPR64sp:$Rn)>; 7498def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), 7499 (LD1Rv4h GPR64sp:$Rn)>; 7500def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), 7501 (LD1Rv8h GPR64sp:$Rn)>; 7502def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), 7503 (LD1Rv2s GPR64sp:$Rn)>; 7504def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), 7505 (LD1Rv4s GPR64sp:$Rn)>; 7506def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), 7507 (LD1Rv2d GPR64sp:$Rn)>; 7508def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), 7509 (LD1Rv1d GPR64sp:$Rn)>; 7510 7511def : Pat<(v8i8 (AArch64duplane8 (v16i8 (insert_subvector undef, (v8i8 (load GPR64sp:$Rn)), (i64 0))), (i64 0))), 7512 (LD1Rv8b GPR64sp:$Rn)>; 7513def : Pat<(v16i8 (AArch64duplane8 (v16i8 (load GPR64sp:$Rn)), (i64 0))), 7514 (LD1Rv16b GPR64sp:$Rn)>; 7515def : Pat<(v4i16 (AArch64duplane16 (v8i16 (insert_subvector undef, (v4i16 (load GPR64sp:$Rn)), (i64 0))), (i64 0))), 7516 (LD1Rv4h GPR64sp:$Rn)>; 7517def : Pat<(v8i16 (AArch64duplane16 (v8i16 (load GPR64sp:$Rn)), (i64 0))), 7518 (LD1Rv8h GPR64sp:$Rn)>; 7519def : Pat<(v2i32 (AArch64duplane32 (v4i32 (insert_subvector undef, (v2i32 (load GPR64sp:$Rn)), (i64 0))), (i64 0))), 7520 (LD1Rv2s GPR64sp:$Rn)>; 7521def : Pat<(v4i32 (AArch64duplane32 (v4i32 (load GPR64sp:$Rn)), (i64 0))), 7522 (LD1Rv4s GPR64sp:$Rn)>; 7523def : Pat<(v2i64 (AArch64duplane64 (v2i64 (load GPR64sp:$Rn)), (i64 0))), 7524 (LD1Rv2d GPR64sp:$Rn)>; 7525 7526// Grab the floating point version too 7527def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), 7528 (LD1Rv2s GPR64sp:$Rn)>; 7529def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), 7530 (LD1Rv4s GPR64sp:$Rn)>; 7531def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), 7532 (LD1Rv2d GPR64sp:$Rn)>; 7533def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), 7534 (LD1Rv1d GPR64sp:$Rn)>; 7535def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))), 7536 (LD1Rv4h GPR64sp:$Rn)>; 7537def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))), 7538 (LD1Rv8h GPR64sp:$Rn)>; 7539def : Pat<(v4bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))), 7540 (LD1Rv4h GPR64sp:$Rn)>; 7541def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))), 7542 (LD1Rv8h GPR64sp:$Rn)>; 7543 7544class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex, 7545 ValueType VTy, ValueType STy, Instruction LD1> 7546 : Pat<(vector_insert (VTy VecListOne128:$Rd), 7547 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 7548 (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>; 7549 7550def : Ld1Lane128Pat<extloadi8, VectorIndexB, v16i8, i32, LD1i8>; 7551def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>; 7552def : Ld1Lane128Pat<load, VectorIndexS, v4i32, i32, LD1i32>; 7553def : Ld1Lane128Pat<load, VectorIndexS, v4f32, f32, LD1i32>; 7554def : Ld1Lane128Pat<load, VectorIndexD, v2i64, i64, LD1i64>; 7555def : Ld1Lane128Pat<load, VectorIndexD, v2f64, f64, LD1i64>; 7556def : Ld1Lane128Pat<load, VectorIndexH, v8f16, f16, LD1i16>; 7557def : Ld1Lane128Pat<load, VectorIndexH, v8bf16, bf16, LD1i16>; 7558 7559// Generate LD1 for extload if memory type does not match the 7560// destination type, for example: 7561// 7562// (v4i32 (insert_vector_elt (load anyext from i8) idx)) 7563// 7564// In this case, the index must be adjusted to match LD1 type. 7565// 7566class Ld1Lane128IdxOpPat<SDPatternOperator scalar_load, Operand 7567 VecIndex, ValueType VTy, ValueType STy, 7568 Instruction LD1, SDNodeXForm IdxOp> 7569 : Pat<(vector_insert (VTy VecListOne128:$Rd), 7570 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 7571 (LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>; 7572 7573class Ld1Lane64IdxOpPat<SDPatternOperator scalar_load, Operand VecIndex, 7574 ValueType VTy, ValueType STy, Instruction LD1, 7575 SDNodeXForm IdxOp> 7576 : Pat<(vector_insert (VTy VecListOne64:$Rd), 7577 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 7578 (EXTRACT_SUBREG 7579 (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), 7580 (IdxOp VecIndex:$idx), GPR64sp:$Rn), 7581 dsub)>; 7582 7583def VectorIndexStoH : SDNodeXForm<imm, [{ 7584 return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64); 7585}]>; 7586def VectorIndexStoB : SDNodeXForm<imm, [{ 7587 return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64); 7588}]>; 7589def VectorIndexHtoB : SDNodeXForm<imm, [{ 7590 return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64); 7591}]>; 7592 7593def : Ld1Lane128IdxOpPat<extloadi16, VectorIndexS, v4i32, i32, LD1i16, VectorIndexStoH>; 7594def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexS, v4i32, i32, LD1i8, VectorIndexStoB>; 7595def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexH, v8i16, i32, LD1i8, VectorIndexHtoB>; 7596 7597def : Ld1Lane64IdxOpPat<extloadi16, VectorIndexS, v2i32, i32, LD1i16, VectorIndexStoH>; 7598def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexS, v2i32, i32, LD1i8, VectorIndexStoB>; 7599def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexHtoB>; 7600 7601// Same as above, but the first element is populated using 7602// scalar_to_vector + insert_subvector instead of insert_vector_elt. 7603let Predicates = [IsNeonAvailable] in { 7604 class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy, 7605 SDPatternOperator ExtLoad, Instruction LD1> 7606 : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))), 7607 (ResultTy (EXTRACT_SUBREG 7608 (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>; 7609 7610 def : Ld1Lane128FirstElm<v2i32, v8i16, extloadi16, LD1i16>; 7611 def : Ld1Lane128FirstElm<v2i32, v16i8, extloadi8, LD1i8>; 7612 def : Ld1Lane128FirstElm<v4i16, v16i8, extloadi8, LD1i8>; 7613} 7614class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex, 7615 ValueType VTy, ValueType STy, Instruction LD1> 7616 : Pat<(vector_insert (VTy VecListOne64:$Rd), 7617 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 7618 (EXTRACT_SUBREG 7619 (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), 7620 VecIndex:$idx, GPR64sp:$Rn), 7621 dsub)>; 7622 7623def : Ld1Lane64Pat<extloadi8, VectorIndexB, v8i8, i32, LD1i8>; 7624def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>; 7625def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>; 7626def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>; 7627def : Ld1Lane64Pat<load, VectorIndexH, v4f16, f16, LD1i16>; 7628def : Ld1Lane64Pat<load, VectorIndexH, v4bf16, bf16, LD1i16>; 7629 7630 7631defm LD1 : SIMDLdSt1SingleAliases<"ld1">; 7632defm LD2 : SIMDLdSt2SingleAliases<"ld2">; 7633defm LD3 : SIMDLdSt3SingleAliases<"ld3">; 7634defm LD4 : SIMDLdSt4SingleAliases<"ld4">; 7635 7636// Stores 7637defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>; 7638defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>; 7639defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>; 7640defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>; 7641 7642let AddedComplexity = 19 in 7643class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex, 7644 ValueType VTy, ValueType STy, Instruction ST1> 7645 : Pat<(scalar_store 7646 (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), 7647 GPR64sp:$Rn), 7648 (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>; 7649 7650def : St1Lane128Pat<truncstorei8, VectorIndexB, v16i8, i32, ST1i8>; 7651def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>; 7652def : St1Lane128Pat<store, VectorIndexS, v4i32, i32, ST1i32>; 7653def : St1Lane128Pat<store, VectorIndexS, v4f32, f32, ST1i32>; 7654def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>; 7655def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>; 7656def : St1Lane128Pat<store, VectorIndexH, v8f16, f16, ST1i16>; 7657def : St1Lane128Pat<store, VectorIndexH, v8bf16, bf16, ST1i16>; 7658 7659let AddedComplexity = 19 in 7660class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex, 7661 ValueType VTy, ValueType STy, Instruction ST1> 7662 : Pat<(scalar_store 7663 (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), 7664 GPR64sp:$Rn), 7665 (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), 7666 VecIndex:$idx, GPR64sp:$Rn)>; 7667 7668def : St1Lane64Pat<truncstorei8, VectorIndexB, v8i8, i32, ST1i8>; 7669def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>; 7670def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>; 7671def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>; 7672def : St1Lane64Pat<store, VectorIndexH, v4f16, f16, ST1i16>; 7673def : St1Lane64Pat<store, VectorIndexH, v4bf16, bf16, ST1i16>; 7674 7675multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex, 7676 ValueType VTy, ValueType STy, Instruction ST1, 7677 int offset> { 7678 def : Pat<(scalar_store 7679 (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), 7680 GPR64sp:$Rn, offset), 7681 (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), 7682 VecIndex:$idx, GPR64sp:$Rn, XZR)>; 7683 7684 def : Pat<(scalar_store 7685 (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), 7686 GPR64sp:$Rn, GPR64:$Rm), 7687 (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), 7688 VecIndex:$idx, GPR64sp:$Rn, $Rm)>; 7689} 7690 7691defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>; 7692defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST, 7693 2>; 7694defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>; 7695defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>; 7696defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>; 7697defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>; 7698defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>; 7699defm : St1LanePost64Pat<post_store, VectorIndexH, v4bf16, bf16, ST1i16_POST, 2>; 7700 7701multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex, 7702 ValueType VTy, ValueType STy, Instruction ST1, 7703 int offset> { 7704 def : Pat<(scalar_store 7705 (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), 7706 GPR64sp:$Rn, offset), 7707 (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>; 7708 7709 def : Pat<(scalar_store 7710 (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), 7711 GPR64sp:$Rn, GPR64:$Rm), 7712 (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>; 7713} 7714 7715defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST, 7716 1>; 7717defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST, 7718 2>; 7719defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>; 7720defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>; 7721defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>; 7722defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>; 7723defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>; 7724defm : St1LanePost128Pat<post_store, VectorIndexH, v8bf16, bf16, ST1i16_POST, 2>; 7725 7726let mayStore = 1, hasSideEffects = 0 in { 7727defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>; 7728defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>; 7729defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>; 7730defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>; 7731defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>; 7732defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>; 7733defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>; 7734defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>; 7735defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>; 7736defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>; 7737defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>; 7738defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>; 7739} 7740 7741defm ST1 : SIMDLdSt1SingleAliases<"st1">; 7742defm ST2 : SIMDLdSt2SingleAliases<"st2">; 7743defm ST3 : SIMDLdSt3SingleAliases<"st3">; 7744defm ST4 : SIMDLdSt4SingleAliases<"st4">; 7745 7746//---------------------------------------------------------------------------- 7747// Crypto extensions 7748//---------------------------------------------------------------------------- 7749 7750let Predicates = [HasAES] in { 7751def AESErr : AESTiedInst<0b0100, "aese", int_aarch64_crypto_aese>; 7752def AESDrr : AESTiedInst<0b0101, "aesd", int_aarch64_crypto_aesd>; 7753def AESMCrr : AESInst< 0b0110, "aesmc", int_aarch64_crypto_aesmc>; 7754def AESIMCrr : AESInst< 0b0111, "aesimc", int_aarch64_crypto_aesimc>; 7755} 7756 7757// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required 7758// for AES fusion on some CPUs. 7759let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in { 7760def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">, 7761 Sched<[WriteVq]>; 7762def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">, 7763 Sched<[WriteVq]>; 7764} 7765 7766// Only use constrained versions of AES(I)MC instructions if they are paired with 7767// AESE/AESD. 7768def : Pat<(v16i8 (int_aarch64_crypto_aesmc 7769 (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1), 7770 (v16i8 V128:$src2))))), 7771 (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1), 7772 (v16i8 V128:$src2)))))>, 7773 Requires<[HasFuseAES]>; 7774 7775def : Pat<(v16i8 (int_aarch64_crypto_aesimc 7776 (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1), 7777 (v16i8 V128:$src2))))), 7778 (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1), 7779 (v16i8 V128:$src2)))))>, 7780 Requires<[HasFuseAES]>; 7781 7782let Predicates = [HasSHA2] in { 7783def SHA1Crrr : SHATiedInstQSV<0b000, "sha1c", int_aarch64_crypto_sha1c>; 7784def SHA1Prrr : SHATiedInstQSV<0b001, "sha1p", int_aarch64_crypto_sha1p>; 7785def SHA1Mrrr : SHATiedInstQSV<0b010, "sha1m", int_aarch64_crypto_sha1m>; 7786def SHA1SU0rrr : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>; 7787def SHA256Hrrr : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>; 7788def SHA256H2rrr : SHATiedInstQQV<0b101, "sha256h2",int_aarch64_crypto_sha256h2>; 7789def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1>; 7790 7791def SHA1Hrr : SHAInstSS< 0b0000, "sha1h", int_aarch64_crypto_sha1h>; 7792def SHA1SU1rr : SHATiedInstVV<0b0001, "sha1su1", int_aarch64_crypto_sha1su1>; 7793def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>; 7794} 7795 7796//---------------------------------------------------------------------------- 7797// Compiler-pseudos 7798//---------------------------------------------------------------------------- 7799// FIXME: Like for X86, these should go in their own separate .td file. 7800 7801// For an anyext, we don't care what the high bits are, so we can perform an 7802// INSERT_SUBREF into an IMPLICIT_DEF. 7803def : Pat<(i64 (anyext GPR32:$src)), 7804 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>; 7805 7806// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and 7807// then assert the extension has happened. 7808def : Pat<(i64 (zext GPR32:$src)), 7809 (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>; 7810 7811// To sign extend, we use a signed bitfield move instruction (SBFM) on the 7812// containing super-reg. 7813def : Pat<(i64 (sext GPR32:$src)), 7814 (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>; 7815def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>; 7816def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>; 7817def : Pat<(i64 (sext_inreg GPR64:$src, i8)), (SBFMXri GPR64:$src, 0, 7)>; 7818def : Pat<(i64 (sext_inreg GPR64:$src, i1)), (SBFMXri GPR64:$src, 0, 0)>; 7819def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>; 7820def : Pat<(i32 (sext_inreg GPR32:$src, i8)), (SBFMWri GPR32:$src, 0, 7)>; 7821def : Pat<(i32 (sext_inreg GPR32:$src, i1)), (SBFMWri GPR32:$src, 0, 0)>; 7822 7823def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)), 7824 (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), 7825 (i64 (i32shift_sext_i8 imm0_31:$imm)))>; 7826def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)), 7827 (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), 7828 (i64 (i64shift_sext_i8 imm0_63:$imm)))>; 7829 7830def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)), 7831 (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), 7832 (i64 (i32shift_sext_i16 imm0_31:$imm)))>; 7833def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)), 7834 (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), 7835 (i64 (i64shift_sext_i16 imm0_63:$imm)))>; 7836 7837def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)), 7838 (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), 7839 (i64 (i64shift_a imm0_63:$imm)), 7840 (i64 (i64shift_sext_i32 imm0_63:$imm)))>; 7841 7842def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)), 7843 (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), 7844 (i64 (i64shift_a imm0_63:$imm)), 7845 (i64 (i64shift_sext_i32 imm0_63:$imm)))>; 7846 7847// sra patterns have an AddedComplexity of 10, so make sure we have a higher 7848// AddedComplexity for the following patterns since we want to match sext + sra 7849// patterns before we attempt to match a single sra node. 7850let AddedComplexity = 20 in { 7851// We support all sext + sra combinations which preserve at least one bit of the 7852// original value which is to be sign extended. E.g. we support shifts up to 7853// bitwidth-1 bits. 7854def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)), 7855 (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>; 7856def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)), 7857 (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>; 7858 7859def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)), 7860 (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>; 7861def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)), 7862 (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>; 7863 7864def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)), 7865 (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), 7866 (i64 imm0_31:$imm), 31)>; 7867} // AddedComplexity = 20 7868 7869// To truncate, we can simply extract from a subregister. 7870def : Pat<(i32 (trunc GPR64sp:$src)), 7871 (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>; 7872 7873// __builtin_trap() uses the BRK instruction on AArch64. 7874def : Pat<(trap), (BRK 1)>; 7875def : Pat<(debugtrap), (BRK 0xF000)>; 7876 7877def ubsan_trap_xform : SDNodeXForm<timm, [{ 7878 return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32); 7879}]>; 7880 7881def ubsan_trap_imm : TImmLeaf<i32, [{ 7882 return isUInt<8>(Imm); 7883}], ubsan_trap_xform>; 7884 7885def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>; 7886 7887// Multiply high patterns which multiply the lower subvector using smull/umull 7888// and the upper subvector with smull2/umull2. Then shuffle the high the high 7889// part of both results together. 7890def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)), 7891 (UZP2v16i8 7892 (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub), 7893 (EXTRACT_SUBREG V128:$Rm, dsub)), 7894 (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>; 7895def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)), 7896 (UZP2v8i16 7897 (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub), 7898 (EXTRACT_SUBREG V128:$Rm, dsub)), 7899 (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>; 7900def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)), 7901 (UZP2v4i32 7902 (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub), 7903 (EXTRACT_SUBREG V128:$Rm, dsub)), 7904 (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>; 7905 7906def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)), 7907 (UZP2v16i8 7908 (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub), 7909 (EXTRACT_SUBREG V128:$Rm, dsub)), 7910 (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>; 7911def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)), 7912 (UZP2v8i16 7913 (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub), 7914 (EXTRACT_SUBREG V128:$Rm, dsub)), 7915 (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>; 7916def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)), 7917 (UZP2v4i32 7918 (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub), 7919 (EXTRACT_SUBREG V128:$Rm, dsub)), 7920 (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>; 7921 7922// Conversions within AdvSIMD types in the same register size are free. 7923// But because we need a consistent lane ordering, in big endian many 7924// conversions require one or more REV instructions. 7925// 7926// Consider a simple memory load followed by a bitconvert then a store. 7927// v0 = load v2i32 7928// v1 = BITCAST v2i32 v0 to v4i16 7929// store v4i16 v2 7930// 7931// In big endian mode every memory access has an implicit byte swap. LDR and 7932// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that 7933// is, they treat the vector as a sequence of elements to be byte-swapped. 7934// The two pairs of instructions are fundamentally incompatible. We've decided 7935// to use LD1/ST1 only to simplify compiler implementation. 7936// 7937// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes 7938// the original code sequence: 7939// v0 = load v2i32 7940// v1 = REV v2i32 (implicit) 7941// v2 = BITCAST v2i32 v1 to v4i16 7942// v3 = REV v4i16 v2 (implicit) 7943// store v4i16 v3 7944// 7945// But this is now broken - the value stored is different to the value loaded 7946// due to lane reordering. To fix this, on every BITCAST we must perform two 7947// other REVs: 7948// v0 = load v2i32 7949// v1 = REV v2i32 (implicit) 7950// v2 = REV v2i32 7951// v3 = BITCAST v2i32 v2 to v4i16 7952// v4 = REV v4i16 7953// v5 = REV v4i16 v4 (implicit) 7954// store v4i16 v5 7955// 7956// This means an extra two instructions, but actually in most cases the two REV 7957// instructions can be combined into one. For example: 7958// (REV64_2s (REV64_4h X)) === (REV32_4h X) 7959// 7960// There is also no 128-bit REV instruction. This must be synthesized with an 7961// EXT instruction. 7962// 7963// Most bitconverts require some sort of conversion. The only exceptions are: 7964// a) Identity conversions - vNfX <-> vNiX 7965// b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX 7966// 7967 7968// Natural vector casts (64 bit) 7969foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in 7970 foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in 7971 def : Pat<(VT (AArch64NvCast (VT2 FPR64:$src))), 7972 (VT FPR64:$src)>; 7973 7974// Natural vector casts (128 bit) 7975foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in 7976 foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in 7977 def : Pat<(VT (AArch64NvCast (VT2 FPR128:$src))), 7978 (VT FPR128:$src)>; 7979 7980let Predicates = [IsLE] in { 7981def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 7982def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 7983def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 7984def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 7985def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 7986def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 7987 7988def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), 7989 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 7990def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), 7991 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 7992def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), 7993 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 7994def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))), 7995 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 7996def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))), 7997 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 7998def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), 7999 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8000def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), 8001 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8002} 8003let Predicates = [IsBE] in { 8004def : Pat<(v8i8 (bitconvert GPR64:$Xn)), 8005 (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8006def : Pat<(v4i16 (bitconvert GPR64:$Xn)), 8007 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8008def : Pat<(v2i32 (bitconvert GPR64:$Xn)), 8009 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8010def : Pat<(v4f16 (bitconvert GPR64:$Xn)), 8011 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8012def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), 8013 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8014def : Pat<(v2f32 (bitconvert GPR64:$Xn)), 8015 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 8016 8017def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), 8018 (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8019def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), 8020 (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8021def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), 8022 (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8023def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))), 8024 (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8025def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))), 8026 (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8027def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), 8028 (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 8029} 8030def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8031def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8032def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))), 8033 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8034def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)), 8035 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8036def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)), 8037 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8038def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>; 8039 8040def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))), 8041 (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>; 8042def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))), 8043 (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>; 8044def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))), 8045 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 8046def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))), 8047 (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>; 8048def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), 8049 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 8050 8051def : Pat<(f16 (bitconvert (bf16 FPR16:$src))), (f16 FPR16:$src)>; 8052def : Pat<(bf16 (bitconvert (f16 FPR16:$src))), (bf16 FPR16:$src)>; 8053 8054let Predicates = [IsLE] in { 8055def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; 8056def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; 8057def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; 8058def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>; 8059def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), (v1i64 FPR64:$src)>; 8060def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>; 8061} 8062let Predicates = [IsBE] in { 8063def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), 8064 (v1i64 (REV64v2i32 FPR64:$src))>; 8065def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), 8066 (v1i64 (REV64v4i16 FPR64:$src))>; 8067def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), 8068 (v1i64 (REV64v8i8 FPR64:$src))>; 8069def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), 8070 (v1i64 (REV64v4i16 FPR64:$src))>; 8071def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), 8072 (v1i64 (REV64v4i16 FPR64:$src))>; 8073def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), 8074 (v1i64 (REV64v2i32 FPR64:$src))>; 8075} 8076def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>; 8077def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; 8078 8079let Predicates = [IsLE] in { 8080def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>; 8081def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; 8082def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; 8083def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; 8084def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>; 8085def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>; 8086def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), (v2i32 FPR64:$src)>; 8087} 8088let Predicates = [IsBE] in { 8089def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), 8090 (v2i32 (REV64v2i32 FPR64:$src))>; 8091def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), 8092 (v2i32 (REV32v4i16 FPR64:$src))>; 8093def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), 8094 (v2i32 (REV32v8i8 FPR64:$src))>; 8095def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), 8096 (v2i32 (REV64v2i32 FPR64:$src))>; 8097def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), 8098 (v2i32 (REV64v2i32 FPR64:$src))>; 8099def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), 8100 (v2i32 (REV32v4i16 FPR64:$src))>; 8101def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), 8102 (v2i32 (REV32v4i16 FPR64:$src))>; 8103} 8104def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; 8105 8106let Predicates = [IsLE] in { 8107def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>; 8108def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; 8109def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; 8110def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; 8111def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>; 8112def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>; 8113} 8114let Predicates = [IsBE] in { 8115def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), 8116 (v4i16 (REV64v4i16 FPR64:$src))>; 8117def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), 8118 (v4i16 (REV32v4i16 FPR64:$src))>; 8119def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), 8120 (v4i16 (REV16v8i8 FPR64:$src))>; 8121def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), 8122 (v4i16 (REV64v4i16 FPR64:$src))>; 8123def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), 8124 (v4i16 (REV32v4i16 FPR64:$src))>; 8125def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), 8126 (v4i16 (REV64v4i16 FPR64:$src))>; 8127} 8128def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>; 8129def : Pat<(v4i16 (bitconvert (v4bf16 FPR64:$src))), (v4i16 FPR64:$src)>; 8130 8131let Predicates = [IsLE] in { 8132def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>; 8133def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>; 8134def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>; 8135def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>; 8136def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>; 8137def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>; 8138 8139def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), (v4bf16 FPR64:$src)>; 8140def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>; 8141def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))), (v4bf16 FPR64:$src)>; 8142def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))), (v4bf16 FPR64:$src)>; 8143def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), (v4bf16 FPR64:$src)>; 8144def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), (v4bf16 FPR64:$src)>; 8145} 8146let Predicates = [IsBE] in { 8147def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), 8148 (v4f16 (REV64v4i16 FPR64:$src))>; 8149def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), 8150 (v4f16 (REV32v4i16 FPR64:$src))>; 8151def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), 8152 (v4f16 (REV16v8i8 FPR64:$src))>; 8153def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), 8154 (v4f16 (REV64v4i16 FPR64:$src))>; 8155def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), 8156 (v4f16 (REV32v4i16 FPR64:$src))>; 8157def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), 8158 (v4f16 (REV64v4i16 FPR64:$src))>; 8159 8160def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), 8161 (v4bf16 (REV64v4i16 FPR64:$src))>; 8162def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), 8163 (v4bf16 (REV32v4i16 FPR64:$src))>; 8164def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))), 8165 (v4bf16 (REV16v8i8 FPR64:$src))>; 8166def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))), 8167 (v4bf16 (REV64v4i16 FPR64:$src))>; 8168def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), 8169 (v4bf16 (REV32v4i16 FPR64:$src))>; 8170def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), 8171 (v4bf16 (REV64v4i16 FPR64:$src))>; 8172} 8173def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>; 8174def : Pat<(v4bf16 (bitconvert (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>; 8175 8176let Predicates = [IsLE] in { 8177def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>; 8178def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; 8179def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; 8180def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>; 8181def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>; 8182def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>; 8183def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 FPR64:$src)>; 8184def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))), (v8i8 FPR64:$src)>; 8185} 8186let Predicates = [IsBE] in { 8187def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), 8188 (v8i8 (REV64v8i8 FPR64:$src))>; 8189def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), 8190 (v8i8 (REV32v8i8 FPR64:$src))>; 8191def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), 8192 (v8i8 (REV16v8i8 FPR64:$src))>; 8193def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), 8194 (v8i8 (REV64v8i8 FPR64:$src))>; 8195def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), 8196 (v8i8 (REV32v8i8 FPR64:$src))>; 8197def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), 8198 (v8i8 (REV64v8i8 FPR64:$src))>; 8199def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), 8200 (v8i8 (REV16v8i8 FPR64:$src))>; 8201def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))), 8202 (v8i8 (REV16v8i8 FPR64:$src))>; 8203} 8204 8205let Predicates = [IsLE] in { 8206def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>; 8207def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>; 8208def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>; 8209def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>; 8210def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 FPR64:$src)>; 8211def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))), (f64 FPR64:$src)>; 8212} 8213let Predicates = [IsBE] in { 8214def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), 8215 (f64 (REV64v2i32 FPR64:$src))>; 8216def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), 8217 (f64 (REV64v4i16 FPR64:$src))>; 8218def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), 8219 (f64 (REV64v2i32 FPR64:$src))>; 8220def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), 8221 (f64 (REV64v8i8 FPR64:$src))>; 8222def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), 8223 (f64 (REV64v4i16 FPR64:$src))>; 8224def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))), 8225 (f64 (REV64v4i16 FPR64:$src))>; 8226} 8227def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>; 8228def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>; 8229 8230let Predicates = [IsLE] in { 8231def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>; 8232def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>; 8233def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>; 8234def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>; 8235def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>; 8236def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), (v1f64 FPR64:$src)>; 8237} 8238let Predicates = [IsBE] in { 8239def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), 8240 (v1f64 (REV64v2i32 FPR64:$src))>; 8241def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), 8242 (v1f64 (REV64v4i16 FPR64:$src))>; 8243def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), 8244 (v1f64 (REV64v8i8 FPR64:$src))>; 8245def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), 8246 (v1f64 (REV64v2i32 FPR64:$src))>; 8247def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), 8248 (v1f64 (REV64v4i16 FPR64:$src))>; 8249def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), 8250 (v1f64 (REV64v4i16 FPR64:$src))>; 8251} 8252def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>; 8253def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>; 8254 8255let Predicates = [IsLE] in { 8256def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>; 8257def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>; 8258def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>; 8259def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>; 8260def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>; 8261def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>; 8262def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), (v2f32 FPR64:$src)>; 8263} 8264let Predicates = [IsBE] in { 8265def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), 8266 (v2f32 (REV64v2i32 FPR64:$src))>; 8267def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), 8268 (v2f32 (REV32v4i16 FPR64:$src))>; 8269def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), 8270 (v2f32 (REV32v8i8 FPR64:$src))>; 8271def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), 8272 (v2f32 (REV64v2i32 FPR64:$src))>; 8273def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), 8274 (v2f32 (REV64v2i32 FPR64:$src))>; 8275def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), 8276 (v2f32 (REV32v4i16 FPR64:$src))>; 8277def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), 8278 (v2f32 (REV32v4i16 FPR64:$src))>; 8279} 8280def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; 8281 8282let Predicates = [IsLE] in { 8283def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>; 8284def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>; 8285def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>; 8286def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>; 8287def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>; 8288def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>; 8289def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), (f128 FPR128:$src)>; 8290def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>; 8291} 8292let Predicates = [IsBE] in { 8293def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), 8294 (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; 8295def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), 8296 (f128 (EXTv16i8 (REV64v4i32 FPR128:$src), 8297 (REV64v4i32 FPR128:$src), (i32 8)))>; 8298def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), 8299 (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), 8300 (REV64v8i16 FPR128:$src), (i32 8)))>; 8301def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), 8302 (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), 8303 (REV64v8i16 FPR128:$src), (i32 8)))>; 8304def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), 8305 (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), 8306 (REV64v8i16 FPR128:$src), (i32 8)))>; 8307def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), 8308 (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; 8309def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), 8310 (f128 (EXTv16i8 (REV64v4i32 FPR128:$src), 8311 (REV64v4i32 FPR128:$src), (i32 8)))>; 8312def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), 8313 (f128 (EXTv16i8 (REV64v16i8 FPR128:$src), 8314 (REV64v16i8 FPR128:$src), (i32 8)))>; 8315} 8316 8317let Predicates = [IsLE] in { 8318def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; 8319def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>; 8320def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>; 8321def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>; 8322def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), (v2f64 FPR128:$src)>; 8323def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>; 8324def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; 8325} 8326let Predicates = [IsBE] in { 8327def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), 8328 (v2f64 (EXTv16i8 FPR128:$src, 8329 FPR128:$src, (i32 8)))>; 8330def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), 8331 (v2f64 (REV64v4i32 FPR128:$src))>; 8332def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), 8333 (v2f64 (REV64v8i16 FPR128:$src))>; 8334def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), 8335 (v2f64 (REV64v8i16 FPR128:$src))>; 8336def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), 8337 (v2f64 (REV64v8i16 FPR128:$src))>; 8338def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), 8339 (v2f64 (REV64v16i8 FPR128:$src))>; 8340def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), 8341 (v2f64 (REV64v4i32 FPR128:$src))>; 8342} 8343def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>; 8344 8345let Predicates = [IsLE] in { 8346def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; 8347def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; 8348def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>; 8349def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), (v4f32 FPR128:$src)>; 8350def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; 8351def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; 8352def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; 8353} 8354let Predicates = [IsBE] in { 8355def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), 8356 (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src), 8357 (REV64v4i32 FPR128:$src), (i32 8)))>; 8358def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), 8359 (v4f32 (REV32v8i16 FPR128:$src))>; 8360def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), 8361 (v4f32 (REV32v8i16 FPR128:$src))>; 8362def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), 8363 (v4f32 (REV32v8i16 FPR128:$src))>; 8364def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), 8365 (v4f32 (REV32v16i8 FPR128:$src))>; 8366def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), 8367 (v4f32 (REV64v4i32 FPR128:$src))>; 8368def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), 8369 (v4f32 (REV64v4i32 FPR128:$src))>; 8370} 8371def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; 8372 8373let Predicates = [IsLE] in { 8374def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>; 8375def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; 8376def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; 8377def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; 8378def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; 8379def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>; 8380def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), (v2i64 FPR128:$src)>; 8381} 8382let Predicates = [IsBE] in { 8383def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), 8384 (v2i64 (EXTv16i8 FPR128:$src, 8385 FPR128:$src, (i32 8)))>; 8386def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), 8387 (v2i64 (REV64v4i32 FPR128:$src))>; 8388def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), 8389 (v2i64 (REV64v8i16 FPR128:$src))>; 8390def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), 8391 (v2i64 (REV64v16i8 FPR128:$src))>; 8392def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), 8393 (v2i64 (REV64v4i32 FPR128:$src))>; 8394def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), 8395 (v2i64 (REV64v8i16 FPR128:$src))>; 8396def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), 8397 (v2i64 (REV64v8i16 FPR128:$src))>; 8398} 8399def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>; 8400 8401let Predicates = [IsLE] in { 8402def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>; 8403def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; 8404def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; 8405def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; 8406def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>; 8407def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>; 8408def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), (v4i32 FPR128:$src)>; 8409} 8410let Predicates = [IsBE] in { 8411def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), 8412 (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src), 8413 (REV64v4i32 FPR128:$src), 8414 (i32 8)))>; 8415def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), 8416 (v4i32 (REV64v4i32 FPR128:$src))>; 8417def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), 8418 (v4i32 (REV32v8i16 FPR128:$src))>; 8419def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), 8420 (v4i32 (REV32v16i8 FPR128:$src))>; 8421def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), 8422 (v4i32 (REV64v4i32 FPR128:$src))>; 8423def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), 8424 (v4i32 (REV32v8i16 FPR128:$src))>; 8425def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), 8426 (v4i32 (REV32v8i16 FPR128:$src))>; 8427} 8428def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>; 8429 8430let Predicates = [IsLE] in { 8431def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>; 8432def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; 8433def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; 8434def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; 8435def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; 8436def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; 8437} 8438let Predicates = [IsBE] in { 8439def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), 8440 (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src), 8441 (REV64v8i16 FPR128:$src), 8442 (i32 8)))>; 8443def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), 8444 (v8i16 (REV64v8i16 FPR128:$src))>; 8445def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), 8446 (v8i16 (REV32v8i16 FPR128:$src))>; 8447def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), 8448 (v8i16 (REV16v16i8 FPR128:$src))>; 8449def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), 8450 (v8i16 (REV64v8i16 FPR128:$src))>; 8451def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), 8452 (v8i16 (REV32v8i16 FPR128:$src))>; 8453} 8454def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>; 8455def : Pat<(v8i16 (bitconvert (v8bf16 FPR128:$src))), (v8i16 FPR128:$src)>; 8456 8457let Predicates = [IsLE] in { 8458def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>; 8459def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>; 8460def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>; 8461def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>; 8462def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>; 8463def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>; 8464 8465def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))), (v8bf16 FPR128:$src)>; 8466def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>; 8467def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>; 8468def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>; 8469def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>; 8470def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>; 8471} 8472let Predicates = [IsBE] in { 8473def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), 8474 (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src), 8475 (REV64v8i16 FPR128:$src), 8476 (i32 8)))>; 8477def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), 8478 (v8f16 (REV64v8i16 FPR128:$src))>; 8479def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), 8480 (v8f16 (REV32v8i16 FPR128:$src))>; 8481def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), 8482 (v8f16 (REV16v16i8 FPR128:$src))>; 8483def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), 8484 (v8f16 (REV64v8i16 FPR128:$src))>; 8485def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), 8486 (v8f16 (REV32v8i16 FPR128:$src))>; 8487 8488def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))), 8489 (v8bf16 (EXTv16i8 (REV64v8i16 FPR128:$src), 8490 (REV64v8i16 FPR128:$src), 8491 (i32 8)))>; 8492def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), 8493 (v8bf16 (REV64v8i16 FPR128:$src))>; 8494def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), 8495 (v8bf16 (REV32v8i16 FPR128:$src))>; 8496def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), 8497 (v8bf16 (REV16v16i8 FPR128:$src))>; 8498def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), 8499 (v8bf16 (REV64v8i16 FPR128:$src))>; 8500def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), 8501 (v8bf16 (REV32v8i16 FPR128:$src))>; 8502} 8503def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>; 8504def : Pat<(v8bf16 (bitconvert (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>; 8505 8506let Predicates = [IsLE] in { 8507def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>; 8508def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; 8509def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; 8510def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; 8511def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; 8512def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>; 8513def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>; 8514def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), (v16i8 FPR128:$src)>; 8515} 8516let Predicates = [IsBE] in { 8517def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), 8518 (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src), 8519 (REV64v16i8 FPR128:$src), 8520 (i32 8)))>; 8521def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), 8522 (v16i8 (REV64v16i8 FPR128:$src))>; 8523def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), 8524 (v16i8 (REV32v16i8 FPR128:$src))>; 8525def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), 8526 (v16i8 (REV16v16i8 FPR128:$src))>; 8527def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), 8528 (v16i8 (REV64v16i8 FPR128:$src))>; 8529def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), 8530 (v16i8 (REV32v16i8 FPR128:$src))>; 8531def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), 8532 (v16i8 (REV16v16i8 FPR128:$src))>; 8533def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), 8534 (v16i8 (REV16v16i8 FPR128:$src))>; 8535} 8536 8537def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))), 8538 (EXTRACT_SUBREG V128:$Rn, dsub)>; 8539def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))), 8540 (EXTRACT_SUBREG V128:$Rn, dsub)>; 8541def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))), 8542 (EXTRACT_SUBREG V128:$Rn, dsub)>; 8543def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))), 8544 (EXTRACT_SUBREG V128:$Rn, dsub)>; 8545def : Pat<(v4bf16 (extract_subvector V128:$Rn, (i64 0))), 8546 (EXTRACT_SUBREG V128:$Rn, dsub)>; 8547def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))), 8548 (EXTRACT_SUBREG V128:$Rn, dsub)>; 8549def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))), 8550 (EXTRACT_SUBREG V128:$Rn, dsub)>; 8551def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))), 8552 (EXTRACT_SUBREG V128:$Rn, dsub)>; 8553 8554def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))), 8555 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; 8556def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))), 8557 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; 8558def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))), 8559 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; 8560def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))), 8561 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; 8562 8563// A 64-bit subvector insert to the first 128-bit vector position 8564// is a subregister copy that needs no instruction. 8565multiclass InsertSubvectorUndef<ValueType Ty> { 8566 def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)), 8567 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 8568 def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)), 8569 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 8570 def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)), 8571 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 8572 def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)), 8573 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 8574 def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)), 8575 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 8576 def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)), 8577 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 8578 def : Pat<(insert_subvector undef, (v4bf16 FPR64:$src), (Ty 0)), 8579 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 8580 def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)), 8581 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 8582} 8583 8584defm : InsertSubvectorUndef<i32>; 8585defm : InsertSubvectorUndef<i64>; 8586 8587// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64 8588// or v2f32. 8589def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)), 8590 (vector_extract (v2i64 FPR128:$Rn), (i64 1)))), 8591 (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>; 8592def : Pat<(f64 (any_fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)), 8593 (vector_extract (v2f64 FPR128:$Rn), (i64 1)))), 8594 (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>; 8595 // vector_extract on 64-bit vectors gets promoted to a 128 bit vector, 8596 // so we match on v4f32 here, not v2f32. This will also catch adding 8597 // the low two lanes of a true v4f32 vector. 8598def : Pat<(any_fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)), 8599 (vector_extract (v4f32 FPR128:$Rn), (i64 1))), 8600 (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; 8601def : Pat<(any_fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)), 8602 (vector_extract (v8f16 FPR128:$Rn), (i64 1))), 8603 (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; 8604 8605// Prefer using the bottom lanes of addp Rn, Rn compared to 8606// addp extractlow(Rn), extracthigh(Rn) 8607def : Pat<(AArch64addp (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 0))), 8608 (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 2)))), 8609 (v2i32 (EXTRACT_SUBREG (ADDPv4i32 $Rn, $Rn), dsub))>; 8610def : Pat<(AArch64addp (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 0))), 8611 (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 4)))), 8612 (v4i16 (EXTRACT_SUBREG (ADDPv8i16 $Rn, $Rn), dsub))>; 8613def : Pat<(AArch64addp (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 0))), 8614 (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 8)))), 8615 (v8i8 (EXTRACT_SUBREG (ADDPv16i8 $Rn, $Rn), dsub))>; 8616 8617def : Pat<(AArch64faddp (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 0))), 8618 (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 2)))), 8619 (v2f32 (EXTRACT_SUBREG (FADDPv4f32 $Rn, $Rn), dsub))>; 8620def : Pat<(AArch64faddp (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 0))), 8621 (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 4)))), 8622 (v4f16 (EXTRACT_SUBREG (FADDPv8f16 $Rn, $Rn), dsub))>; 8623 8624// Scalar 64-bit shifts in FPR64 registers. 8625def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), 8626 (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; 8627def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), 8628 (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>; 8629def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), 8630 (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; 8631def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), 8632 (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; 8633 8634// Patterns for nontemporal/no-allocate stores. 8635// We have to resort to tricks to turn a single-input store into a store pair, 8636// because there is no single-input nontemporal store, only STNP. 8637let Predicates = [IsLE] in { 8638let AddedComplexity = 15 in { 8639class NTStore128Pat<ValueType VT> : 8640 Pat<(nontemporalstore (VT FPR128:$Rt), 8641 (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)), 8642 (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub), 8643 (DUPi64 FPR128:$Rt, (i64 1)), 8644 GPR64sp:$Rn, simm7s8:$offset)>; 8645 8646def : NTStore128Pat<v2i64>; 8647def : NTStore128Pat<v4i32>; 8648def : NTStore128Pat<v8i16>; 8649def : NTStore128Pat<v16i8>; 8650 8651class NTStore64Pat<ValueType VT> : 8652 Pat<(nontemporalstore (VT FPR64:$Rt), 8653 (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)), 8654 (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub), 8655 (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)), 8656 GPR64sp:$Rn, simm7s4:$offset)>; 8657 8658// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64? 8659def : NTStore64Pat<v1f64>; 8660def : NTStore64Pat<v1i64>; 8661def : NTStore64Pat<v2i32>; 8662def : NTStore64Pat<v4i16>; 8663def : NTStore64Pat<v8i8>; 8664 8665def : Pat<(nontemporalstore GPR64:$Rt, 8666 (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)), 8667 (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), 8668 (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32), 8669 GPR64sp:$Rn, simm7s4:$offset)>; 8670} // AddedComplexity=10 8671} // Predicates = [IsLE] 8672 8673// Tail call return handling. These are all compiler pseudo-instructions, 8674// so no encoding information or anything like that. 8675let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { 8676 def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>, 8677 Sched<[WriteBrReg]>; 8678 def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>, 8679 Sched<[WriteBrReg]>; 8680 // Indirect tail-call with any register allowed, used by MachineOutliner when 8681 // this is proven safe. 8682 // FIXME: If we have to add any more hacks like this, we should instead relax 8683 // some verifier checks for outlined functions. 8684 def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>, 8685 Sched<[WriteBrReg]>; 8686 // Indirect tail-call limited to only use registers (x16 and x17) which are 8687 // allowed to tail-call a "BTI c" instruction. 8688 def TCRETURNriBTI : Pseudo<(outs), (ins rtcGPR64:$dst, i32imm:$FPDiff), []>, 8689 Sched<[WriteBrReg]>; 8690} 8691 8692def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)), 8693 (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>, 8694 Requires<[NotUseBTI]>; 8695def : Pat<(AArch64tcret rtcGPR64:$dst, (i32 timm:$FPDiff)), 8696 (TCRETURNriBTI rtcGPR64:$dst, imm:$FPDiff)>, 8697 Requires<[UseBTI]>; 8698def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)), 8699 (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; 8700def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)), 8701 (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; 8702 8703def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>; 8704def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>; 8705 8706// Extracting lane zero is a special case where we can just use a plain 8707// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the 8708// rest of the compiler, especially the register allocator and copy propagation, 8709// to reason about, so is preferred when it's possible to use it. 8710let AddedComplexity = 10 in { 8711 def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>; 8712 def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>; 8713 def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>; 8714} 8715 8716// dot_v4i8 8717class mul_v4i8<SDPatternOperator ldop> : 8718 PatFrag<(ops node:$Rn, node:$Rm, node:$offset), 8719 (mul (ldop (add node:$Rn, node:$offset)), 8720 (ldop (add node:$Rm, node:$offset)))>; 8721class mulz_v4i8<SDPatternOperator ldop> : 8722 PatFrag<(ops node:$Rn, node:$Rm), 8723 (mul (ldop node:$Rn), (ldop node:$Rm))>; 8724 8725def load_v4i8 : 8726 OutPatFrag<(ops node:$R), 8727 (INSERT_SUBREG 8728 (v2i32 (IMPLICIT_DEF)), 8729 (i32 (COPY_TO_REGCLASS (LDRWui node:$R, (i64 0)), FPR32)), 8730 ssub)>; 8731 8732class dot_v4i8<Instruction DOT, SDPatternOperator ldop> : 8733 Pat<(i32 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)), 8734 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)), 8735 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)), 8736 (mulz_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm))))), 8737 (EXTRACT_SUBREG (i64 (DOT (DUPv2i32gpr WZR), 8738 (load_v4i8 GPR64sp:$Rn), 8739 (load_v4i8 GPR64sp:$Rm))), 8740 sub_32)>, Requires<[HasDotProd]>; 8741 8742// dot_v8i8 8743class ee_v8i8<SDPatternOperator extend> : 8744 PatFrag<(ops node:$V, node:$K), 8745 (v4i16 (extract_subvector (v8i16 (extend node:$V)), node:$K))>; 8746 8747class mul_v8i8<SDPatternOperator mulop, SDPatternOperator extend> : 8748 PatFrag<(ops node:$M, node:$N, node:$K), 8749 (mulop (v4i16 (ee_v8i8<extend> node:$M, node:$K)), 8750 (v4i16 (ee_v8i8<extend> node:$N, node:$K)))>; 8751 8752class idot_v8i8<SDPatternOperator mulop, SDPatternOperator extend> : 8753 PatFrag<(ops node:$M, node:$N), 8754 (i32 (extractelt 8755 (v4i32 (AArch64uaddv 8756 (add (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 0)), 8757 (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 4))))), 8758 (i64 0)))>; 8759 8760// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm 8761def VADDV_32 : OutPatFrag<(ops node:$R), (ADDPv2i32 node:$R, node:$R)>; 8762 8763class odot_v8i8<Instruction DOT> : 8764 OutPatFrag<(ops node:$Vm, node:$Vn), 8765 (EXTRACT_SUBREG 8766 (VADDV_32 8767 (i64 (DOT (DUPv2i32gpr WZR), 8768 (v8i8 node:$Vm), 8769 (v8i8 node:$Vn)))), 8770 sub_32)>; 8771 8772class dot_v8i8<Instruction DOT, SDPatternOperator mulop, 8773 SDPatternOperator extend> : 8774 Pat<(idot_v8i8<mulop, extend> V64:$Vm, V64:$Vn), 8775 (odot_v8i8<DOT> V64:$Vm, V64:$Vn)>, 8776 Requires<[HasDotProd]>; 8777 8778// dot_v16i8 8779class ee_v16i8<SDPatternOperator extend> : 8780 PatFrag<(ops node:$V, node:$K1, node:$K2), 8781 (v4i16 (extract_subvector 8782 (v8i16 (extend 8783 (v8i8 (extract_subvector node:$V, node:$K1)))), node:$K2))>; 8784 8785class mul_v16i8<SDPatternOperator mulop, SDPatternOperator extend> : 8786 PatFrag<(ops node:$M, node:$N, node:$K1, node:$K2), 8787 (v4i32 8788 (mulop (v4i16 (ee_v16i8<extend> node:$M, node:$K1, node:$K2)), 8789 (v4i16 (ee_v16i8<extend> node:$N, node:$K1, node:$K2))))>; 8790 8791class idot_v16i8<SDPatternOperator m, SDPatternOperator x> : 8792 PatFrag<(ops node:$M, node:$N), 8793 (i32 (extractelt 8794 (v4i32 (AArch64uaddv 8795 (add 8796 (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 0)), 8797 (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 0))), 8798 (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 4)), 8799 (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 4)))))), 8800 (i64 0)))>; 8801 8802class odot_v16i8<Instruction DOT> : 8803 OutPatFrag<(ops node:$Vm, node:$Vn), 8804 (i32 (ADDVv4i32v 8805 (DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>; 8806 8807class dot_v16i8<Instruction DOT, SDPatternOperator mulop, 8808 SDPatternOperator extend> : 8809 Pat<(idot_v16i8<mulop, extend> V128:$Vm, V128:$Vn), 8810 (odot_v16i8<DOT> V128:$Vm, V128:$Vn)>, 8811 Requires<[HasDotProd]>; 8812 8813let AddedComplexity = 10 in { 8814 def : dot_v4i8<SDOTv8i8, sextloadi8>; 8815 def : dot_v4i8<UDOTv8i8, zextloadi8>; 8816 def : dot_v8i8<SDOTv8i8, AArch64smull, sext>; 8817 def : dot_v8i8<UDOTv8i8, AArch64umull, zext>; 8818 def : dot_v16i8<SDOTv16i8, AArch64smull, sext>; 8819 def : dot_v16i8<UDOTv16i8, AArch64umull, zext>; 8820 8821 // FIXME: add patterns to generate vector by element dot product. 8822 // FIXME: add SVE dot-product patterns. 8823} 8824 8825// Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs, 8826// so that it can be used as input to inline asm, and vice versa. 8827def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>; 8828def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>; 8829def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, 8830 GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)), 8831 (REG_SEQUENCE GPR64x8Class, 8832 $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, 8833 $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>; 8834foreach i = 0-7 in { 8835 def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))), 8836 (EXTRACT_SUBREG $val, !cast<SubRegIndex>("x8sub_"#i))>; 8837} 8838 8839let Predicates = [HasLS64] in { 8840 def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn), 8841 (outs GPR64x8:$Rt)>; 8842 def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn), 8843 (outs)>; 8844 def ST64BV: Store64BV<0b011, "st64bv">; 8845 def ST64BV0: Store64BV<0b010, "st64bv0">; 8846 8847 class ST64BPattern<Intrinsic intrinsic, Instruction instruction> 8848 : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7), 8849 (instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>; 8850 8851 def : ST64BPattern<int_aarch64_st64b, ST64B>; 8852 def : ST64BPattern<int_aarch64_st64bv, ST64BV>; 8853 def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>; 8854} 8855 8856let Predicates = [HasMOPS] in { 8857 let Defs = [NZCV] in { 8858 defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">; 8859 8860 defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">; 8861 8862 defm SETP : MOPSMemorySetInsns<0b00, "setp">; 8863 } 8864 let Uses = [NZCV] in { 8865 defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">; 8866 defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">; 8867 8868 defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">; 8869 defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">; 8870 8871 defm SETM : MOPSMemorySetInsns<0b01, "setm">; 8872 defm SETE : MOPSMemorySetInsns<0b10, "sete">; 8873 } 8874} 8875let Predicates = [HasMOPS, HasMTE] in { 8876 let Defs = [NZCV] in { 8877 defm SETGP : MOPSMemorySetTaggingInsns<0b00, "setgp">; 8878 } 8879 let Uses = [NZCV] in { 8880 defm SETGM : MOPSMemorySetTaggingInsns<0b01, "setgm">; 8881 // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td 8882 defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">; 8883 } 8884} 8885 8886// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain 8887// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain 8888def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>; 8889def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>; 8890def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>; 8891def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>; 8892def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>; 8893 8894// MOPS operations always contain three 4-byte instructions 8895let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in { 8896 let mayLoad = 1 in { 8897 def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb), 8898 (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn), 8899 [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>; 8900 def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb), 8901 (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn), 8902 [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>; 8903 } 8904 let mayLoad = 0 in { 8905 def MOPSMemorySetPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb), 8906 (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm), 8907 [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>; 8908 } 8909} 8910let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in { 8911 def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb), 8912 (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm), 8913 [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>; 8914} 8915 8916//----------------------------------------------------------------------------- 8917// v8.3 Pointer Authentication late patterns 8918 8919let Predicates = [HasPAuth] in { 8920def : Pat<(int_ptrauth_blend GPR64:$Rd, imm64_0_65535:$imm), 8921 (MOVKXi GPR64:$Rd, (trunc_imm imm64_0_65535:$imm), 48)>; 8922def : Pat<(int_ptrauth_blend GPR64:$Rd, GPR64:$Rn), 8923 (BFMXri GPR64:$Rd, GPR64:$Rn, 16, 15)>; 8924} 8925 8926//----------------------------------------------------------------------------- 8927 8928// This gets lowered into an instruction sequence of 20 bytes 8929let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in 8930def StoreSwiftAsyncContext 8931 : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset), 8932 []>, Sched<[]>; 8933 8934def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>; 8935def : Pat<(AArch64AssertZExtBool GPR32:$op), 8936 (i32 GPR32:$op)>; 8937 8938//===----------------------------===// 8939// 2022 Architecture Extensions: 8940//===----------------------------===// 8941 8942def : InstAlias<"clrbhb", (HINT 22), 0>; 8943let Predicates = [HasCLRBHB] in { 8944 def : InstAlias<"clrbhb", (HINT 22), 1>; 8945} 8946 8947//===----------------------------------------------------------------------===// 8948// Translation Hardening Extension (FEAT_THE) 8949//===----------------------------------------------------------------------===// 8950defm RCW : ReadCheckWriteCompareAndSwap; 8951 8952defm RCWCLR : ReadCheckWriteOperation<0b001, "clr">; 8953defm RCWSET : ReadCheckWriteOperation<0b011, "set">; 8954defm RCWSWP : ReadCheckWriteOperation<0b010, "swp">; 8955 8956//===----------------------------------------------------------------------===// 8957// General Data-Processing Instructions (FEAT_V94_DP) 8958//===----------------------------------------------------------------------===// 8959defm ABS : OneOperandData<0b001000, "abs", abs>, Requires<[HasCSSC]>; 8960defm CNT : OneOperandData<0b000111, "cnt", ctpop>, Requires<[HasCSSC]>; 8961defm CTZ : OneOperandData<0b000110, "ctz", cttz>, Requires<[HasCSSC]>; 8962 8963defm SMAX : ComparisonOp<0, 0, "smax", smax>, Requires<[HasCSSC]>; 8964defm SMIN : ComparisonOp<0, 1, "smin", smin>, Requires<[HasCSSC]>; 8965defm UMAX : ComparisonOp<1, 0, "umax", umax>, Requires<[HasCSSC]>; 8966defm UMIN : ComparisonOp<1, 1, "umin", umin>, Requires<[HasCSSC]>; 8967 8968def RPRFM: 8969 I<(outs), (ins rprfop:$Rt, GPR64:$Rm, GPR64sp:$Rn), 8970 "rprfm", "\t$Rt, $Rm, [$Rn]", "", []>, 8971 Sched<[]> { 8972 bits<6> Rt; 8973 bits<5> Rn; 8974 bits<5> Rm; 8975 let Inst{2-0} = Rt{2-0}; 8976 let Inst{4-3} = 0b11; 8977 let Inst{9-5} = Rn; 8978 let Inst{11-10} = 0b10; 8979 let Inst{13-12} = Rt{4-3}; 8980 let Inst{14} = 0b1; 8981 let Inst{15} = Rt{5}; 8982 let Inst{20-16} = Rm; 8983 let Inst{31-21} = 0b11111000101; 8984 let mayLoad = 0; 8985 let mayStore = 0; 8986 let hasSideEffects = 1; 8987 // RPRFM overlaps with PRFM (reg), when the decoder method of PRFM returns 8988 // Fail, the decoder should attempt to decode RPRFM. This requires setting 8989 // the decoder namespace to "Fallback". 8990 let DecoderNamespace = "Fallback"; 8991} 8992 8993//===----------------------------------------------------------------------===// 8994// 128-bit Atomics (FEAT_LSE128) 8995//===----------------------------------------------------------------------===// 8996let Predicates = [HasLSE128] in { 8997 def SWPP : LSE128Base<0b000, 0b00, 0b1, "swpp">; 8998 def SWPPA : LSE128Base<0b000, 0b10, 0b1, "swppa">; 8999 def SWPPAL : LSE128Base<0b000, 0b11, 0b1, "swppal">; 9000 def SWPPL : LSE128Base<0b000, 0b01, 0b1, "swppl">; 9001 def LDCLRP : LSE128Base<0b001, 0b00, 0b0, "ldclrp">; 9002 def LDCLRPA : LSE128Base<0b001, 0b10, 0b0, "ldclrpa">; 9003 def LDCLRPAL : LSE128Base<0b001, 0b11, 0b0, "ldclrpal">; 9004 def LDCLRPL : LSE128Base<0b001, 0b01, 0b0, "ldclrpl">; 9005 def LDSETP : LSE128Base<0b011, 0b00, 0b0, "ldsetp">; 9006 def LDSETPA : LSE128Base<0b011, 0b10, 0b0, "ldsetpa">; 9007 def LDSETPAL : LSE128Base<0b011, 0b11, 0b0, "ldsetpal">; 9008 def LDSETPL : LSE128Base<0b011, 0b01, 0b0, "ldsetpl">; 9009} 9010 9011//===----------------------------------------------------------------------===// 9012// RCPC Instructions (FEAT_LRCPC3) 9013//===----------------------------------------------------------------------===// 9014 9015let Predicates = [HasRCPC3] in { 9016 // size opc opc2 9017 def STILPWpre: BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-8]!", "$Rn = $wback">; 9018 def STILPXpre: BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-16]!", "$Rn = $wback">; 9019 def STILPW: BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0001, (outs), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">; 9020 def STILPX: BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0001, (outs), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">; 9021 def LDIAPPWpre: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0000, (outs GPR64sp:$wback, GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #8", "$Rn = $wback">; 9022 def LDIAPPXpre: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0000, (outs GPR64sp:$wback, GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #16", "$Rn = $wback">; 9023 def LDIAPPW: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0001, (outs GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">; 9024 def LDIAPPX: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0001, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">; 9025 9026 def : Pat<(AArch64ldiapp GPR64sp:$Rn), (LDIAPPX GPR64sp:$Rn)>; 9027 def : Pat<(AArch64stilp GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>; 9028 9029 // Aliases for when offset=0 9030 def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPW GPR32: $Rt, GPR32: $Rt2, GPR64sp:$Rn)>; 9031 def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPX GPR64: $Rt, GPR64: $Rt2, GPR64sp:$Rn)>; 9032 9033 // size opc 9034 def STLRWpre: BaseLRCPC3IntegerLoadStore<0b10, 0b10, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-4]!", "$Rn = $wback">; 9035 def STLRXpre: BaseLRCPC3IntegerLoadStore<0b11, 0b10, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-8]!", "$Rn = $wback">; 9036 def LDAPRWpre: BaseLRCPC3IntegerLoadStore<0b10, 0b11, (outs GPR64sp:$wback, GPR32:$Rt), (ins GPR64sp:$Rn), "ldapr", "\t$Rt, [$Rn], #4", "$Rn = $wback">; 9037 def LDAPRXpre: BaseLRCPC3IntegerLoadStore<0b11, 0b11, (outs GPR64sp:$wback, GPR64:$Rt), (ins GPR64sp:$Rn), "ldapr", "\t$Rt, [$Rn], #8", "$Rn = $wback">; 9038} 9039 9040let Predicates = [HasRCPC3, HasNEON] in { 9041 // size opc regtype 9042 defm STLURb: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b00, FPR8 , (outs), (ins FPR8 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">; 9043 defm STLURh: LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b00, FPR16 , (outs), (ins FPR16 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">; 9044 defm STLURs: LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b00, FPR32 , (outs), (ins FPR32 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">; 9045 defm STLURd: LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b00, FPR64 , (outs), (ins FPR64 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">; 9046 defm STLURq: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b10, FPR128, (outs), (ins FPR128:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">; 9047 defm LDAPURb: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b01, FPR8 , (outs FPR8 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">; 9048 defm LDAPURh: LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b01, FPR16 , (outs FPR16 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">; 9049 defm LDAPURs: LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b01, FPR32 , (outs FPR32 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">; 9050 defm LDAPURd: LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b01, FPR64 , (outs FPR64 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">; 9051 defm LDAPURq: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b11, FPR128, (outs FPR128:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">; 9052 9053 // L 9054 def STL1: LRCPC3NEONLdStSingle<0b0, (outs), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn) , "stl1", "">; 9055 def LDAP1: LRCPC3NEONLdStSingle<0b1, (outs VecListOned:$dst), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp0:$Rn), "ldap1", "$Vt = $dst">; 9056 9057 // Aliases for when offset=0 9058 def : InstAlias<"stl1\t$Vt$Q, [$Rn, #0]", (STL1 VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn)>; 9059} 9060 9061//===----------------------------------------------------------------------===// 9062// 128-bit System Instructions (FEAT_SYSINSTR128) 9063//===----------------------------------------------------------------------===// 9064let Predicates = [HasD128] in { 9065 def SYSPxt : SystemPXtI<0, "sysp">; 9066 9067 def SYSPxt_XZR 9068 : BaseSystemI<0, (outs), 9069 (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, SyspXzrPairOperand:$xzr_pair), 9070 "sysp", "\t$op1, $Cn, $Cm, $op2, $xzr_pair">, 9071 Sched<[WriteSys]> 9072 { 9073 // Had to use a custom decoder because tablegen interprets this as having 4 fields (why?) 9074 // and therefore autogenerates a decoder that builds an MC representation that has 4 fields 9075 // (decodeToMCInst), but when printing we expect the MC representation to have 5 fields (one 9076 // extra for the XZR) because AArch64InstPrinter::printInstruction in AArch64GenAsmWriter.inc 9077 // is based off of the asm template (maybe) and therefore wants to print 5 operands. 9078 // I could add a bits<5> xzr_pair. But without a way to constrain it to 0b11111 here it would 9079 // overlap with the main SYSP instruction. 9080 let DecoderMethod = "DecodeSyspXzrInstruction"; 9081 bits<3> op1; 9082 bits<4> Cn; 9083 bits<4> Cm; 9084 bits<3> op2; 9085 let Inst{22} = 0b1; // override BaseSystemI 9086 let Inst{20-19} = 0b01; 9087 let Inst{18-16} = op1; 9088 let Inst{15-12} = Cn; 9089 let Inst{11-8} = Cm; 9090 let Inst{7-5} = op2; 9091 let Inst{4-0} = 0b11111; 9092 } 9093 9094 def : InstAlias<"sysp $op1, $Cn, $Cm, $op2", 9095 (SYSPxt_XZR imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, XZR)>; 9096} 9097 9098//--- 9099// 128-bit System Registers (FEAT_SYSREG128) 9100//--- 9101 9102// Instruction encoding: 9103// 9104// 31 22|21|20|19|18 16|15 12|11 8|7 5|4 0 9105// MRRS 1101010101| 1| 1|o0| op1| Cn| Cm|op2| Rt 9106// MSRR 1101010101| 0| 1|o0| op1| Cn| Cm|op2| Rt 9107 9108// Instruction syntax: 9109// 9110// MRRS <Xt>, <Xt+1>, <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>> 9111// MSRR <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>, <Xt>, <Xt+1> 9112// 9113// ...where t is even (X0, X2, etc). 9114 9115let Predicates = [HasD128] in { 9116 def MRRS : RtSystemI128<1, 9117 (outs MrrsMssrPairClassOperand:$Rt), (ins mrs_sysreg_op:$systemreg), 9118 "mrrs", "\t$Rt, $systemreg"> 9119 { 9120 bits<16> systemreg; 9121 let Inst{20-5} = systemreg; 9122 } 9123 9124 def MSRR : RtSystemI128<0, 9125 (outs), (ins msr_sysreg_op:$systemreg, MrrsMssrPairClassOperand:$Rt), 9126 "msrr", "\t$systemreg, $Rt"> 9127 { 9128 bits<16> systemreg; 9129 let Inst{20-5} = systemreg; 9130 } 9131} 9132 9133 9134include "AArch64InstrAtomics.td" 9135include "AArch64SVEInstrInfo.td" 9136include "AArch64SMEInstrInfo.td" 9137include "AArch64InstrGISel.td" 9138