//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//

// Generic NEON modified immediate, printed with the VMOV modimm printer.
def nModImm : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
}

// Splat-style modified immediates, one asm-operand class per element size.
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let ParserMatchClass = nImmSplatI8AsmOperand;
  let PrintMethod = "printVMOVModImmOperand";
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let ParserMatchClass = nImmSplatI16AsmOperand;
  let PrintMethod = "printVMOVModImmOperand";
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let ParserMatchClass = nImmSplatI32AsmOperand;
  let PrintMethod = "printVMOVModImmOperand";
}
// "Not" variants (no custom printer).
def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; }
def nImmSplatNotI16 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI16AsmOperand;
}
def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; }
def nImmSplatNotI32 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI32AsmOperand;
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let ParserMatchClass = nImmVMOVI32AsmOperand;
  let PrintMethod = "printVMOVModImmOperand";
}

// Asm operand for an immediate of element size From replicated across a
// vector with element size To.
class nImmVMOVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let Name = "NEONi" # To.Size # "vmovi" # From.Size # "Replicate";
  let PredicateMethod = "isNEONmovReplicate<" # From.Size # ", " # To.Size # ">";
  let RenderMethod = "addNEONvmovi" # From.Size # "ReplicateOperands";
}

// Same shape, for the inverted (VMVN-style) replicate predicate/renderer.
class nImmVINVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let Name = "NEONi" # To.Size # "invi" # From.Size # "Replicate";
  let PredicateMethod = "isNEONinvReplicate<" # From.Size # ", " # To.Size # ">";
  let RenderMethod = "addNEONinvi" # From.Size # "ReplicateOperands";
}

class nImmVMOVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let ParserMatchClass = nImmVMOVIAsmOperandReplicate<From, To>;
  let PrintMethod = "printVMOVModImmOperand";
}

class nImmVINVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let ParserMatchClass = nImmVINVIAsmOperandReplicate<From, To>;
  let PrintMethod = "printVMOVModImmOperand";
}

def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
  let PrintMethod = "printVMOVModImmOperand";
}
// Floating-point immediates go through the generic FP immediate machinery.
def nImmVMOVF32 : Operand<i32> {
  let ParserMatchClass = FPImmOperand;
  let PrintMethod = "printFPImmOperand";
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let ParserMatchClass = nImmSplatI64AsmOperand;
  let PrintMethod = "printVMOVModImmOperand";
}

// Lane-index operands. The upper bound shrinks with the element size: a D
// register holds 8 byte lanes, 4 half-word lanes, 2 word lanes, 1 dword lane.
def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex64 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 1;
}]> {
  let ParserMatchClass = VectorIndex64Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}

// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
// A spaced pair covers D<n> and D<n+2>, which is modeled by the DPairSpc
// register class (consistent with VecListDPairSpacedAllLanes), not by the
// consecutive-pair class DPair.
def VecListDPairSpaced : RegisterOperand<DPairSpc, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}

// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes
    : RegisterOperand<DPair, "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpacedAllLanes
    : RegisterOperand<DPairSpc, "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes
    : RegisterOperand<DPR, "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes
    : RegisterOperand<DPR, "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes
    : RegisterOperand<DPR, "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}


// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}


// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// PatFrags that discriminate loads/stores by their alignment.
def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;
def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                    (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

def SDTARMVTST : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVTST>;

// Types for vector shift by immediates. The base profile covers shifts where
// source and destination have the same type. The "SHX" version is for long
// and narrow operations where the source and destination vectors have
// different types. The "SHINS" version is for shift and insert operations.
// NOTE(review): SDTARMVSHIMM was used by the VRSHR/VQSHL nodes below without
// a definition anywhere in this file; its definition is restored here.
def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                        SDTCisVT<2, i32>]>;
def SDTARMVSHXIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHINSIMM : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                           SDTCisSameAs<0, 2>,
                                           SDTCisVT<3, i32>]>;

def NEONvshrnImm : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;

def NEONvrshrsImm : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
def NEONvrshruImm : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>;
def NEONvrshrnImm : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>;

def NEONvqshlsImm : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>;
def NEONvqshluImm : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>;
def NEONvqshlsuImm : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>;
def NEONvqshrnsImm : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqshrnuImm : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqshrnsuImm : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>;

def NEONvqrshrnsImm : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqrshrnuImm : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;

def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;

def NEONvbsp : SDNode<"ARMISD::VBSP",
                      SDTypeProfile<1, 3, [SDTCisVec<0>,
                                           SDTCisSameAs<0, 1>,
                                           SDTCisSameAs<0, 2>,
                                           SDTCisSameAs<0, 3>]>>;

def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                      SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                        SDTCisSameAs<0, 2>,
                                        SDTCisSameAs<0, 3>]>;
def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                       SDTCisVT<2, v8i8>]>;
def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                       SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;


//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                   IIC_fpLoad_m, "",
                   [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                   IIC_fpStore_m, "",
                   [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// One Q register of loaded data.
class VLDQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

// Two Q registers of loaded data.
class VLDQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;


// Four Q registers of loaded data; $src ties the untouched part of the group.
class VLDQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src), itin,
                "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb, $src = $dst">;

let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {

// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b10, 0b0111, op7_4, (outs VecListOneD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b10, 0b1010, op7_4, (outs VecListDPair:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x2,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def VLD1d8  : VLD1D<{0,0,0,?}, "8",  addrmode6align64>;
def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>;

def VLD1q8  : VLD1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with address register writeback:
multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b10, 0b0111, op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0, 0b10, 0b0111, op7_4,
                        (outs VecListOneD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b10, 0b1010, op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0, 0b10, 0b1010, op7_4,
                        (outs VecListDPair:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b10, 0b0110, op7_4, (outs VecListThreeD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
          "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b10, 0b0110, op7_4,
                     (outs VecListThreeD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0, 0b10, 0b0110, op7_4,
                        (outs VecListThreeD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8T  : VLD1D3<{0,0,0,?}, "8",  addrmode6align64>;
def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;

def VLD1d8TPseudo  : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d16TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d32TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

def VLD1q8HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q8LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt,
          "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b10, 0b0010, op7_4,
                     (outs VecListFourD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0, 0b10, 0b0010, op7_4,
                        (outs VecListFourD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8Q  : VLD1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

defm VLD1d8Qwb  : VLD1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

def VLD1d8QPseudo  : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d16QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d32QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

def VLD1q8LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q8HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

// VLD2 : Vector Load (multiple 2-element structures)
// VdTy selects the destination register-list flavor (paired, four-D, or
// double-spaced pair in the defs below).
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
          (ins AddrMode:$Rn), itin,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111; // No-writeback form (contrast VLD2WB below).
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def VLD2d8  : VLD2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VLD2,
                   addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
                   addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
                   addrmode6align64or128>, Sched<[WriteVLD2]>;

def VLD2q8  : VLD2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VLD2x2,
                   addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
                   addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
                   addrmode6align64or128or256>, Sched<[WriteVLD4]>;

def VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;

// ...with address register writeback:
// "_fixed" is the "$Rn!" post-increment form, "_register" takes the
// increment in $Rm; both return the updated base in $wb.
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                  RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), itin,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), itin,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VLD2d8wb  : VLD2WB<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;

defm VLD2q8wb  : VLD2WB<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;

def VLD2q8PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q16PseudoWB_fixed   : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q32PseudoWB_fixed   : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q16PseudoWB_register :
VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;

// ...with double-spaced registers
def VLD2b8  : VLD2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VLD2,
                   addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
                   addrmode6align64or128>, Sched<[WriteVLD2]>;
def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
                   addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2b8wb  : VLD2WB<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;

// VLD3 : Vector Load (multiple 3-element structures)
// Three separate DPR outputs (not a register list operand).
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$Rn), IIC_VLD3,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> {
  let Rm = 0b1111; // No-writeback form (contrast VLD3DWB below).
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VLD3d8  : VLD3D<0b0100, {0,0,0,?}, "8">;
def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;

// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;

// ...with double-spaced registers:
def VLD3q8  : VLD3D<0b0101, {0,0,0,?}, "8">;
def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;

def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;

// ...alternate versions to be allocated odd register numbers:
def VLD3q8oddPseudo  : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;

def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;

// VLD4 : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$Rn), IIC_VLD4,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>,
    Sched<[WriteVLD4]> {
  let Rm = 0b1111; // No-writeback form (contrast VLD4DWB below).
  let Inst{5-4} =
Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VLD4d8  : VLD4D<0b0000, {0,0,?,?}, "8">;
def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;

// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;

// ...with double-spaced registers:
def VLD4q8  : VLD4D<0b0001, {0,0,?,?}, "8">;
def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;
def VLD4q8_UPD  : VLD4DWB<0b0001, {0,0,?,?}, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;

def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;

// ...alternate versions to be allocated odd register numbers:
def VLD4q8oddPseudo  : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4q16oddPseudo :
VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;

def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

// Classes for VLD*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Each ties $src to $dst ("$src = $dst") so the lanes that are not loaded
// keep their previous contents; the WB variants additionally return the
// updated base address in $wb.
class VLDQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst),
                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst),
                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst),
                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;

// VLD1LN : Vector Load (single element to one lane)
class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
            (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt,
"\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                          (i32 (LoadOp addrmode6:$Rn)),
                                          imm:$lane))]> {
  let Rm = 0b1111; // No-writeback form (see VLD1LNWB below).
  let DecoderMethod = "DecodeVLD1LN";
}
// 32-bit lane variant; uses the addrmode6oneL32 address operand instead of
// plain addrmode6.
class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
            (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
            IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
            "$src = $Vd",
            [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                          (i32 (LoadOp addrmode6oneL32:$Rn)),
                                          imm:$lane))]>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}
// Q-register lane-load pseudo carrying the isel pattern.
class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln>,
                                                    Sched<[WriteVLD1]> {
  let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
                                               (i32 (LoadOp addrmode6:$addr)),
                                               imm:$lane))];
}

// The lane number occupies different Inst bits per element size.
def VLD1LNd8  : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
  let Inst{7-6} = lane{1-0};
  let Inst{5-4} = Rn{5-4};
}
def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;

// Select the lane loads for floating-point element insertions as well.
let Predicates = [HasNEON] in {
def : Pat<(vector_insert (v4f16 DPR:$src),
                         (f16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v8f16 QPR:$src),
                         (f16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4bf16 DPR:$src),
                         (bf16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd16 addrmode6:$addr, DPR:$src,
imm:$lane)>;
def : Pat<(vector_insert (v8bf16 QPR:$src),
                         (bf16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v2f32 DPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4f32 QPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

// A 64-bit subvector insert to the first 128-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
}


let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {

// ...with address register writeback:
class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
  let DecoderMethod = "DecodeVLD1LN";
}

def VLD1LNd8_UPD  : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{4};
  let Inst{4} = Rn{4};
}

def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;

// VLD2LN : Vector Load (single 2-element structure to one lane)
// Both destination registers are tied to sources so the other lanes are
// preserved.
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
            IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> {
  let Rm = 0b1111; // No-writeback form (see VLD2LNWB below).
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;

// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;

// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  :
NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
            "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;

def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;

// VLD3LN : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> {
  let Rm = 0b1111; // No-writeback form (see VLD3LNWB below).
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32 : VLD3LN<0b1010,
{?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;

// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;

// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VLD3lnu, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
            []>, Sched<[WriteVLD2]> {
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;

def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>,
Sched<[WriteVLD2]>;
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;

// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>,
    Sched<[WriteVLD2]> {
  let Rm = 0b1111; // No-writeback form (see VLD4LNWB below).
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;

// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;

// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VLD4lnu, "vld4", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
            []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;

def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

// VLD1DUP : Vector Load (single element to all lanes)
// Carries the isel pattern: load one element via LoadOp and ARMvdup it
// across the destination.
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
              Operand AddrMode>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
          (ins AddrMode:$Rn),
          IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListOneDAllLanes:$Vd,
                (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
    Sched<[WriteVLD2]> {
  let Rm = 0b1111; // No-writeback form (see VLD1DUPWB below).
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}
def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8,
                         addrmode6dupalignNone>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
                         addrmode6dupalign16>;
def VLD1DUPd32 :
VLD1DUP<{1,0,0,?}, "32", v2i32, load,
                         addrmode6dupalign32>;

let Predicates = [HasNEON] in {
def : Pat<(v2f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPd32 addrmode6:$addr)>;
}

// Q-register (D-pair) version of the all-lanes dup load.
class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
               Operand AddrMode>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1dup,
          "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListDPairAllLanes:$Vd,
                (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}

def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8,
                          addrmode6dupalignNone>;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
                          addrmode6dupalign16>;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
                          addrmode6dupalign32>;

let Predicates = [HasNEON] in {
def : Pat<(v4f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPq32 addrmode6:$addr)>;
}

let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}
// Q-register (D-pair) writeback versions of the all-lanes dup load.
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}

defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;

defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;

// VLD2DUP : Vector Load (single 2-element structure to all lanes)
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
          (ins AddrMode:$Rn), IIC_VLD2dup,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111; // No-writeback form (see VLD2DUPWB below).
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2DupInstruction";
}

def
VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8",  VecListDPairAllLanes,
                     addrmode6dupalign16>;
def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
                         addrmode6dupalign32>;
def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
                         addrmode6dupalign64>;

// HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or
// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
// ...with double-spaced registers
def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8",  VecListDPairSpacedAllLanes,
                           addrmode6dupalign16>;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                           addrmode6dupalign32>;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                           addrmode6dupalign64>;

def VLD2DUPq8EvenPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq8OddPseudo   : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;

// ...with address register writeback:
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
                     Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
                     (outs VdTy:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD2dupu,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1101, op7_4,
                        (outs VdTy:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
}

defm VLD2DUPd8wb    : VLD2DUPWB<{0,0,0,0}, "8",  VecListDPairAllLanes,
                                addrmode6dupalign16>;
defm VLD2DUPd16wb   : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
                                addrmode6dupalign32>;
defm VLD2DUPd32wb   : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
                                addrmode6dupalign64>;

defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8",  VecListDPairSpacedAllLanes,
                                addrmode6dupalign16>;
defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                                addrmode6dupalign32>;
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                                addrmode6dupalign64>;

// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6dup:$Rn), IIC_VLD3dup,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
    Sched<[WriteVLD2]> {
  let Rm = 0b1111; // No-writeback form (see VLD3DUPWB below).
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;

// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 :
VLD3DUP<{1,0,1,?}, "32">; 1540 1541def VLD3DUPq8EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>; 1542def VLD3DUPq8OddPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>; 1543def VLD3DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>; 1544def VLD3DUPq16OddPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>; 1545def VLD3DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>; 1546def VLD3DUPq32OddPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>; 1547 1548// ...with address register writeback: 1549class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> 1550 : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 1551 (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu, 1552 "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm", 1553 "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { 1554 let Inst{4} = 0; 1555 let DecoderMethod = "DecodeVLD3DupInstruction"; 1556} 1557 1558def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>; 1559def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>; 1560def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>; 1561 1562def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>; 1563def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>; 1564def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>; 1565 1566def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>; 1567def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>; 1568def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>; 1569 1570// VLD4DUP : Vector Load (single 4-element structure to all lanes) 1571class VLD4DUP<bits<4> op7_4, string Dt> 1572 : NLdSt<1, 0b10, 0b1111, op7_4, 1573 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), 1574 (ins addrmode6dup:$Rn), IIC_VLD4dup, 1575 "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> { 1576 let Rm = 0b1111; 1577 let Inst{4} = 
Rn{4}; 1578 let DecoderMethod = "DecodeVLD4DupInstruction"; 1579} 1580 1581def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">; 1582def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">; 1583def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } 1584 1585def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1586def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1587def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1588 1589// ...with double-spaced registers (not used for codegen): 1590def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">; 1591def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">; 1592def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } 1593 1594def VLD4DUPq8EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1595def VLD4DUPq8OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1596def VLD4DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1597def VLD4DUPq16OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1598def VLD4DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1599def VLD4DUPq32OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>; 1600 1601// ...with address register writeback: 1602class VLD4DUPWB<bits<4> op7_4, string Dt> 1603 : NLdSt<1, 0b10, 0b1111, op7_4, 1604 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 1605 (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu, 1606 "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm", 1607 "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { 1608 let Inst{4} = Rn{4}; 1609 let DecoderMethod = "DecodeVLD4DupInstruction"; 1610} 1611 1612def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">; 1613def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">; 1614def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } 1615 1616def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">; 1617def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">; 1618def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } 1619 1620def 
VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>; 1621def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>; 1622def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>; 1623 1624} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 1625 1626let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { 1627 1628// Classes for VST* pseudo-instructions with multi-register operands. 1629// These are expanded to real instructions after register allocation. 1630class VSTQPseudo<InstrItinClass itin> 1631 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">; 1632class VSTQWBPseudo<InstrItinClass itin> 1633 : PseudoNLdSt<(outs GPR:$wb), 1634 (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin, 1635 "$addr.addr = $wb">; 1636class VSTQWBfixedPseudo<InstrItinClass itin> 1637 : PseudoNLdSt<(outs GPR:$wb), 1638 (ins addrmode6:$addr, QPR:$src), itin, 1639 "$addr.addr = $wb">; 1640class VSTQWBregisterPseudo<InstrItinClass itin> 1641 : PseudoNLdSt<(outs GPR:$wb), 1642 (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin, 1643 "$addr.addr = $wb">; 1644class VSTQQPseudo<InstrItinClass itin> 1645 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">; 1646class VSTQQWBPseudo<InstrItinClass itin> 1647 : PseudoNLdSt<(outs GPR:$wb), 1648 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin, 1649 "$addr.addr = $wb">; 1650class VSTQQWBfixedPseudo<InstrItinClass itin> 1651 : PseudoNLdSt<(outs GPR:$wb), 1652 (ins addrmode6:$addr, QQPR:$src), itin, 1653 "$addr.addr = $wb">; 1654class VSTQQWBregisterPseudo<InstrItinClass itin> 1655 : PseudoNLdSt<(outs GPR:$wb), 1656 (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin, 1657 "$addr.addr = $wb">; 1658 1659class VSTQQQQPseudo<InstrItinClass itin> 1660 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">; 1661class VSTQQQQWBPseudo<InstrItinClass itin> 1662 : PseudoNLdSt<(outs GPR:$wb), 1663 (ins addrmode6:$addr, 
am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb">;

// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
          IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> {
  let Rm = 0b1111; // Rm == 0b1111 encodes the no-writeback form.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
          IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def VST1d8  : VST1D<{0,0,0,?}, "8",  addrmode6align64>;
def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>;
def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>;
def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>;

def VST1q8  : VST1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with address register writeback:
// NOTE(review): the writeback store defs below use load itineraries
// (IIC_VLD1u, IIC_VLD1x2u, IIC_VLD1x3u) on "vst1" instructions — confirm
// this is intentional and not a copy-paste of the VLD variants.
multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
                        IIC_VLD1u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
                        IIC_VLD1x2u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8",  addrmode6align64>;
defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;

defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins AddrMode:$Rn, VecListThreeD:$Vd),
          IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> {
  let Rm = 0b1111; // Rm == 0b1111 encodes the no-writeback form.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
                        IIC_VLD1x3u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8T  : VST1D3<{0,0,0,?}, "8",  addrmode6align64>;
def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;

def VST1d8TPseudo             : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d8TPseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d8TPseudoWB_register  : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d16TPseudo            : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d16TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d16TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d32TPseudo            : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d32TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d32TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>,
Sched<[WriteVST3]>;
def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d64TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;

def VST1q8HighTPseudo  : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q16HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q32HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q64HighTPseudo : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;

def VST1q8HighTPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q16HighTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q32HighTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q64HighTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;

def VST1q8LowTPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q16LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q32LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q64LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;

// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins AddrMode:$Rn, VecListFourD:$Vd),
          IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
          []>, Sched<[WriteVST4]> {
  let Rm = 0b1111; // Rm == 0b1111 encodes the no-writeback form.
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1x4u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8Q  : VST1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

def VST1d8QPseudo             : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d8QPseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d8QPseudoWB_register  : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d16QPseudo            : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d16QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d16QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d32QPseudo            : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d32QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d32QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d64QPseudo            : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d64QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;

def VST1q8HighQPseudo  : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q16HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q32HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q64HighQPseudo : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;

def VST1q8HighQPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q16HighQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q32HighQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q64HighQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;

def VST1q8LowQPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q16LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q32LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q64LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;

// VST2 : Vector Store (multiple 2-element structures)
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd),
          itin, "vst2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111; // Rm == 0b1111 encodes the no-writeback form.
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def VST2d8  : VST2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VST2,
                   addrmode6align64or128>, Sched<[WriteVST2]>;
def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
                   addrmode6align64or128>, Sched<[WriteVST2]>;
def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
                   addrmode6align64or128>, Sched<[WriteVST2]>;

def VST2q8  : VST2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VST2x2,
                   addrmode6align64or128or256>, Sched<[WriteVST4]>;
def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
                   addrmode6align64or128or256>, Sched<[WriteVST4]>;
def VST2q32 :
VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
     addrmode6align64or128or256>, Sched<[WriteVST4]>;

def VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;

// ...with address register writeback:
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
                   RegisterOperand VdTy, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}
multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VST2d8wb  : VST2DWB<0b1000, {0,0,?,?}, "8",  VecListDPair,
                         addrmode6align64or128>;
defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
                         addrmode6align64or128>;
defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
                         addrmode6align64or128>;

defm VST2q8wb  : VST2QWB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;

def VST2q8PseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q16PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q32PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q8PseudoWB_register  : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;

// ...with double-spaced registers
def VST2b8  : VST2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VST2,
                   addrmode6align64or128>;
def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
                   addrmode6align64or128>;
def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
                   addrmode6align64or128>;
defm VST2b8wb  : VST2DWB<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced,
                         addrmode6align64or128>;
defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
                         addrmode6align64or128>;
defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
                         addrmode6align64or128>;

// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> {
  let Rm = 0b1111; // Rm == 0b1111 encodes the no-writeback form.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8  : VST3D<0b0100, {0,0,0,?}, "8">;
def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;

// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// ...with double-spaced registers:
def VST3q8  : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// ...alternate versions to be allocated odd register numbers:
def VST3q8oddPseudo  : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;

def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
          "", []>, Sched<[WriteVST4]> {
  let Rm = 0b1111; // Rm == 0b1111 encodes the no-writeback form.
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8  : VST4D<0b0000, {0,0,?,?}, "8">;
def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;

// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
          "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

// ...with double-spaced registers:
def VST4q8  : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

// ...alternate versions to be allocated odd register numbers:
def VST4q8oddPseudo  : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;

def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1

// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VSTQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;

// VST1LN : Vector Store (single element from one lane)
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
            IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
            [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>,
    Sched<[WriteVST1]> {
  let Rm = 0b1111; // Rm == 0b1111 encodes the no-writeback form.
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> {
  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                          addrmode6:$addr)];
}

def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
                      ARMvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                       ARMvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}

def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
                       addrmode6oneL32> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, ARMvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, ARMvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;

// Match floating-point lane stores onto the integer lane-store instructions
// and pseudos defined above.
let Predicates = [HasNEON] in {
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

def : Pat<(store (extractelt (v4f16 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v8f16 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
}

// ...with address register writeback:
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins AdrMode:$Rn, am6offset:$Rm,
             DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb",
            [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                                    AdrMode:$Rn, am6offset:$Rm))]>,
    Sched<[WriteVST1]> {
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> {
  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                                        addrmode6:$addr, am6offset:$offset))];
}

def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
                            ARMvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
                             ARMvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
                             extractelt, addrmode6oneL32> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, ARMvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16, ARMvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;

let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {

// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
            IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
            "", []>, Sched<[WriteVST1]> {
  let Rm = 0b1111; // Rm == 0b1111 encodes the no-writeback form.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;

// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{4} = Rn{4};
}

def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;

// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
            "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;

def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;

// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
    Sched<[WriteVST2]> {
  let Rm = 0b1111; // Rm == 0b1111 encodes the no-writeback form.
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16
: VST3LN<0b0110, {?,?,0,0}, "16"> { 2291 let Inst{7-6} = lane{1-0}; 2292} 2293def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> { 2294 let Inst{7} = lane{0}; 2295} 2296 2297def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>; 2298def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>; 2299def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>; 2300 2301// ...with double-spaced registers: 2302def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> { 2303 let Inst{7-6} = lane{1-0}; 2304} 2305def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> { 2306 let Inst{7} = lane{0}; 2307} 2308 2309def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>; 2310def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>; 2311 2312// ...with address register writeback: 2313class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2314 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2315 (ins addrmode6:$Rn, am6offset:$Rm, 2316 DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane), 2317 IIC_VST3lnu, "vst3", Dt, 2318 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm", 2319 "$Rn.addr = $wb", []> { 2320 let DecoderMethod = "DecodeVST3LN"; 2321} 2322 2323def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> { 2324 let Inst{7-5} = lane{2-0}; 2325} 2326def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> { 2327 let Inst{7-6} = lane{1-0}; 2328} 2329def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> { 2330 let Inst{7} = lane{0}; 2331} 2332 2333def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2334def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2335def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2336 2337def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> { 2338 let Inst{7-6} = lane{1-0}; 2339} 2340def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> { 2341 let Inst{7} = lane{0}; 2342} 2343 2344def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2345def VST3LNq32Pseudo_UPD 
: VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2346 2347// VST4LN : Vector Store (single 4-element structure from one lane) 2348class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt> 2349 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 2350 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, 2351 nohash_imm:$lane), IIC_VST4ln, "vst4", Dt, 2352 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn", 2353 "", []>, Sched<[WriteVST2]> { 2354 let Rm = 0b1111; 2355 let Inst{4} = Rn{4}; 2356 let DecoderMethod = "DecodeVST4LN"; 2357} 2358 2359def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> { 2360 let Inst{7-5} = lane{2-0}; 2361} 2362def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> { 2363 let Inst{7-6} = lane{1-0}; 2364} 2365def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> { 2366 let Inst{7} = lane{0}; 2367 let Inst{5} = Rn{5}; 2368} 2369 2370def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2371def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2372def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2373 2374// ...with double-spaced registers: 2375def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> { 2376 let Inst{7-6} = lane{1-0}; 2377} 2378def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> { 2379 let Inst{7} = lane{0}; 2380 let Inst{5} = Rn{5}; 2381} 2382 2383def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2384def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2385 2386// ...with address register writeback: 2387class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2388 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2389 (ins addrmode6:$Rn, am6offset:$Rm, 2390 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), 2391 IIC_VST4lnu, "vst4", Dt, 2392 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm", 2393 "$Rn.addr = $wb", []> { 2394 let Inst{4} = Rn{4}; 2395 let DecoderMethod = "DecodeVST4LN"; 2396} 2397 2398def VST4LNd8_UPD : 
VST4LNWB<0b0011, {?,?,?,?}, "8"> { 2399 let Inst{7-5} = lane{2-0}; 2400} 2401def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> { 2402 let Inst{7-6} = lane{1-0}; 2403} 2404def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> { 2405 let Inst{7} = lane{0}; 2406 let Inst{5} = Rn{5}; 2407} 2408 2409def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2410def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2411def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2412 2413def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> { 2414 let Inst{7-6} = lane{1-0}; 2415} 2416def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> { 2417 let Inst{7} = lane{0}; 2418 let Inst{5} = Rn{5}; 2419} 2420 2421def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2422def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2423 2424} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 2425 2426// Use vld1/vst1 for unaligned f64 load / store 2427let Predicates = [IsLE,HasNEON] in { 2428def : Pat<(f64 (hword_alignedload addrmode6:$addr)), 2429 (VLD1d16 addrmode6:$addr)>; 2430def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr), 2431 (VST1d16 addrmode6:$addr, DPR:$value)>; 2432def : Pat<(f64 (byte_alignedload addrmode6:$addr)), 2433 (VLD1d8 addrmode6:$addr)>; 2434def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr), 2435 (VST1d8 addrmode6:$addr, DPR:$value)>; 2436} 2437let Predicates = [IsBE,HasNEON] in { 2438def : Pat<(f64 (non_word_alignedload addrmode6:$addr)), 2439 (VLD1d64 addrmode6:$addr)>; 2440def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr), 2441 (VST1d64 addrmode6:$addr, DPR:$value)>; 2442} 2443 2444// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64 2445// load / store if it's legal. 
// Select a vld1/vst1 whose element size matches the known alignment of the
// v2f64 access, so no stricter alignment than available is required.
let Predicates = [HasNEON] in {
def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
          (VLD1q64 addrmode6:$addr)>;
def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q64 addrmode6:$addr, QPR:$value)>;
}
// The narrower-element forms are little-endian only (element order matters).
let Predicates = [IsLE,HasNEON] in {
def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
          (VLD1q32 addrmode6:$addr)>;
def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q32 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
          (VLD1q16 addrmode6:$addr)>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q16 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
          (VLD1q8 addrmode6:$addr)>;
def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q8 addrmode6:$addr, QPR:$value)>;
}

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations: double- and quad-register.
// N2VD/N2VQ: one source, one destination vector; pattern applies the SDNode
// OpNode directly. The '0'/'1' literal in the N2V base is the Q bit
// (double- vs quad-register form).
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
// Same shape as above, but matching an intrinsic (SDPatternOperator) and
// taking the itinerary as a parameter.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as above, but not predicated.
// Non-predicated (N2Vnp-based) 2-register intrinsic forms.
class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Similar to NV2VQIntnp with some more encoding bits exposed (crypto).
class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                 bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as N2VQIntXnp but with Vd as a src register.
// $src is tied to $Vd, so the intrinsic sees the previous destination value.
class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                  bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
  let Constraints = "$src = $Vd";
}

// Narrow 2-register operations.
// Narrowing: Q-register source, D-register result.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;

// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;

// Long 2-register operations (currently only used for VMOVL).
// Widening: D-register source, Q-register result.
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;

// Long 2-register intrinsics.
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Shuffles update both registers in place: both outputs are tied to the
// corresponding inputs ($src1 = $Vd, $src2 = $Vm).
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
        OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;

// Basic 3-register operations: double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Scalar ("by lane") forms: $Vm is a D register restricted so the lane index
// fits the encoding (DPR_VFP2 for 32-bit lanes, DPR_8 for 16-bit lanes).
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                     imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                     imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Non-predicated variant (N3Vnp base).
class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}

class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}
// "Sh" form: assembly and pattern take $Vm before $Vn (operand order is
// swapped relative to N3VDInt), and the two-operand alias ties $Vm.
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Same as N3VQIntnp but with Vd as a src register.
class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                 string Dt, ValueType ResTy, ValueType OpTy,
                 SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
          f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
                                       (OpTy QPR:$Vm))))]> {
  // Accumulating form: the destination doubles as the first source.
  let Constraints = "$src = $Vd";
}

class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]> {
  let isCommutable = 0;
}
// "Sh" form: operands swapped ($Vm first) like N3VDIntSh above.
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

// Multiply-Add/Sub operations: double- and quad-register.
// Pattern shape: Vd = OpNode(src1, MulOp(Vn, Vm)), with $src1 tied to $Vd.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
                                                          imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (ARMvduplane (Ty DPR_8:$Vm),
                                                          imm:$lane)))))))]>;

class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                                   imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                                   imm:$lane)))))))]>;

// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// Pattern shape: Vd = OpNode(src1, IntOp(Vn, Vm)), with $src1 tied to $Vd.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
                                     (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
                                     (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Long Multiply-Add/Sub operations.
// Q-register accumulator, D-register multiply operands:
// Vd(Q) = OpNode(src1(Q), MulOp(Vn(D), Vm(D))).
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (MulOp (TyD DPR:$Vn),
                                           (TyD DPR:$Vm)))))]>;
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                    (OpNode (TyQ QPR:$src1),
                            (TyQ (MulOp (TyD DPR:$Vn),
                                        (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),
                                                          imm:$lane))))))]>;
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set QPR:$Vd,
                    (OpNode (TyQ QPR:$src1),
                            (TyQ (MulOp (TyD DPR:$Vn),
                                        (TyD (ARMvduplane (TyD DPR_8:$Vm),
                                                          imm:$lane))))))]>;

// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// Vd(Q) = OpNode(src1(Q), ExtOp(IntOp(Vn(D), Vm(D)))) — the D-sized
// intrinsic result is widened by ExtOp before accumulation.
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                   SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                       (TyD DPR:$Vm)))))))]>;

// Neon Long 3-argument intrinsic. The destination register is
// a quad-register and is also used as the first source operand register.
// Long 3-argument intrinsic: the intrinsic itself takes the Q-register
// accumulator plus two D-register operands; $Vd is tied to $src1.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
// Scalar ("by lane") form of the long 3-argument intrinsic, 32-bit elements;
// the DPR_VFP2 scalar is splatted with ARMvduplane.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                     imm:$lane)))))]>;
// Same as N3VLInt3SL but for 16-bit elements, with the scalar restricted to
// the DPR_8 register class.
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$src1),
                                  (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                     imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// Narrowing 3-register intrinsic: two Q-register sources, D-register result.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operations.
// Two D-register sources produce a widened Q-register result via OpNode.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Scalar ("by lane") long operation, 32-bit elements (DPR_VFP2 scalar).
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                (TyQ (OpNode (TyD DPR:$Vn),
                             (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
// Scalar ("by lane") long operation, 16-bit elements (DPR_8 scalar).
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set QPR:$Vd,
                (TyQ (OpNode (TyD DPR:$Vn),
                             (TyD (ARMvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;

// Long 3-register operations with explicitly extended operands.
// Long 3-register operation where both D-register operands are first widened
// by ExtOp before the Q-sized OpNode is applied.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode, SDPatternOperator ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL).
// The D-sized intrinsic result is widened by ExtOp to produce the Q result.
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                 bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                               (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
// The intrinsic itself performs the widening: TyD x TyD -> TyQ.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Same as above, but not predicated.
// Non-predicated long 3-register intrinsic (uses the N3Vnp base).
// NOTE(review): the Commutable template argument is accepted here but not
// used in the body — callers pass it, but no isCommutable override is set.
class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// Scalar ("by lane") long intrinsic, 32-bit elements (DPR_VFP2 scalar).
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
                                                     imm:$lane)))))]>;
// Scalar ("by lane") long intrinsic, 16-bit elements (DPR_8 scalar).
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (OpTy DPR:$Vn),
                                  (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
                                                     imm:$lane)))))]>;

// Wide 3-register operations.
// First operand is already Q-sized; only the D-register $Vm is widened by
// ExtOp before OpNode combines them.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDPatternOperator ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
// Quad-register variant of N2VDPLInt.
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
// Pairwise long accumulate, double-register: $Vd is tied to $src1 and fed
// back into the intrinsic as the accumulator.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
// Quad-register variant of N2VDPLInt2.
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;

// Shift by immediate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Double-register shift by immediate; $SIMM is the shift amount operand.
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
// Quad-register variant of N2VDSh.
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
}

// Long shift by immediate.
// Long shift by immediate: D-register source, Q-register (widened) result.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
           IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;

// Narrow shift by immediate. Q-register source, D-register (narrowed) result.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
                                         (i32 ImmTy:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Shift-right-and-accumulate, double-register: the shifted $Vm is added into
// the tied accumulator $src1.
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (add DPR:$src1,
                                (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
// Quad-register variant of N2VDShAdd.
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (add QPR:$src1,
                                (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
}

// Shift by immediate and insert,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Shift-and-insert (VSLI/VSRI style), double-register: ShOp receives the
// tied destination, the source, and the shift amount.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
// Quad-register variant of N2VDShIns.
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
}

// Convert, with fractional bits immediate,
// both double- and quad-register.
// Fixed-point convert, double-register: $SIMM holds the fractional-bits
// immediate (neon_vcvt_imm32).
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
// Quad-register variant of N2VCvtD.
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 2-register vector operations and intrinsics.

// Neon 2-register comparisons.
// source operand element sizes of 8, 16 and 32 bits:
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm, PatFrag fc> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), fc)))]>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), fc)))]>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), fc)))]>;
  // Floating-point compares produce an integer mask of the same lane count.
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), fc)))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
  def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f16", asm, "",
                  [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), fc)))]>,
              Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), fc)))]>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), fc)))]>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), fc)))]>;
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), fc)))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
  def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f16", asm, "",
                  [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), fc)))]>,
              Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}

// Neon 3-register comparisons.
class N3VQ_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (ARMvcmp (OpTy QPR:$Vn), (OpTy QPR:$Vm), fc)))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Double-register variant of N3VQ_cmp.
class N3VD_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (ARMvcmp (OpTy DPR:$Vn), (OpTy DPR:$Vm), fc)))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// 3-register comparisons for element sizes of 8, 16 and 32 bits, in both
// double- and quad-register forms.
multiclass N3V_QHS_cmp<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       PatFrag fc, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD_cmp<op24, op23, 0b00, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "8"),
                       v8i8, v8i8, fc, Commutable>;
  def v4i16 : N3VD_cmp<op24, op23, 0b01, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "16"),
                       v4i16, v4i16, fc, Commutable>;
  def v2i32 : N3VD_cmp<op24, op23, 0b10, op11_8, op4, itinD32,
                       OpcodeStr, !strconcat(Dt, "32"),
                       v2i32, v2i32, fc, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ_cmp<op24, op23, 0b00, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "8"),
                       v16i8, v16i8, fc, Commutable>;
  def v8i16 : N3VQ_cmp<op24, op23, 0b01, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "16"),
                       v8i16, v8i16, fc, Commutable>;
  def v4i32 : N3VQ_cmp<op24, op23, 0b10, op11_8, op4, itinQ32,
                       OpcodeStr, !strconcat(Dt, "32"),
                       v4i32, v4i32, fc, Commutable>;
}


// Neon 2-register vector intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
}


// Neon Narrowing 2-register vector operations,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "16"),
                   v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "32"),
                   v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "64"),
                   v2i32, v2i64, OpNode>;
}

// Neon Narrowing 2-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}


// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

// Scalar ("by lane") forms for 16- and 32-bit elements.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
                     v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     SDPatternOperator IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}
// "Sh" variant: built on the *IntSh instruction classes and carries no
// Commutable flag.
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v4i32, IntOp>;
}

// Scalar ("by lane") intrinsic forms for 16- and 32-bit elements.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}
// "Sh" variant of N3VInt_QHS (no Commutable flag).
multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v16i8, v16i8, IntOp>;
}


// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}
// "Sh" variant of N3VInt_QHSD (no Commutable flag).
multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v2i64, v2i64, IntOp>;
}

// Neon Narrowing 3-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector operations.

multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, Commutable>;
}

// Scalar ("by lane") long operations for 16- and 32-bit elements.
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                       !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}

// Long operations with explicitly extended operands, element sizes 8/16/32.
multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDPatternOperator ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}

// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

// Scalar ("by lane") long intrinsics for 16- and 32-bit elements.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        SDPatternOperator IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
               IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}

// ....with explicit extend (VABDL).
// Long intrinsics whose widening is expressed via an explicit extend node
// (IntOp on D-sized sources, then ExtOp to the Q-sized result type).
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
// source operand element sizes of 8, 16 and 32 bits:
// Q-sized result/first operand, D-sized (extended via ExtOp) second operand.
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDPatternOperator ExtOp, bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}


// Neon Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
// Fused multiply + OpNode (e.g. VMLA = mul then add), D and Q forms.
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

// Scalar (by-lane) multiply-op forms; 16-bit elements use the '...SL16'
// classes, 32-bit elements the plain '...SL' classes.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDPatternOperator ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}

// Neon Intrinsic-Op vector operations,
// element sizes of 8, 16 and 32 bits:
// IntOp combined with OpNode (intrinsic result fed into a DAG node).
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}

// Neon 3-argument intrinsics,
// element sizes of 16 and 32 bits:
multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}

// element sizes of 8, 16 and 32 bits:
// Extends N3VInt3_HS with the 8-bit element D and Q variants.
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp>
  :N3VInt3_HS <op24, op23, op11_8, op4, itinD16, itinD32,
               itinQ16, itinQ32, OpcodeStr, Dt, IntOp>{
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
}

// Neon Long Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
// Long multiply (MulOp) accumulated into a Q register via OpNode.
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}

// Scalar (by-lane) forms of the long multiply-op operations.
multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                            !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}


// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// Scalar (by-lane) forms of the long 3-argument intrinsics.
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}

// ....with explicit extend (VABAL).
// Long intrinsic + explicit extend + accumulate (used for VABAL):
// OpNode(acc, ExtOp(IntOp(Dn, Dm))).
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> {
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}


// Neon Pairwise long 2-register intrinsics,
// element sizes of 8, 16 and 32 bits:
// Result has half as many elements, each twice as wide (e.g. v8i8 -> v4i16).
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
// element sizes of 8, 16 and 32 bits:
// Same shape as N2VPLInt_QHS but using the accumulating '...PLInt2' classes.
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
// The element size is conveyed through the imm6 field: the leading-one
// position forced by the 'let Inst{...}' overrides below selects 8/16/32-bit
// elements, and the extra '1' bit passed to the 64-bit defs selects 64-bit
// (see the 'imm6 = ...' comments).
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}
// Right shifts use the bounded shr_imm* immediate operands instead of i32imm.
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       string baseOpc, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}

// Neon Shift-Accumulate vector operations,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                             // imm6 = xxxxxx
}

// Neon Shift-Insert vector operations,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
// Left-insert (VSLI) forms; selected via the NEONvsliImm DAG operator.
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsliImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsliImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsliImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsliImm>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsliImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsliImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsliImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsliImm>;
                             // imm6 = xxxxxx
}
// Right-insert (VSRI) forms; bounded shr_imm* operands, NEONvsriImm operator.
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsriImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsriImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsriImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsriImm>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsriImm> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsriImm> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsriImm> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsriImm>;
                             // imm6 = xxxxxx
}

// Neon Shift Long operations,
// element sizes of 8, 16, 32 bits:
// Shift amount is bounded below the source element width (imm1_7/15/31).
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

// Neon Shift Narrow operations,
// element sizes of 16, 32, 64 bits:
// Data-type suffix names the *source* width; result elements are halved.
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i8  : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "16"),
                     v8i8, v8i16, shr_imm8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v4i16, v4i32, shr_imm16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"),
                     v2i32, v2i64, shr_imm32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.

// VADD : Vector Add (integer and floating-point)
defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
                     add, 1>;
def  VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                   v2f32, v2f32, fadd, 1>;
def  VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                   v4f32, v4f32, fadd, 1>;
def  VADDhd : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
                   v4f16, v4f16, fadd, 1>,
              Requires<[HasNEON,HasFullFP16]>;
def  VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
                   v8f16, v8f16, fadd, 1>,
              Requires<[HasNEON,HasFullFP16]>;
// VADDL : Vector Add Long (Q = D + D)
defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vaddl", "s", add, sext, 1>;
defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vaddl", "u", add, zanyext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zanyext, 0>;
// VHADD : Vector Halving Add
defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD : Vector Rounding Halving Add
defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD : Vector Saturating Add
defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vqadd", "s", saddsat, 1>;
defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vqadd", "u", uaddsat, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
// Uses null_frag: selection happens through the explicit trunc/shift/add
// patterns in the Predicates block just below.
defm VADDHN  : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                           int_arm_neon_vraddhn, 1>;

let Predicates = [HasNEON] in {
def : Pat<(v8i8 (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
}

// Vector Multiply Operations.

// VMUL : Vector Multiply (integer, polynomial and floating-point)
defm VMUL   : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                      IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def  VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                      "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def  VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                      "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def  VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                   v2f32, v2f32, fmul, 1>;
def  VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                   v4f32, v4f32, fmul, 1>;
def  VMULhd : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
                   v4f16, v4f16, fmul, 1>,
              Requires<[HasNEON,HasFullFP16]>;
def  VMULhq : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
                   v8f16, v8f16, fmul, 1>,
              Requires<[HasNEON,HasFullFP16]>;
defm VMULsl  : N3VSL_HS<0b1000, "vmul", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                       v2f32, fmul>;
def  VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
                Requires<[HasNEON,HasFullFP16]>;
def  VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
                         v4f16, fmul>,
                Requires<[HasNEON,HasFullFP16]>;

// Fold a multiply by a duplicated Q-register lane into the by-lane VMUL
// forms (the lane operand is the D subregister containing that lane).
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (ARMvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (ARMvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (ARMvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
                       (v8f16 (ARMvduplane (v8f16 QPR:$src2), imm:$lane)))),
          (v8f16 (VMULslhq(v8f16 QPR:$src1),
                          (v4f16 (EXTRACT_SUBREG QPR:$src2,
                                  (DSubReg_i16_reg imm:$lane))),
                          (SubReg_i16_lane imm:$lane)))>;

// Multiply by a duplicated scalar register: insert the scalar into lane 0
// of an undefined vector and use the by-lane form with lane index 0.
def : Pat<(v2f32 (fmul DPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
          (VMULslfd DPR:$Rn,
                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
                    (i32 0))>;
def : Pat<(v4f16 (fmul DPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
          (VMULslhd DPR:$Rn,
                    (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
                    (i32 0))>;
def : Pat<(v4f32 (fmul QPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
          (VMULslfq QPR:$Rn,
                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
                    (i32 0))>;
def : Pat<(v8f16 (fmul QPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
          (VMULslhq QPR:$Rn,
                    (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
                    (i32 0))>;
}

// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s", int_arm_neon_vqdmulh>;

let Predicates = [HasNEON] in {
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (ARMvduplane (v8i16 QPR:$src2),
                                                           imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (ARMvduplane (v4i32 QPR:$src2),
                                                           imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;
}

// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;

let Predicates = [HasNEON] in {
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (ARMvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (ARMvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;
}

// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "NEONData" in {
  defm VMULLs  : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                          "vmull", "s", ARMvmulls, 1>;
  defm VMULLu  : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                          "vmull", "u", ARMvmullu, 1>;
  def  VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                         v8i16, v8i8, int_arm_neon_vmullp, 1>;
  def  VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
                            "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
                  Requires<[HasV8, HasAES]>;
}
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", ARMvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", ARMvmullu>;

// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.

// VMLA : Vector Multiply Accumulate (integer and floating-point)
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                          v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                          v4f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAhd   : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
                          v4f16, fmul_su, fadd_mlx>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def  VMLAhq   : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
                          v8f16, fmul_su, fadd_mlx>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                            v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                            v4f32, v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
                              v4f16, fmul, fadd>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def  VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
                              v8f16, v4f16, fmul, fadd>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;

// Fold add-of-multiply-by-duplicated-lane into the by-lane VMLA form; the
// duplicated Q-register operand is narrowed to the D subregister that holds
// the lane.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
}

def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                           (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
          Requires<[HasNEON, UseFPVMLx]>;

// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs   : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "s", ARMvmulls, add>;
defm VMLALu   : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "u", ARMvmullu, add>;

defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", ARMvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", ARMvmullu, add>;

let Predicates = [HasNEON, HasV8_1a] in {
  // v8.1a Neon Rounding Double Multiply-Op vector operations.
  // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate
  //            Returning High Half. Note this is NOT a widening ("long")
  //            operation: operands and result are the same width, as the
  //            saddsat patterns below show (Vd += high(Vn * Vm)).
  defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
                             null_frag>;
  // Fuse a saturating add of a vqrdmulh result into VQRDMLAH.
  def : Pat<(v4i16 (saddsat
                    (v4i16 DPR:$src1),
                    (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
                                                  (v4i16 DPR:$Vm))))),
            (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v2i32 (saddsat
                    (v2i32 DPR:$src1),
                    (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
                                                  (v2i32 DPR:$Vm))))),
            (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v8i16 (saddsat
                    (v8i16 QPR:$src1),
                    (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
                                                  (v8i16 QPR:$Vm))))),
            (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  def : Pat<(v4i32 (saddsat
                    (v4i32 QPR:$src1),
                    (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
                                                  (v4i32 QPR:$Vm))))),
            (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;

  defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
                                  null_frag>;
  // By-lane variants: fold a duplicated-lane multiplicand into VQRDMLAHsl.
  def : Pat<(v4i16 (saddsat
                    (v4i16 DPR:$src1),
                    (v4i16 (int_arm_neon_vqrdmulh
                            (v4i16 DPR:$Vn),
                            (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                 imm:$lane)))))),
            (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
                                    imm:$lane))>;
  def : Pat<(v2i32 (saddsat
                    (v2i32 DPR:$src1),
                    (v2i32 (int_arm_neon_vqrdmulh
                            (v2i32 DPR:$Vn),
                            (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                 imm:$lane)))))),
            (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                                    imm:$lane))>;
  def : Pat<(v8i16 (saddsat
                    (v8i16 QPR:$src1),
                    (v8i16 (int_arm_neon_vqrdmulh
                            (v8i16 QPR:$src2),
                            (v8i16 (ARMvduplane (v8i16 QPR:$src3),
                                                 imm:$lane)))))),
            (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
                                    (v8i16 QPR:$src2),
                                    (v4i16 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i16_reg imm:$lane))),
                                    (SubReg_i16_lane imm:$lane)))>;
  def : Pat<(v4i32 (saddsat
                    (v4i32 QPR:$src1),
                    (v4i32 (int_arm_neon_vqrdmulh
                            (v4i32 QPR:$src2),
                            (v4i32 (ARMvduplane (v4i32 QPR:$src3),
                                                 imm:$lane)))))),
            (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
                                    (v4i32 QPR:$src2),
                                    (v2i32 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i32_reg imm:$lane))),
                                    (SubReg_i32_lane imm:$lane)))>;

  // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract
  //            Returning High Half. Same-width operands, like VQRDMLAH
  //            (Vd -= high(Vn * Vm)).
  defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
                             null_frag>;
  // Fuse a saturating subtract of a vqrdmulh result into VQRDMLSH.
  def : Pat<(v4i16 (ssubsat
                    (v4i16 DPR:$src1),
                    (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
                                                  (v4i16 DPR:$Vm))))),
            (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v2i32 (ssubsat
                    (v2i32 DPR:$src1),
                    (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
                                                  (v2i32 DPR:$Vm))))),
            (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v8i16 (ssubsat
                    (v8i16 QPR:$src1),
                    (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
                                                  (v8i16 QPR:$Vm))))),
            (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  def : Pat<(v4i32 (ssubsat
                    (v4i32 QPR:$src1),
                    (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
                                                  (v4i32 QPR:$Vm))))),
            (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;

  defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
                                  null_frag>;
  // By-lane variants, mirroring the VQRDMLAHsl patterns above.
  def : Pat<(v4i16 (ssubsat
                    (v4i16 DPR:$src1),
                    (v4i16 (int_arm_neon_vqrdmulh
                            (v4i16 DPR:$Vn),
                            (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                 imm:$lane)))))),
            (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
  def : Pat<(v2i32 (ssubsat
                    (v2i32 DPR:$src1),
                    (v2i32 (int_arm_neon_vqrdmulh
                            (v2i32 DPR:$Vn),
                            (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                 imm:$lane)))))),
            (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                                    imm:$lane))>;
  def : Pat<(v8i16 (ssubsat
                    (v8i16 QPR:$src1),
                    (v8i16 (int_arm_neon_vqrdmulh
                            (v8i16 QPR:$src2),
                            (v8i16 (ARMvduplane (v8i16 QPR:$src3),
                                                 imm:$lane)))))),
            (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
                                    (v8i16 QPR:$src2),
                                    (v4i16 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i16_reg imm:$lane))),
                                    (SubReg_i16_lane imm:$lane)))>;
  def : Pat<(v4i32 (ssubsat
                    (v4i32 QPR:$src1),
                    (v4i32 (int_arm_neon_vqrdmulh
                            (v4i32 QPR:$src2),
                            (v4i32 (ARMvduplane (v4i32 QPR:$src3),
                                                 imm:$lane)))))),
            (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
                                    (v4i32 QPR:$src2),
                                    (v2i32 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i32_reg imm:$lane))),
                                    (SubReg_i32_lane imm:$lane)))>;
}
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlal", "s", null_frag>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;

// Fuse a saturating add of a vqdmull result into VQDMLAL (plain and by-lane).
let Predicates = [HasNEON] in {
def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1),
                          (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                       (v4i16 DPR:$Vm))))),
          (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1),
                          (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                       (v2i32 DPR:$Vm))))),
          (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1),
                          (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                   (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                        imm:$lane)))))),
          (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1),
                          (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                   (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                        imm:$lane)))))),
          (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
}

// VMLS : Vector Multiply Subtract (integer and floating-point)
defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
                          v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                          v4f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLShd   : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
                          v4f16, fmul, fsub>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def  VMLShq   : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
                          v8f16, fmul, fsub>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
                            v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
                            v4f32, v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
                              v4f16, fmul, fsub>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def  VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
                              v8f16, v4f16, fmul, fsub>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;

// By-lane folding for VMLS, mirroring the VMLA patterns above.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
}

def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                           (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
          Requires<[HasNEON, UseFPVMLx]>;

// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs   : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "s", ARMvmulls, sub>;
defm VMLSLu   : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "u", ARMvmullu, sub>;

defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", ARMvmulls, sub>;
defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", ARMvmullu, sub>;

// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlsl", "s", null_frag>;
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;

// Fuse a saturating subtract of a vqdmull result into VQDMLSL.
let Predicates = [HasNEON] in {
def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1),
                          (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                       (v4i16 DPR:$Vm))))),
          (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1),
                          (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                       (v2i32 DPR:$Vm))))),
          (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1),
                          (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                   (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
                                                        imm:$lane)))))),
          (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1),
                          (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                   (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
                                                        imm:$lane)))))),
          (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
}

// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
                       v2f32, fmul_su, fadd_mlx>,
             Requires<[HasNEON,HasVFP4,UseFusedMAC]>;

def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
                       v4f32, fmul_su, fadd_mlx>,
             Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
                       v4f16, fmul, fadd>,
             Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;

def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
                       v8f16, fmul, fadd>,
             Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;

// Fused Vector Multiply Subtract (floating-point)
def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
                       v2f32, fmul_su, fsub_mlx>,
             Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
                       v4f32, fmul_su, fsub_mlx>,
             Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
                       v4f16, fmul, fsub>,
             Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
                       v8f16, fmul, fsub>,
             Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;

// Match @llvm.fma.* intrinsics. Note the operand reorder: the ISD fma is
// (a, b, accumulator) while the instruction takes the accumulator first.
def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
          (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON,HasFullFP16]>;
def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
          (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON,HasFullFP16]>;
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
          (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON,HasVFP4]>;
def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
          (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON,HasVFP4]>;
// fma with one operand negated selects the fused multiply-subtract form.
def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
          (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasNEON,HasVFP4]>;
def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
          (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasNEON,HasVFP4]>;

// ARMv8.2a dot product instructions.
// We put them in the VFPV8 decoder namespace because the ARM and Thumb
// encodings are the same and thus no further bit twiddling is necessary
// in the disassembler.
class VDOT<bit op6, bit op4, bit op23, RegisterClass RegTy, string Asm,
           string AsmTy, ValueType AccumTy, ValueType InputTy,
           SDPatternOperator OpNode> :
      N3Vnp<{0b1100, op23}, 0b10, 0b1101, op6, op4, (outs RegTy:$dst),
            (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD,
            Asm, AsmTy,
            [(set (AccumTy RegTy:$dst),
                  (OpNode (AccumTy RegTy:$Vd),
                          (InputTy RegTy:$Vn),
                          (InputTy RegTy:$Vm)))]> {
  let Predicates = [HasDotProd];
  let DecoderNamespace = "VFPV8";
  let Constraints = "$dst = $Vd";
}

def VUDOTD : VDOT<0, 1, 0, DPR, "vudot", "u8", v2i32, v8i8, int_arm_neon_udot>;
def VSDOTD : VDOT<0, 0, 0, DPR, "vsdot", "s8", v2i32, v8i8, int_arm_neon_sdot>;
def VUDOTQ : VDOT<1, 1, 0, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>;
def VSDOTQ : VDOT<1, 0, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>;

// Indexed dot product instructions:
multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty,
                ValueType AccumType, ValueType InputType, SDPatternOperator OpNode,
                dag RHS> {
  def "" : N3Vnp<0b11100, 0b10, 0b1101, Q, U, (outs Ty:$dst),
                 (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
                 N3RegFrm, IIC_VDOTPROD, opc, dt, []> {
    bit lane;
    let Inst{5} = lane;
    let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane");
    let Constraints = "$dst = $Vd";
    let Predicates = [HasDotProd];
    let DecoderNamespace = "VFPV8";
  }

  // Fold a duplicated 32-bit lane of the accumulator type (bitcast back to
  // the i8 input type) into the indexed instruction; RHS supplies the D
  // register that actually carries the lane.
  def : Pat<
    (AccumType (OpNode (AccumType Ty:$Vd),
                       (InputType Ty:$Vn),
                       (InputType (bitconvert (AccumType
                                  (ARMvduplane (AccumType Ty:$Vm),
                                                VectorIndex32:$lane)))))),
    (!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>;
}

defm VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR, v2i32, v8i8,
                    int_arm_neon_udot, (v2i32 DPR_VFP2:$Vm)>;
defm VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR, v2i32, v8i8,
                    int_arm_neon_sdot, (v2i32 DPR_VFP2:$Vm)>;
defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8,
                    int_arm_neon_udot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8,
                    int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;

// v8.6A matrix multiplication extension
let Predicates = [HasMatMulInt8] in {
  class N3VMatMul<bit B, bit U, string Asm, string AsmTy,
                  SDPatternOperator OpNode>
      : N3Vnp<{0b1100, B}, 0b10, 0b1100, 1, U, (outs QPR:$dst),
              (ins QPR:$Vd, QPR:$Vn, QPR:$Vm), N3RegFrm, NoItinerary,
              Asm, AsmTy,
              [(set (v4i32 QPR:$dst), (OpNode (v4i32 QPR:$Vd),
                                              (v16i8 QPR:$Vn),
                                              (v16i8 QPR:$Vm)))]> {
    let DecoderNamespace = "VFPV8";
    let Constraints = "$dst = $Vd";
  }

  multiclass N3VMixedDotLane<bit Q, bit U, string Asm, string AsmTy, RegisterClass RegTy,
                             ValueType AccumTy, ValueType InputTy, SDPatternOperator OpNode,
                             dag RHS> {

    def "" : N3Vnp<0b11101, 0b00, 0b1101, Q, U, (outs RegTy:$dst),
                   (ins RegTy:$Vd, RegTy:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), N3RegFrm,
                   NoItinerary, Asm, AsmTy, []> {
      bit lane;
      let Inst{5} = lane;
      let AsmString = !strconcat(Asm, ".", AsmTy, "\t$Vd, $Vn, $Vm$lane");
      let DecoderNamespace = "VFPV8";
      let Constraints = "$dst = $Vd";
    }

    def : Pat<
      (AccumTy (OpNode (AccumTy RegTy:$Vd),
                       (InputTy RegTy:$Vn),
                       (InputTy (bitconvert (AccumTy
                                (ARMvduplane (AccumTy RegTy:$Vm),
                                              VectorIndex32:$lane)))))),
      (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;

  }

  // vsudot is selected from the usdot intrinsic with the duplicated-lane
  // operand in the first multiplicand position (operands swapped relative
  // to the vusdot patterns above).
  multiclass SUDOTLane<bit Q, RegisterClass RegTy, ValueType AccumTy, ValueType InputTy, dag RHS>
      : N3VMixedDotLane<Q, 1, "vsudot", "u8", RegTy, AccumTy, InputTy, null_frag, null_frag> {
    def : Pat<
      (AccumTy (int_arm_neon_usdot (AccumTy RegTy:$Vd),
                                   (InputTy (bitconvert (AccumTy
                                            (ARMvduplane (AccumTy RegTy:$Vm),
                                                          VectorIndex32:$lane)))),
                                   (InputTy RegTy:$Vn))),
      (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
  }

  def VSMMLA  : N3VMatMul<0, 0, "vsmmla", "s8", int_arm_neon_smmla>;
  def VUMMLA  : N3VMatMul<0, 1, "vummla", "u8", int_arm_neon_ummla>;
  def VUSMMLA : N3VMatMul<1, 0, "vusmmla", "s8", int_arm_neon_usmmla>;
  def VUSDOTD : VDOT<0, 0, 1, DPR, "vusdot", "s8", v2i32, v8i8, int_arm_neon_usdot>;
  def VUSDOTQ : VDOT<1, 0, 1, QPR, "vusdot", "s8", v4i32, v16i8, int_arm_neon_usdot>;

  defm VUSDOTDI : N3VMixedDotLane<0, 0, "vusdot", "s8", DPR, v2i32, v8i8,
                                  int_arm_neon_usdot, (v2i32 DPR_VFP2:$Vm)>;
  defm VUSDOTQI : N3VMixedDotLane<1, 0, "vusdot", "s8", QPR, v4i32, v16i8,
                                  int_arm_neon_usdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
  defm VSUDOTDI : SUDOTLane<0, DPR, v2i32, v8i8, (v2i32 DPR_VFP2:$Vm)>;
  defm VSUDOTQI : SUDOTLane<1, QPR, v4i32, v16i8, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
}

// ARMv8.3 complex operations
class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q,
                            InstrItinClass itin, dag oops, dag iops,
                            string opc, string dt, list<dag> pattern>
  : N3VCP8<{?,?}, {op21,s}, q, op4, oops,
           iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{
  bits<2> rot;
  let Inst{24-23} = rot;
}

class BaseN3VCP8ComplexOdd<bit op23, bit op21, bit op4, bit s, bit q,
                           InstrItinClass itin, dag oops, dag iops, string opc,
                           string dt, list<dag> pattern>
  : N3VCP8<{?,op23}, {op21,s}, q, op4, oops,
           iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> {
  bits<1> rot;
  let Inst{24} = rot;
}

class BaseN3VCP8ComplexTiedLane32<bit op4, bit s, bit q, InstrItinClass itin,
                                  dag oops, dag iops, string opc, string dt,
                                  list<dag> pattern>
  : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
               "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
  bits<2> rot;
  bit lane;

  let Inst{21-20} = rot;
  let Inst{5} = lane;
}

class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q, InstrItinClass itin,
                                  dag oops, dag iops, string opc, string dt,
                                  list<dag> pattern>
  : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
               "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
  bits<2> rot;
  bit lane;

  let Inst{21-20} = rot;
  let Inst{5} = Vm{4};
  // This is needed because the lane operand does not have any bits in the
  // encoding (it only has one possible value), so we need to manually set it
  // to its default value.
  let DecoderMethod = "DecodeNEONComplexLane64Instruction";
}

multiclass N3VCP8ComplexTied<bit op21, bit op4,
                             string OpcodeStr, SDPatternOperator Op> {
  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
  def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD, (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
              OpcodeStr, "f16", []>;
  def v8f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 1, IIC_VMACQ, (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
              OpcodeStr, "f16", []>;
  }
  let Predicates = [HasNEON,HasV8_3a] in {
  def v2f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 0, IIC_VMACD, (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
              OpcodeStr, "f32", []>;
  def v4f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 1, IIC_VMACQ, (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
              OpcodeStr, "f32", []>;
  }
}

multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4,
                            string OpcodeStr, SDPatternOperator Op> {
  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
  def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
              OpcodeStr, "f16", []>;
  def v8f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 1, IIC_VMACQ,
              (outs QPR:$Vd),
              (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
              OpcodeStr, "f16", []>;
  }
  let Predicates = [HasNEON,HasV8_3a] in {
  def v2f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 0, IIC_VMACD,
              (outs DPR:$Vd),
              (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
              OpcodeStr, "f32", []>;
  def v4f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 1, IIC_VMACQ,
              (outs QPR:$Vd),
              (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
              OpcodeStr, "f32", []>;
  }
}

// These instructions index by pairs of lanes, so the VectorIndexes are twice
// as wide as the data types.
multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr,
                                 SDPatternOperator Op> {
  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
  def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD,
                      (outs DPR:$Vd),
                      (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                       VectorIndex32:$lane, complexrotateop:$rot),
                      OpcodeStr, "f16", []>;
  def v8f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 1, IIC_VMACQ,
                      (outs QPR:$Vd),
                      (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm,
                       VectorIndex32:$lane, complexrotateop:$rot),
                      OpcodeStr, "f16", []>;
  }
  let Predicates = [HasNEON,HasV8_3a] in {
  def v2f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 0, IIC_VMACD,
                      (outs DPR:$Vd),
                      (ins DPR:$src1, DPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
                       complexrotateop:$rot),
                      OpcodeStr, "f32", []>;
  def v4f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 1, IIC_VMACQ,
                      (outs QPR:$Vd),
                      (ins QPR:$src1, QPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
                       complexrotateop:$rot),
                      OpcodeStr, "f32", []>;
  }
}

defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>;
defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>;
defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>;

// Select VCADD from the rotation-specific intrinsics; the trailing immediate
// is the encoded rotation selector (0 = #90, 1 = #270).
let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
  def : Pat<(v4f16 (int_arm_neon_vcadd_rot90 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
            (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 0))>;
  def : Pat<(v4f16 (int_arm_neon_vcadd_rot270 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
            (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 1))>;
  def : Pat<(v8f16 (int_arm_neon_vcadd_rot90 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))),
            (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 0))>;
  def : Pat<(v8f16 (int_arm_neon_vcadd_rot270 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))),
            (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 1))>;
}
let Predicates = [HasNEON,HasV8_3a] in {
  def : Pat<(v2f32 (int_arm_neon_vcadd_rot90 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))),
            (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 0))>;
  def : Pat<(v2f32 (int_arm_neon_vcadd_rot270 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))),
            (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 1))>;
  def : Pat<(v4f32 (int_arm_neon_vcadd_rot90 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))),
            (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 0))>;
  def : Pat<(v4f32 (int_arm_neon_vcadd_rot270 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))),
            (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 1))>;
}

// Vector Subtract Operations.

// VSUB : Vector Subtract (integer and floating-point)
defm VSUB   : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                       "vsub", "i", sub, 0>;
def  VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                   v2f32, v2f32, fsub, 0>;
def  VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                   v4f32, v4f32, fsub, 0>;
def  VSUBhd : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
                   v4f16, v4f16, fsub, 0>,
              Requires<[HasNEON,HasFullFP16]>;
def  VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
                   v8f16, v8f16, fsub, 0>,
              Requires<[HasNEON,HasFullFP16]>;
// VSUBL : Vector Subtract Long (Q = D - D)
defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vsubl", "s", sub, sext, 0>;
defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vsubl", "u", sub, zanyext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zanyext, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vqsub", "s", ssubsat, 0>;
defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vqsub", "u", usubsat, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                           int_arm_neon_vrsubhn, 0>;

// Select VSUBHN when the IR subtracts, shifts right by half the element
// width, and truncates - i.e. keeps the high half of each difference.
let Predicates = [HasNEON] in {
def : Pat<(v8i8  (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
}

// Vector Comparisons.
5106 5107// VCEQ : Vector Compare Equal 5108defm VCEQ : N3V_QHS_cmp<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 5109 IIC_VSUBi4Q, "vceq", "i", ARMCCeq, 1>; 5110def VCEQfd : N3VD_cmp<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, 5111 ARMCCeq, 1>; 5112def VCEQfq : N3VQ_cmp<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, 5113 ARMCCeq, 1>; 5114def VCEQhd : N3VD_cmp<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16, 5115 ARMCCeq, 1>, 5116 Requires<[HasNEON, HasFullFP16]>; 5117def VCEQhq : N3VQ_cmp<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16, 5118 ARMCCeq, 1>, 5119 Requires<[HasNEON, HasFullFP16]>; 5120 5121let TwoOperandAliasConstraint = "$Vm = $Vd" in 5122defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", 5123 "$Vd, $Vm, #0", ARMCCeq>; 5124 5125// VCGE : Vector Compare Greater Than or Equal 5126defm VCGEs : N3V_QHS_cmp<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 5127 IIC_VSUBi4Q, "vcge", "s", ARMCCge, 0>; 5128defm VCGEu : N3V_QHS_cmp<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 5129 IIC_VSUBi4Q, "vcge", "u", ARMCChs, 0>; 5130def VCGEfd : N3VD_cmp<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, 5131 ARMCCge, 0>; 5132def VCGEfq : N3VQ_cmp<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, 5133 ARMCCge, 0>; 5134def VCGEhd : N3VD_cmp<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16, 5135 ARMCCge, 0>, 5136 Requires<[HasNEON, HasFullFP16]>; 5137def VCGEhq : N3VQ_cmp<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16, 5138 ARMCCge, 0>, 5139 Requires<[HasNEON, HasFullFP16]>; 5140 5141let TwoOperandAliasConstraint = "$Vm = $Vd" in { 5142defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", 5143 "$Vd, $Vm, #0", ARMCCge>; 5144defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", 5145 "$Vd, $Vm, #0", ARMCCle>; 5146} 5147 5148// VCGT : Vector Compare Greater Than 5149defm VCGTs : N3V_QHS_cmp<0, 0, 0b0011, 0, IIC_VSUBi4D, 
// NOTE(review): this region begins mid-way through the preceding defm (VCGTs);
// the two continuation lines below are kept verbatim.
                     IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcgt", "s", ARMCCgt, 0>;
defm VCGTu : N3V_QHS_cmp<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcgt", "u", ARMCChi, 0>;
def VCGTfd : N3VD_cmp<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                      ARMCCgt, 0>;
def VCGTfq : N3VQ_cmp<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                      ARMCCgt, 0>;
def VCGThd : N3VD_cmp<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
                      ARMCCgt, 0>,
             Requires<[HasNEON, HasFullFP16]>;
def VCGThq : N3VQ_cmp<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
                      ARMCCgt, 0>,
             Requires<[HasNEON, HasFullFP16]>;

// VCGT/VCLT against zero: two-operand forms comparing $Vm with #0.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                         "$Vd, $Vm, #0", ARMCCgt>;
defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                         "$Vd, $Vm, #0", ARMCClt>;
}

// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def VACGEfd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                      "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
def VACGEfq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                      "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
def VACGEhd : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                      "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
              Requires<[HasNEON, HasFullFP16]>;
def VACGEhq : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                      "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
              Requires<[HasNEON, HasFullFP16]>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
def VACGTfd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                      "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
def VACGTfq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                      "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                      "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
              Requires<[HasNEON, HasFullFP16]>;
def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                      "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>,
              Requires<[HasNEON, HasFullFP16]>;
// VTST : Vector Test Bits
defm VTST  : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                     IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;

// VACLT/VACLE assembly aliases: map to VACGT/VACGE with source operands swapped.
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in {
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
}

// +fp16fml Floating Point Multiplication Variants
let Predicates = [HasNEON, HasFP16FML], DecoderNamespace= "VFPV8" in {

class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn,
                  RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
  : N3VCP8<op1, op2, 1, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
           asm, "f16", "$Vd, $Vn, $Vm", "", []>;

class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
                  RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
  : N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
             asm, "f16", "$Vd, $Vn, $Vm", "", []>;

// Vd, Vs, Vs[0-15], Idx[0-1]
// Indexed D-register form; the lane index and Vn/Vm register bits are
// scattered across the encoding below.
class VFMD<string opc, string type, bits<2> S>
  : N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd),
               (ins SPR:$Vn, SPR_8:$Vm, VectorIndex32:$idx),
               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
  bit idx;
  let Inst{3} = idx;
  let Inst{19-16} = Vn{4-1};
  let Inst{7} = Vn{0};
  let Inst{5} = Vm{0};
  let Inst{2-0} = Vm{3-1};
}

// Vq, Vd, Vd[0-7], Idx[0-3]
// Indexed Q-register form; the two index bits land in Inst{5} and Inst{3}.
class VFMQ<string opc, string type, bits<2> S>
  : N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd),
               (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
  bits<2> idx;
  let Inst{5} = idx{1};
  let Inst{3} = idx{0};
}

//                                          op1   op2  op3
def VFMALD  : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
def VFMSLD  : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
def VFMALQ  : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
def VFMSLQ  : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;
def VFMALDI : VFMD<"vfmal", "f16", 0b00>;
def VFMSLDI : VFMD<"vfmsl", "f16", 0b01>;
def VFMALQI : VFMQ<"vfmal", "f16", 0b00>;
def VFMSLQI : VFMQ<"vfmsl", "f16", 0b01>;
} // HasNEON, HasFP16FML


// Two-operand VACLT/VACLE aliases: $Vd is reused as the second source.
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in {
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
}

// Vector Bitwise Operations.

// Bitwise NOT of a 64-/128-bit vector, expressed as XOR with all-ones.
def vnotd : PatFrag<(ops node:$in),
                    (xor node:$in, ARMimmAllOnesD)>;
def vnotq : PatFrag<(ops node:$in),
                    (xor node:$in, ARMimmAllOnesV)>;


// VAND : Vector Bitwise AND
def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
                  v2i32, v2i32, and, 1>;
def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
                  v4i32, v4i32, and, 1>;

// VEOR : Vector Bitwise Exclusive OR
def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
                  v2i32, v2i32, xor, 1>;
def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
                  v4i32, v4i32, xor, 1>;

// VORR : Vector Bitwise OR
def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
                  v2i32, v2i32, or, 1>;
def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                  v4i32, v4i32, or, 1>;

// VORR with a splatted modified immediate; the '?' cmode bits come from $SIMM.
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                                (v4i16 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                                (v2i32 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                                (v8i16 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                                (v4i32 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}


// VBIC : Vector Bitwise Bit Clear (AND NOT)
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                 "vbic", "$Vd, $Vn, $Vm", "",
                 [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
                                            (vnotd DPR:$Vm))))]>;
def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                 "vbic", "$Vd, $Vn, $Vm", "",
                 [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
                                            (vnotq QPR:$Vm))))]>;
}

// VBIC with a splatted modified immediate, mirroring the VORRi* forms above.
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                                (v4i16 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                                (v2i32 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                                (v8i16 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                                (v4i32 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

// VORN : Vector Bitwise OR NOT
def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
                 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                 "vorn", "$Vd, $Vn, $Vm", "",
                 [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
                                           (vnotd DPR:$Vm))))]>;
def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
                 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                 "vorn", "$Vd, $Vn, $Vm", "",
                 [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
                                           (vnotq QPR:$Vm))))]>;

// VMVN : Vector Bitwise NOT (Immediate)

let isReMaterializable = 1 in {

def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (ARMvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}
}

// VMVN : Vector Bitwise NOT
def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
                 "vmvn", "$Vd, $Vm", "",
                 [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
                 "vmvn", "$Vd, $Vm", "",
                 [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
let Predicates = [HasNEON] in {
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
}

// The TwoAddress pass will not go looking for equivalent operations
// with different register constraints; it just inserts copies.
// That is why the pseudo VBSP is implemented. It is expanded later into
// VBIT/VBIF/VBSL taking into account register constraints to avoid copies.
def VBSPd
  : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                IIC_VBINiD, "",
                [(set DPR:$Vd,
                      (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
                                   (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
                                    (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
                                    (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
                                    (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
                                    (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;

// Open-coded bit-select: (Vn & Vd) | (Vm & ~Vd).
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;

def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
}

def VBSPq
  : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                IIC_VBINiQ, "",
                [(set QPR:$Vd,
                      (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
let Predicates = [HasNEON] in {
def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
                                    (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
                                    (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
                                    (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
                                    (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
                                    (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;

// Open-coded bit-select, Q-register variants.
def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
}

// VBSL : Vector Bitwise Select
def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                 (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                 N3RegFrm, IIC_VBINiD,
                 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 []>;

def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                 (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                 N3RegFrm, IIC_VBINiQ,
                 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 []>;

// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                 N3RegFrm, IIC_VBINiD,
                 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 []>;
def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                 N3RegFrm, IIC_VBINiQ,
                 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 []>;

// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                 N3RegFrm, IIC_VBINiD,
                 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 []>;
def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                 N3RegFrm, IIC_VBINiQ,
                 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                 []>;

// Vector Absolute Differences.

// VABD : Vector Absolute Difference
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vabd", "u", int_arm_neon_vabdu, 1>;
def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                     "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                     "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
def VABDhd : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
                     "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
             Requires<[HasNEON, HasFullFP16]>;
def VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                     "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
             Requires<[HasNEON, HasFullFP16]>;

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                             "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                             "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

// Match abs(sub(zext, zext)) directly to the unsigned long forms.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
          (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
          (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
}

// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
// shift/xor pattern for ABS.

def abd_shr :
    PatFrag<(ops node:$in1, node:$in2, node:$shift),
            (ARMvshrsImm (sub (zext node:$in1),
                              (zext node:$in2)), (i32 $shift))>;

let Predicates = [HasNEON] in {
def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
               (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
                                                   (zext (v2i32 DPR:$opB))),
                                          (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
          (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
}

// VABA : Vector Absolute Difference and Accumulate
defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                          "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                          "vaba", "u", int_arm_neon_vabdu, add>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
                               "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
                               "vabal", "u", int_arm_neon_vabdu, zext, add>;

// Vector Maximum and Minimum.

// VMAX : Vector Maximum
// Note: VMAX/VMIN select via fmaximum/fminimum, while the ARMv8-only
// VMAXNM/VMINNM below select via fmaxnum/fminnum.
defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "s", smax, 1>;
defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "u", umax, 1>;
def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                     "vmax", "f32",
                     v2f32, v2f32, fmaximum, 1>;
def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                     "vmax", "f32",
                     v4f32, v4f32, fmaximum, 1>;
def VMAXhd : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
                     "vmax", "f16",
                     v4f16, v4f16, fmaximum, 1>,
             Requires<[HasNEON, HasFullFP16]>;
def VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                     "vmax", "f16",
                     v8f16, v8f16, fmaximum, 1>,
             Requires<[HasNEON, HasFullFP16]>;

// VMAXNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def NEON_VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f32",
                                 v2f32, v2f32, fmaxnum, 1>,
                                 Requires<[HasV8, HasNEON]>;
  def NEON_VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f32",
                                 v4f32, v4f32, fmaxnum, 1>,
                                 Requires<[HasV8, HasNEON]>;
  def NEON_VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f16",
                                 v4f16, v4f16, fmaxnum, 1>,
                                 Requires<[HasV8, HasNEON, HasFullFP16]>;
  def NEON_VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vmaxnm", "f16",
                                 v8f16, v8f16, fmaxnum, 1>,
                                 Requires<[HasV8, HasNEON, HasFullFP16]>;
}

// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "s", smin, 1>;
defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "u", umin, 1>;
def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                     "vmin", "f32",
                     v2f32, v2f32, fminimum, 1>;
def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                     "vmin", "f32",
                     v4f32, v4f32, fminimum, 1>;
def VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
                     "vmin", "f16",
                     v4f16, v4f16, fminimum, 1>,
             Requires<[HasNEON, HasFullFP16]>;
def VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                     "vmin", "f16",
                     v8f16, v8f16, fminimum, 1>,
             Requires<[HasNEON, HasFullFP16]>;

// VMINNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def NEON_VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f32",
                                 v2f32, v2f32, fminnum, 1>,
                                 Requires<[HasV8, HasNEON]>;
  def NEON_VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f32",
                                 v4f32, v4f32, fminnum, 1>,
                                 Requires<[HasV8, HasNEON]>;
  def NEON_VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f16",
                                 v4f16, v4f16, fminnum, 1>,
                                 Requires<[HasV8, HasNEON, HasFullFP16]>;
  def NEON_VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
                                 N3RegFrm, NoItinerary, "vminnm", "f16",
                                 v8f16, v8f16, fminnum, 1>,
                                 Requires<[HasV8, HasNEON, HasFullFP16]>;
}

// Vector Pairwise Operations.

// VPADD : Vector Pairwise Add
// Note: all pairwise operations here are D-register only (N3VDInt).
def VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                       "vpadd", "i8",
                       v8i8, v8i8, int_arm_neon_vpadd, 0>;
def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                       "vpadd", "i16",
                       v4i16, v4i16, int_arm_neon_vpadd, 0>;
def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                       "vpadd", "i32",
                       v2i32, v2i32, int_arm_neon_vpadd, 0>;
def VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                       IIC_VPBIND, "vpadd", "f32",
                       v2f32, v2f32, int_arm_neon_vpadd, 0>;
def VPADDh   : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
                       IIC_VPBIND, "vpadd", "f16",
                       v4f16, v4f16, int_arm_neon_vpadd, 0>,
               Requires<[HasNEON, HasFullFP16]>;

// VPADDL : Vector Pairwise Add Long
defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                            int_arm_neon_vpaddls>;
defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                            int_arm_neon_vpaddlu>;

// VPADAL : Vector Pairwise Add and Accumulate Long
defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                             int_arm_neon_vpadals>;
defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                             int_arm_neon_vpadalu>;

// VPMAX : Vector Pairwise Maximum
def VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                       "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                       "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                       "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                       "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                       "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                       "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                       "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
def VPMAXh   : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                       "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
               Requires<[HasNEON, HasFullFP16]>;

// VPMIN : Vector Pairwise Minimum
def VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                       "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                       "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                       "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                       "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                       "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                       "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                       "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
def VPMINh   : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                       "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
               Requires<[HasNEON, HasFullFP16]>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.

// VRECPE : Vector Reciprocal Estimate
// All estimate/step forms lower to the corresponding int_arm_neon_* intrinsic.
def VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                       IIC_VUNAD, "vrecpe", "u32",
                       v2i32, v2i32, int_arm_neon_vrecpe>;
def VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                       IIC_VUNAQ, "vrecpe", "u32",
                       v4i32, v4i32, int_arm_neon_vrecpe>;
def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                       IIC_VUNAD, "vrecpe", "f32",
                       v2f32, v2f32, int_arm_neon_vrecpe>;
def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                       IIC_VUNAQ, "vrecpe", "f32",
                       v4f32, v4f32, int_arm_neon_vrecpe>;
def VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
                       IIC_VUNAD, "vrecpe", "f16",
                       v4f16, v4f16, int_arm_neon_vrecpe>,
               Requires<[HasNEON, HasFullFP16]>;
def VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
                       IIC_VUNAQ, "vrecpe", "f16",
                       v8f16, v8f16, int_arm_neon_vrecpe>,
               Requires<[HasNEON, HasFullFP16]>;

// VRECPS : Vector Reciprocal Step
def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                       IIC_VRECSD, "vrecps", "f32",
                       v2f32, v2f32, int_arm_neon_vrecps, 1>;
def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                       IIC_VRECSQ, "vrecps", "f32",
                       v4f32, v4f32, int_arm_neon_vrecps, 1>;
def VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
                       IIC_VRECSD, "vrecps", "f16",
                       v4f16, v4f16, int_arm_neon_vrecps, 1>,
               Requires<[HasNEON, HasFullFP16]>;
def VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
                       IIC_VRECSQ, "vrecps", "f16",
                       v8f16, v8f16, int_arm_neon_vrecps, 1>,
               Requires<[HasNEON, HasFullFP16]>;

// VRSQRTE : Vector Reciprocal Square Root Estimate
def VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                        IIC_VUNAD, "vrsqrte", "u32",
                        v2i32, v2i32, int_arm_neon_vrsqrte>;
def VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                        IIC_VUNAQ, "vrsqrte", "u32",
                        v4i32, v4i32, int_arm_neon_vrsqrte>;
def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                        IIC_VUNAD, "vrsqrte", "f32",
                        v2f32, v2f32, int_arm_neon_vrsqrte>;
def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                        IIC_VUNAQ, "vrsqrte", "f32",
                        v4f32, v4f32, int_arm_neon_vrsqrte>;
def VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
                        IIC_VUNAD, "vrsqrte", "f16",
                        v4f16, v4f16, int_arm_neon_vrsqrte>,
                Requires<[HasNEON, HasFullFP16]>;
def VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
                        IIC_VUNAQ, "vrsqrte", "f16",
                        v8f16, v8f16, int_arm_neon_vrsqrte>,
                Requires<[HasNEON, HasFullFP16]>;

// VRSQRTS : Vector Reciprocal Square Root Step
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrsqrts", "f32",
                        v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrsqrts", "f32",
                        v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrsqrts", "f16",
                        v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrsqrts", "f16",
                        v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// Vector Shifts.

// VSHL : Vector Shift
defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                           IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                           "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                           IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                           "vshl", "u", int_arm_neon_vshiftu>;

// Register-shift patterns for the ARMvshls/ARMvshlu nodes.
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (ARMvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
          (VSHLsv8i8 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
          (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
          (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
          (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
          (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
          (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
          (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
          (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;

def : Pat<(v8i8 (ARMvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
          (VSHLuv8i8 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
          (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
          (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
          (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
          (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
          (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
          (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
          (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;

}

// VSHL : Vector Shift Left (Immediate)
defm VSHLi  : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>;

// VSHR : Vector Shift Right (Immediate)
defm VSHRs  : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
                          ARMvshrsImm>;
defm VSHRu  : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
                          ARMvshruImm>;

// VSHLL : Vector Shift Left Long
// Matched as an immediate shift of a sign-/zero-extended input.
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (sext node:$LHS), node:$RHS)>>;
defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (zext node:$LHS), node:$RHS)>>;

// VSHLL : Vector Shift Left Long (with maximum shift count)
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
                ValueType OpTy, Operand ImmTy>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, ImmTy, null_frag> {
  let Inst{21-16} = op21_16;
  let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                         v8i16, v8i8, imm8>;
def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                         v4i32, v4i16, imm16>;
def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                         v2i64, v2i32, imm32>;

// Shift-by-element-size: any extension kind (zext/sext/anyext) works, since
// the low half is shifted out entirely.
let Predicates = [HasNEON] in {
def : Pat<(v8i16 (ARMvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (ARMvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (ARMvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (ARMvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (ARMvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (ARMvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (ARMvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (ARMvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (ARMvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
}

// VSHRN : Vector Shift Right and Narrow
defm VSHRN  : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                         PatFrag<(ops node:$Rn, node:$amt),
                                 (trunc (ARMvshrsImm node:$Rn, node:$amt))>>;

// The truncation makes the sign of the shift irrelevant, so also match the
// unsigned-shift form.
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
          (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
          (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
          (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
}

// VRSHL : Vector Rounding Shift
defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
                          NEONvrshrsImm>;
defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
                          NEONvrshruImm>;

// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                         NEONvrshrnImm>;

// VQSHL : Vector Saturating Shift
defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshlsImm>;
defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshluImm>;

// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsuImm>;

// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                          NEONvqshrnsImm>;
defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                          NEONvqshrnuImm>;

// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                          NEONvqshrnsuImm>;

// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                             "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                             "vqrshl", "u", int_arm_neon_vqrshiftu>;

// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                           NEONvqrshrnsImm>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                           NEONvqrshrnuImm>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                           NEONvqrshrnsuImm>;

// VSRA : Vector Shift Right and Accumulate
defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>;
defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>;
// VRSRA : Vector Rounding Shift Right and Accumulate
defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>;
defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>;

// VSLI : Vector Shift Left and Insert
defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;

// VSRI : Vector Shift Right and Insert
defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;

// Vector Absolute and Saturating Absolute.

// VABS : Vector Absolute Value
defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                       IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>;
// Floating-point absolute value, D- and Q-register forms (f32).
def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                  "vabs", "f32",
                  v2f32, v2f32, fabs>;
def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                  "vabs", "f32",
                  v4f32, v4f32, fabs>;
// f16 variants are only available with the full FP16 extension.
def VABShd : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
                  "vabs", "f16",
                  v4f16, v4f16, fabs>,
             Requires<[HasNEON, HasFullFP16]>;
def VABShq : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
                  "vabs", "f16",
                  v8f16, v8f16, fabs>,
             Requires<[HasNEON, HasFullFP16]>;

// VQABS : Vector Saturating Absolute Value
defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                        IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
                        int_arm_neon_vqabs>;

// Vector Negate.
// Negate is matched as a subtraction from the all-zeros vector.
def vnegd : PatFrag<(ops node:$in),
                    (sub ARMimmAllZerosD, node:$in)>;
def vnegq : PatFrag<(ops node:$in),
                    (sub ARMimmAllZerosV, node:$in)>;

// Shared encodings for the integer VNEG D- and Q-register forms;
// `size` selects the element width.
class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

// VNEG : Vector Negate (integer)
def VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG : Vector Negate (floating-point)
def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                 "vneg", "f32", "$Vd, $Vm", "",
                 [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                   (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                   "vneg", "f32", "$Vd, $Vm", "",
                   [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
// f16 variants require the full FP16 extension.
def VNEGhd : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
                 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                 "vneg", "f16", "$Vd, $Vm", "",
                 [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
             Requires<[HasNEON, HasFullFP16]>;
def VNEGhq : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
                 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                 "vneg", "f16", "$Vd, $Vm", "",
                 [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
             Requires<[HasNEON, HasFullFP16]>;

// Select the integer VNEG instructions for the vnegd/vnegq fragments.
let Predicates = [HasNEON] in {
def : Pat<(v8i8  (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
}

// VQNEG : Vector Saturating Negate
defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                        IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                        int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS : Vector Count Leading Sign Bits
defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                       IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                       int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                       IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                       ctlz>;
// VCNT : Vector Count One Bits (population count, byte elements only)
def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                    IIC_VCNTiD, "vcnt", "8",
                    v8i8, v8i8, ctpop>;
def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                    IIC_VCNTiQ, "vcnt", "8",
                    v16i8, v16i8, ctpop>;

// Vector Swap
// Both registers are read and written, expressed via the tied
// $in1/$in2 constraints; no selection patterns are attached.
def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                 (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
                 NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                 []>;
def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                 (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
                 NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                 []>;

// Vector Move Operations.
// VMOV : Vector Move (Register)
// Register-to-register vector moves are aliases of VORR with both
// source operands tied to the same register.
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

// VMOV : Vector Move (Immediate)

// Although VMOVs are not strictly speaking cheap, they are as expensive
// as their copies counterpart (VORR), so we should prefer rematerialization
// over splitting when it applies.
let isReMaterializable = 1, isAsCheapAsAMove=1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
                        (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                        "vmov", "i8", "$Vd, $SIMM", "",
                        [(set DPR:$Vd, (v8i8 (ARMvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v16i8 (ARMvmovImm timm:$SIMM)))]>;

// i16 forms forward one bit of the encoded modified-immediate into Inst{9}.
def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

// i32 forms take the whole cmode field (Inst{11-8}) from the immediate.
def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (ARMvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v1i64 (ARMvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v2i64 (ARMvmovImm timm:$SIMM)))]>;

def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2f32 (ARMvmovFPImm timm:$SIMM)))]>;
def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4f32 (ARMvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable, isAsCheapAsAMove

// Add support for bytes replication feature, so it could be GAS compatible.
multiclass NEONImmReplicateI8InstAlias<ValueType To> {
  // E.g. instructions below:
  // "vmov.i32 d0, #0xffffffff"
  // "vmov.i32 d0, #0xabababab"
  // "vmov.i16 d0, #0xabab"
  // are incorrect, but we could deal with such cases.
  // For last two instructions, for example, it should emit:
  // "vmov.i8 d0, #0xab"
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv8i8 DPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv16i8 QPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
  // Also add same support for VMVN instructions. So instruction:
  // "vmvn.i32 d0, #0xabababab"
  // actually means:
  // "vmov.i8 d0, #0x54"
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv8i8 DPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv16i8 QPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
}

defm : NEONImmReplicateI8InstAlias<i16>;
defm : NEONImmReplicateI8InstAlias<i32>;
defm : NEONImmReplicateI8InstAlias<i64>;

// Similar to above for types other than i8, e.g.:
// "vmov.i32 d0, #0xab00ab00" -> "vmov.i16 d0, #0xab00"
// "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000"
// In this case we do not canonicalize VMVN to VMOV
multiclass NEONImmReplicateInstAlias<ValueType From, NeonI V8, NeonI V16,
                                     NeonI NV8, NeonI NV16, ValueType To> {
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (V8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (V16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (NV8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (NV16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
}

defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
                                 VMVNv4i16, VMVNv8i16, i32>;
defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
                                 VMVNv4i16, VMVNv8i16, i64>;
defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32,
                                 VMVNv2i32, VMVNv4i32, i64>;
// TODO: add "VMOV <-> VMVN" conversion for cases like
// "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55"
// "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0xff00"

// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
// require zero cycles to execute so they should be used wherever possible for
// setting a register to zero.

// Even without these pseudo-insts we would probably end up with the correct
// instruction, but we could not mark the general ones with "isAsCheapAsAMove"
// since they are sometimes rather expensive (in general).

// Zero-cycle zeroing pseudos; expand to "vmov.i32 <reg>, #0" and are
// only selected on subtargets with the HasZCZ feature.
let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
  def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
                               [(set DPR:$Vd, (v2i32 ARMimmAllZerosD))],
                               (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
               Requires<[HasZCZ]>;
  def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
                               [(set QPR:$Vd, (v4i32 ARMimmAllZerosV))],
                               (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
               Requires<[HasZCZ]>;
}

// VMOV : Vector Get Lane (move scalar to ARM core register)
// The lane number is scattered across Inst{21} and Inst{6-5}/Inst{6}
// depending on the element size.

def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
                         (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                         IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
                         [(set GPR:$R, (ARMvgetlanes (v8i8 DPR:$V),
                                                     imm:$lane))]> {
  let Inst{21} = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
                          [(set GPR:$R, (ARMvgetlanes (v4i16 DPR:$V),
                                                      imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6} = lane{0};
}
def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
                         (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                         IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
                         [(set GPR:$R, (ARMvgetlaneu (v8i8 DPR:$V),
                                                     imm:$lane))]> {
  let Inst{21} = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
                          [(set GPR:$R, (ARMvgetlaneu (v4i16 DPR:$V),
                                                      imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6} = lane{0};
}
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
                          (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
                          IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                                    imm:$lane))]>,
                Requires<[HasFPRegs, HasFastVGETLNi32]> {
  let Inst{21} = lane{0};
}
// Q-register lane extraction: extract the containing D subregister
// first, then use the D-register instruction on the remapped lane.
let Predicates = [HasNEON] in {
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
def : Pat<(ARMvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(ARMvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(ARMvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
}
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>,
      Requires<[HasNEON, HasFastVGETLNi32]>;
// On subtargets where VMOV-from-lane is slow, go through an S
// subregister copy instead.
def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
let Predicates = [HasNEON] in {
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
}

// Even-numbered f16 lanes live at the bottom of an S register, so a
// plain S-subregister extract suffices.
multiclass ExtractEltEvenF16<ValueType VT4, ValueType VT8> {
  def : Pat<(extractelt (VT4 DPR:$src), imm_even:$lane),
            (EXTRACT_SUBREG
              (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
              (SSubReg_f16_reg imm_even:$lane))>;
  def : Pat<(extractelt (VT8 QPR:$src), imm_even:$lane),
            (EXTRACT_SUBREG
              (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
              (SSubReg_f16_reg imm_even:$lane))>;
}

// Odd-numbered f16 lanes are moved down with VMOVH before the copy.
multiclass ExtractEltOddF16VMOVH<ValueType VT4, ValueType VT8> {
  def : Pat<(extractelt (VT4 DPR:$src), imm_odd:$lane),
            (COPY_TO_REGCLASS
              (VMOVH (EXTRACT_SUBREG
                       (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
                       (SSubReg_f16_reg imm_odd:$lane))),
              HPR)>;
  def : Pat<(extractelt (VT8 QPR:$src), imm_odd:$lane),
            (COPY_TO_REGCLASS
              (VMOVH (EXTRACT_SUBREG
                       (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
                       (SSubReg_f16_reg imm_odd:$lane))),
              HPR)>;
}

let Predicates = [HasNEON] in {
  defm : ExtractEltEvenF16<v4f16, v8f16>;
  defm : ExtractEltOddF16VMOVH<v4f16, v8f16>;
}

let AddedComplexity = 1, Predicates = [HasNEON, HasBF16, HasFullFP16] in {
  // If VMOVH (vmovx.f16) is available use it to extract BF16 from the odd lanes
  defm : ExtractEltOddF16VMOVH<v4bf16, v8bf16>;
}

let Predicates = [HasBF16, HasNEON] in {
  defm : ExtractEltEvenF16<v4bf16, v8bf16>;

  // Otherwise, if VMOVH is not available resort to extracting the odd lane
  // into a GPR and then moving to HPR
  def : Pat<(extractelt (v4bf16 DPR:$src), imm_odd:$lane),
            (COPY_TO_REGCLASS
              (VGETLNu16 (v4bf16 DPR:$src), imm:$lane),
              HPR)>;

  def : Pat<(extractelt (v8bf16 QPR:$src), imm_odd:$lane),
            (COPY_TO_REGCLASS
              (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                                (DSubReg_i16_reg imm:$lane))),
                         (SubReg_i16_lane imm:$lane)),
              HPR)>;
}

// VMOV : Vector Set Lane (move ARM core register to scalar)

let Constraints = "$src1 = $V" in {
def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
                         (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
                         IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
                         [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
                                                      GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
                          IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
                                                       GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6} = lane{0};
}
def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
                          IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
                                                   GPR:$R, imm:$lane))]>,
                Requires<[HasVFP2]> {
  let Inst{21} = lane{0};
  // This instruction is equivalent as
  // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
  let isInsertSubreg = 1;
}
}

// TODO: for odd lanes we could optimize this a bit by using the VINS
// FullFP16 instruction when it is available
multiclass InsertEltF16<ValueType VTScalar, ValueType VT4, ValueType VT8> {
  def : Pat<(insertelt (VT4 DPR:$src1), (VTScalar HPR:$src2), imm:$lane),
            (VT4 (VSETLNi16 DPR:$src1,
                            (COPY_TO_REGCLASS HPR:$src2, GPR), imm:$lane))>;
  def : Pat<(insertelt (VT8 QPR:$src1), (VTScalar HPR:$src2), imm:$lane),
            (VT8 (INSERT_SUBREG QPR:$src1,
                   (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                              (DSubReg_i16_reg imm:$lane))),
                                     (COPY_TO_REGCLASS HPR:$src2, GPR),
                                     (SubReg_i16_lane imm:$lane))),
                   (DSubReg_i16_reg imm:$lane)))>;
}

// Q-register lane insertion: set the lane inside the containing D
// subregister, then re-insert that D register into the Q register.
let Predicates = [HasNEON] in {
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                   (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                           (DSubReg_i8_reg imm:$lane))),
                                   GPR:$src2, (SubReg_i8_lane imm:$lane))),
                   (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                   (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                              (DSubReg_i16_reg imm:$lane))),
                                     GPR:$src2, (SubReg_i16_lane imm:$lane))),
                   (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                   (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                              (DSubReg_i32_reg imm:$lane))),
                                     GPR:$src2, (SubReg_i32_lane imm:$lane))),
                   (DSubReg_i32_reg imm:$lane)))>;

def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

defm : InsertEltF16<f16, v4f16, v8f16>;

def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

// scalar_to_vector: place the scalar in lane 0, leaving the other
// lanes undefined (IMPLICIT_DEF).
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

def : Pat<(v4f16 (scalar_to_vector (f16 HPR:$src))),
          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;
def : Pat<(v8f16 (scalar_to_vector (f16 HPR:$src))),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;

def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
}

let Predicates = [HasNEON, HasBF16] in
defm : InsertEltF16<bf16, v4bf16, v8bf16>;

// VDUP : Vector Duplicate (from ARM core register to all elements)

class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set DPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set QPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;

def VDUP8d  : VDUPD<0b11101100, 0b00, "8", v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>,
              Requires<[HasNEON, HasFastVDUP32]>;
def VDUP8q  : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// ARMvdup patterns for uarchs with fast VDUP.32.
def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
      Requires<[HasNEON,HasFastVDUP32]>;
def : Pat<(v4f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>,
      Requires<[HasNEON]>;

// ARMvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
def : Pat<(v2i32 (ARMvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;
def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;

// VDUP : Vector Duplicate Lane (from scalar to all elements)

class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType Ty, Operand IdxTy>
  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set DPR:$Vd, (Ty (ARMvduplane (Ty DPR:$Vm), imm:$lane)))]>;

class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Operand IdxTy>
  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set QPR:$Vd, (ResTy (ARMvduplane (OpTy DPR:$Vm),
                                                 VectorIndex32:$lane)))]>;

// Inst{19-16} is partially specified depending on the element size.
6595 6596def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> { 6597 bits<3> lane; 6598 let Inst{19-17} = lane{2-0}; 6599} 6600def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> { 6601 bits<2> lane; 6602 let Inst{19-18} = lane{1-0}; 6603} 6604def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> { 6605 bits<1> lane; 6606 let Inst{19} = lane{0}; 6607} 6608def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> { 6609 bits<3> lane; 6610 let Inst{19-17} = lane{2-0}; 6611} 6612def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> { 6613 bits<2> lane; 6614 let Inst{19-18} = lane{1-0}; 6615} 6616def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> { 6617 bits<1> lane; 6618 let Inst{19} = lane{0}; 6619} 6620 6621let Predicates = [HasNEON] in { 6622def : Pat<(v4f16 (ARMvduplane (v4f16 DPR:$Vm), imm:$lane)), 6623 (VDUPLN32d DPR:$Vm, imm:$lane)>; 6624 6625def : Pat<(v2f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)), 6626 (VDUPLN32d DPR:$Vm, imm:$lane)>; 6627 6628def : Pat<(v4f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)), 6629 (VDUPLN32q DPR:$Vm, imm:$lane)>; 6630 6631def : Pat<(v16i8 (ARMvduplane (v16i8 QPR:$src), imm:$lane)), 6632 (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src, 6633 (DSubReg_i8_reg imm:$lane))), 6634 (SubReg_i8_lane imm:$lane)))>; 6635def : Pat<(v8i16 (ARMvduplane (v8i16 QPR:$src), imm:$lane)), 6636 (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src, 6637 (DSubReg_i16_reg imm:$lane))), 6638 (SubReg_i16_lane imm:$lane)))>; 6639def : Pat<(v8f16 (ARMvduplane (v8f16 QPR:$src), imm:$lane)), 6640 (v8f16 (VDUPLN16q (v4f16 (EXTRACT_SUBREG QPR:$src, 6641 (DSubReg_i16_reg imm:$lane))), 6642 (SubReg_i16_lane imm:$lane)))>; 6643def : Pat<(v4i32 (ARMvduplane (v4i32 QPR:$src), imm:$lane)), 6644 (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src, 6645 (DSubReg_i32_reg imm:$lane))), 6646 (SubReg_i32_lane imm:$lane)))>; 6647def : Pat<(v4f32 (ARMvduplane (v4f32 
QPR:$src), imm:$lane)), 6648 (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src, 6649 (DSubReg_i32_reg imm:$lane))), 6650 (SubReg_i32_lane imm:$lane)))>; 6651 6652def : Pat<(v4f16 (ARMvdup (f16 HPR:$src))), 6653 (v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), 6654 (f16 HPR:$src), ssub_0), (i32 0)))>; 6655def : Pat<(v2f32 (ARMvdup (f32 SPR:$src))), 6656 (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), 6657 SPR:$src, ssub_0), (i32 0)))>; 6658def : Pat<(v4f32 (ARMvdup (f32 SPR:$src))), 6659 (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), 6660 SPR:$src, ssub_0), (i32 0)))>; 6661def : Pat<(v8f16 (ARMvdup (f16 HPR:$src))), 6662 (v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), 6663 (f16 HPR:$src), ssub_0), (i32 0)))>; 6664} 6665 6666let Predicates = [HasNEON, HasBF16] in { 6667def : Pat<(v4bf16 (ARMvduplane (v4bf16 DPR:$Vm), imm:$lane)), 6668 (VDUPLN16d DPR:$Vm, imm:$lane)>; 6669 6670def : Pat<(v8bf16 (ARMvduplane (v8bf16 QPR:$src), imm:$lane)), 6671 (v8bf16 (VDUPLN16q (v4bf16 (EXTRACT_SUBREG QPR:$src, 6672 (DSubReg_i16_reg imm:$lane))), 6673 (SubReg_i16_lane imm:$lane)))>; 6674 6675def : Pat<(v4bf16 (ARMvdup (bf16 HPR:$src))), 6676 (v4bf16 (VDUPLN16d (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), 6677 (bf16 HPR:$src), ssub_0), (i32 0)))>; 6678def : Pat<(v8bf16 (ARMvdup (bf16 HPR:$src))), 6679 (v8bf16 (VDUPLN16q (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), 6680 (bf16 HPR:$src), ssub_0), (i32 0)))>; 6681} 6682 6683// VMOVN : Vector Narrowing Move 6684defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN, 6685 "vmovn", "i", trunc>; 6686// VQMOVN : Vector Saturating Narrowing Move 6687defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, 6688 "vqmovn", "s", int_arm_neon_vqmovns>; 6689defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, 6690 "vqmovn", "u", int_arm_neon_vqmovnu>; 6691defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, 6692 "vqmovun", "s", int_arm_neon_vqmovnsu>; 6693// VMOVL : Vector 
Lengthening Move 6694defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>; 6695defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>; 6696 6697let Predicates = [HasNEON] in { 6698def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>; 6699def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>; 6700def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>; 6701} 6702 6703// Vector Conversions. 6704 6705// VCVT : Vector Convert Between Floating-Point and Integers 6706def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", 6707 v2i32, v2f32, fp_to_sint>; 6708def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", 6709 v2i32, v2f32, fp_to_uint>; 6710def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", 6711 v2f32, v2i32, sint_to_fp>; 6712def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", 6713 v2f32, v2i32, uint_to_fp>; 6714 6715def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", 6716 v4i32, v4f32, fp_to_sint>; 6717def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", 6718 v4i32, v4f32, fp_to_uint>; 6719def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", 6720 v4f32, v4i32, sint_to_fp>; 6721def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", 6722 v4f32, v4i32, uint_to_fp>; 6723 6724def VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16", 6725 v4i16, v4f16, fp_to_sint>, 6726 Requires<[HasNEON, HasFullFP16]>; 6727def VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16", 6728 v4i16, v4f16, fp_to_uint>, 6729 Requires<[HasNEON, HasFullFP16]>; 6730def VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16", 6731 v4f16, v4i16, sint_to_fp>, 6732 Requires<[HasNEON, HasFullFP16]>; 6733def VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16", 6734 v4f16, v4i16, uint_to_fp>, 6735 Requires<[HasNEON, 
HasFullFP16]>; 6736 6737def VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16", 6738 v8i16, v8f16, fp_to_sint>, 6739 Requires<[HasNEON, HasFullFP16]>; 6740def VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16", 6741 v8i16, v8f16, fp_to_uint>, 6742 Requires<[HasNEON, HasFullFP16]>; 6743def VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16", 6744 v8f16, v8i16, sint_to_fp>, 6745 Requires<[HasNEON, HasFullFP16]>; 6746def VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16", 6747 v8f16, v8i16, uint_to_fp>, 6748 Requires<[HasNEON, HasFullFP16]>; 6749 6750// VCVT{A, N, P, M} 6751multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS, 6752 SDPatternOperator IntU> { 6753 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { 6754 def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), 6755 "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>; 6756 def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), 6757 "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>; 6758 def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), 6759 "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>; 6760 def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), 6761 "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>; 6762 def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), 6763 "s16.f16", v4i16, v4f16, IntS>, 6764 Requires<[HasV8, HasNEON, HasFullFP16]>; 6765 def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), 6766 "s16.f16", v8i16, v8f16, IntS>, 6767 Requires<[HasV8, HasNEON, HasFullFP16]>; 6768 def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), 6769 "u16.f16", v4i16, v4f16, IntU>, 6770 Requires<[HasV8, HasNEON, HasFullFP16]>; 6771 def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, 
NoItinerary, !strconcat("vcvt", op), 6772 "u16.f16", v8i16, v8f16, IntU>, 6773 Requires<[HasV8, HasNEON, HasFullFP16]>; 6774 } 6775} 6776 6777defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>; 6778defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>; 6779defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>; 6780defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>; 6781 6782// VCVT : Vector Convert Between Floating-Point and Fixed-Point. 6783let DecoderMethod = "DecodeVCVTD" in { 6784def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", 6785 v2i32, v2f32, int_arm_neon_vcvtfp2fxs>; 6786def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", 6787 v2i32, v2f32, int_arm_neon_vcvtfp2fxu>; 6788def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", 6789 v2f32, v2i32, int_arm_neon_vcvtfxs2fp>; 6790def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", 6791 v2f32, v2i32, int_arm_neon_vcvtfxu2fp>; 6792let Predicates = [HasNEON, HasFullFP16] in { 6793def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16", 6794 v4i16, v4f16, int_arm_neon_vcvtfp2fxs>; 6795def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16", 6796 v4i16, v4f16, int_arm_neon_vcvtfp2fxu>; 6797def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16", 6798 v4f16, v4i16, int_arm_neon_vcvtfxs2fp>; 6799def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16", 6800 v4f16, v4i16, int_arm_neon_vcvtfxu2fp>; 6801} // Predicates = [HasNEON, HasFullFP16] 6802} 6803 6804let DecoderMethod = "DecodeVCVTQ" in { 6805def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", 6806 v4i32, v4f32, int_arm_neon_vcvtfp2fxs>; 6807def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", 6808 v4i32, v4f32, int_arm_neon_vcvtfp2fxu>; 6809def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", 6810 v4f32, v4i32, int_arm_neon_vcvtfxs2fp>; 6811def VCVTxu2fq : N2VCvtQ<1, 1, 
0b1110, 0, 1, "vcvt", "f32.u32", 6812 v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; 6813let Predicates = [HasNEON, HasFullFP16] in { 6814def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16", 6815 v8i16, v8f16, int_arm_neon_vcvtfp2fxs>; 6816def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16", 6817 v8i16, v8f16, int_arm_neon_vcvtfp2fxu>; 6818def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16", 6819 v8f16, v8i16, int_arm_neon_vcvtfxs2fp>; 6820def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16", 6821 v8f16, v8i16, int_arm_neon_vcvtfxu2fp>; 6822} // Predicates = [HasNEON, HasFullFP16] 6823} 6824 6825def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0", 6826 (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>; 6827def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0", 6828 (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>; 6829def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0", 6830 (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>; 6831def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0", 6832 (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>; 6833 6834def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0", 6835 (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>; 6836def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0", 6837 (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>; 6838def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0", 6839 (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>; 6840def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0", 6841 (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>; 6842 6843def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0", 6844 (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>; 6845def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0", 6846 (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>; 6847def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0", 6848 (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>; 6849def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0", 6850 (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>; 6851 6852def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0", 6853 (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>; 6854def : 
NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0", 6855 (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>; 6856def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0", 6857 (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>; 6858def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0", 6859 (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>; 6860 6861 6862// VCVT : Vector Convert Between Half-Precision and Single-Precision. 6863def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0, 6864 IIC_VUNAQ, "vcvt", "f16.f32", 6865 v4i16, v4f32, int_arm_neon_vcvtfp2hf>, 6866 Requires<[HasNEON, HasFP16]>; 6867def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0, 6868 IIC_VUNAQ, "vcvt", "f32.f16", 6869 v4f32, v4i16, int_arm_neon_vcvthf2fp>, 6870 Requires<[HasNEON, HasFP16]>; 6871 6872// Vector Reverse. 6873 6874// VREV64 : Vector Reverse elements within 64-bit doublewords 6875 6876class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6877 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd), 6878 (ins DPR:$Vm), IIC_VMOVD, 6879 OpcodeStr, Dt, "$Vd, $Vm", "", 6880 [(set DPR:$Vd, (Ty (ARMvrev64 (Ty DPR:$Vm))))]>; 6881class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6882 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd), 6883 (ins QPR:$Vm), IIC_VMOVQ, 6884 OpcodeStr, Dt, "$Vd, $Vm", "", 6885 [(set QPR:$Vd, (Ty (ARMvrev64 (Ty QPR:$Vm))))]>; 6886 6887def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>; 6888def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>; 6889def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>; 6890let Predicates = [HasNEON] in { 6891def : Pat<(v2f32 (ARMvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>; 6892} 6893 6894def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>; 6895def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>; 6896def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>; 6897 6898let Predicates = [HasNEON] in { 6899 def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))), 6900 (VREV64q32 QPR:$Vm)>; 6901 def : Pat<(v8f16 (ARMvrev64 
(v8f16 QPR:$Vm))), 6902 (VREV64q16 QPR:$Vm)>; 6903 def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))), 6904 (VREV64d16 DPR:$Vm)>; 6905} 6906 6907// VREV32 : Vector Reverse elements within 32-bit words 6908 6909class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6910 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd), 6911 (ins DPR:$Vm), IIC_VMOVD, 6912 OpcodeStr, Dt, "$Vd, $Vm", "", 6913 [(set DPR:$Vd, (Ty (ARMvrev32 (Ty DPR:$Vm))))]>; 6914class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6915 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd), 6916 (ins QPR:$Vm), IIC_VMOVQ, 6917 OpcodeStr, Dt, "$Vd, $Vm", "", 6918 [(set QPR:$Vd, (Ty (ARMvrev32 (Ty QPR:$Vm))))]>; 6919 6920def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>; 6921def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>; 6922 6923def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>; 6924def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>; 6925 6926let Predicates = [HasNEON] in { 6927 def : Pat<(v8f16 (ARMvrev32 (v8f16 QPR:$Vm))), 6928 (VREV32q16 QPR:$Vm)>; 6929 def : Pat<(v4f16 (ARMvrev32 (v4f16 DPR:$Vm))), 6930 (VREV32d16 DPR:$Vm)>; 6931} 6932 6933// VREV16 : Vector Reverse elements within 16-bit halfwords 6934 6935class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6936 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd), 6937 (ins DPR:$Vm), IIC_VMOVD, 6938 OpcodeStr, Dt, "$Vd, $Vm", "", 6939 [(set DPR:$Vd, (Ty (ARMvrev16 (Ty DPR:$Vm))))]>; 6940class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 6941 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd), 6942 (ins QPR:$Vm), IIC_VMOVQ, 6943 OpcodeStr, Dt, "$Vd, $Vm", "", 6944 [(set QPR:$Vd, (Ty (ARMvrev16 (Ty QPR:$Vm))))]>; 6945 6946def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>; 6947def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>; 6948 6949// Other Vector Shuffles. 
// Aligned extractions: really just dropping registers
//
// Extracting a half-size subvector at a lane index that falls on a
// D-register boundary needs no instruction at all: it is an
// EXTRACT_SUBREG of the Q register. The LaneCVT SDNodeXForm (the
// DSubReg_*_reg transforms, defined elsewhere in the file) converts the
// start lane into the matching dsub_* subregister index.

class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
  : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
        (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>,
    Requires<[HasNEON]>;

def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;

def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;

def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;

def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;

def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;

def : AlignedVEXTq<v4f16, v8f16, DSubReg_i16_reg>; // v8f16 -> v4f16

// VEXT : Vector Extract

// All of these have a two-operand InstAlias.
// Bits Inst{11-8} carry the extraction immediate as a byte offset; the
// per-element-size defs below narrow the field and force the low bit(s)
// to zero for the wider element types.
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
                                     (Ty DPR:$Vm), imm:$index)))]> {
  bits<3> index;
  // D-register form: only a 3-bit byte offset; Inst{11} is always 0.
  let Inst{11} = 0b0;
  let Inst{10-8} = index{2-0};
}

class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
                                     (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}
}

def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
  let Inst{10-8} = index{2-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
  // 16-bit elements: byte offset is index*2, so Inst{8} is forced to 0.
  let Inst{10-9} = index{1-0};
  let Inst{8}    = 0b0;
}
// FP-element vectors reuse the same-width integer VEXT instructions.
let Predicates = [HasNEON] in {
def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
          (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
}

def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
  // 32-bit elements: byte offset is index*4, so Inst{9-8} are forced to 0.
  let Inst{10}  = index{0};
  let Inst{9-8} = 0b00;
}
let Predicates = [HasNEON] in {
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
}

def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
  let Inst{11-8} = index{3-0};
}
def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
  let Inst{11-9} = index{2-0};
  let Inst{8}    = 0b0;
}
let Predicates = [HasNEON] in {
def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
          (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
}

def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8}   = 0b00;
}
def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
  let Inst{11}   = index{0};
  let Inst{10-8} = 0b000;
}
let Predicates = [HasNEON] in {
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
}

// VTRN : Vector Transpose

def VTRNd8  : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def VTRNq8  : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

// VUZP : Vector Unzip (Deinterleave)

def VUZPd8  : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def VUZPq8  : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

// VZIP : Vector Zip (Interleave)

def VZIPd8  : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def VZIPq8  : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.

// VTBL : Vector Table Lookup
let DecoderMethod = "DecodeTBLInstruction" in {
def VTBL1
  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;

// The multi-register forms need their table registers allocated as a
// consecutive D-register list, hence hasExtraSrcRegAllocReq. They have no
// inline patterns; selection goes through the Pat<>s / pseudos below.
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
        (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL3
  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
        (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL4
  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
        (ins VecListFourD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

// Pseudos that take the 3- or 4-entry table as a single QQPR tuple; the
// patterns below build the tuple with REG_SEQUENCE (padding the 3-entry
// case with an IMPLICIT_DEF in dsub_3).
def VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;

// VTBX : Vector Table Extension
def VTBX1
  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def VTBX3
  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
def VTBX4
  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

def VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"

// Selection patterns for the multi-register table forms: glue the loose
// v8i8 table operands into DPair / QQPR tuples with REG_SEQUENCE.
let Predicates = [HasNEON] in {
def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
          (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
                                            v8i8:$Vn1, dsub_1),
                       v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vm)),
          (v8i8 (VTBX2 v8i8:$orig,
                       (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
                                            v8i8:$Vn1, dsub_1),
                       v8i8:$Vm))>;

def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vm)),
          (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
                             v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vm)),
          (v8i8 (VTBX3Pseudo v8i8:$orig,
                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
                             v8i8:$Vm))>;

def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
          (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 v8i8:$Vn3, dsub_3),
                             v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
          (v8i8 (VTBX4Pseudo v8i8:$orig,
                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 v8i8:$Vn3, dsub_3),
                             v8i8:$Vm))>;
}

// VRINT : Vector Rounding
// One multiclass instantiation per rounding mode; op9_7 selects the mode
// bits, and each mode gets D/Q x f32/f16 variants plus asm aliases with
// the redundant two-type spelling (e.g. "vrintn.f32.f32").
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                     !strconcat("vrint", op), "f32",
                     v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
    let Inst{9-7} = op9_7;
  }
  def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                     !strconcat("vrint", op), "f32",
                     v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
    let Inst{9-7} = op9_7;
  }
  def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
                     !strconcat("vrint", op), "f16",
                     v4f16, v4f16, Int>,
           Requires<[HasV8, HasNEON, HasFullFP16]> {
    let Inst{9-7} = op9_7;
  }
  def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
                     !strconcat("vrint", op), "f16",
                     v8f16, v8f16, Int>,
           Requires<[HasV8, HasNEON, HasFullFP16]> {
    let Inst{9-7} = op9_7;
  }
  }

  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
                      (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
                      (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
  let Predicates = [HasNEON, HasFullFP16] in {
  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
                      (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
                      (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
  }
}

defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;

// Cryptography instructions
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
  class AES<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                 !strconcat("aes", op), "8", v16i8, v16i8, Int>;
  class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                  !strconcat("aes", op), "8", v16i8, v16i8, Int>;
  class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
              SDPatternOperator Int>
    : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                 !strconcat("sha", op), "32", v4i32, v4i32, Int>;
  class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
                 SDPatternOperator Int>
    : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                  !strconcat("sha", op), "32", v4i32, v4i32, Int>;
  class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
    : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
                 !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>;
}

let Predicates = [HasV8, HasAES] in {
def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
}

// SHA1H/SHA1C/SHA1M/SHA1P use null_frag: their intrinsics involve a
// scalar i32 operand (or result), so they are selected by the explicit
// patterns below rather than by an inline instruction pattern.
let Predicates = [HasV8, HasSHA2] in {
def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
}

// The scalar i32 hash operand/result is shuttled through lane 0 of a
// vector register: copy to SPR, widen to the Q register with
// SUBREG_TO_REG, and (for sha1h) extract ssub_0 back to a GPR.
let Predicates = [HasNEON] in {
def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
          (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
              (SHA1H (SUBREG_TO_REG (i64 0),
                                    (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
                                    ssub_0)),
              ssub_0)), GPR)>;

def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1C v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1M v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1P v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;
}
//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//
//
// These pattern classes implement scalar f32 (and f16) operations with
// NEON D-register instructions: each scalar input is inserted into lane 0
// of an undef vector constrained to DPR_VFP2 (presumably the D registers
// that overlap the VFP S registers — defined elsewhere), the vector op
// runs, and lane 0 (ssub_0) is extracted as the scalar result.

// Scalar unary f32 op via a 2-lane NEON instruction.
class N2VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                 (INSERT_SUBREG
                  (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                  SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;

// Scalar binary f32 op via a 2-lane NEON instruction.
class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                 (INSERT_SUBREG
                  (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                  SPR:$a, ssub_0),
                 (INSERT_SUBREG
                  (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                  SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

// Scalar binary f16 op via a 4-lane NEON instruction.
class N3VSPatFP16<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f16 (OpNode HPR:$a, HPR:$b)),
              (EXTRACT_SUBREG
               (v4f16 (COPY_TO_REGCLASS (Inst
                 (INSERT_SUBREG
                  (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
                  HPR:$a, ssub_0),
                 (INSERT_SUBREG
                  (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
                  HPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

// Scalar multiply-accumulate: OpNode(acc, MulNode(a, b)) mapped to a
// 3-operand NEON MLA/MLS/FMA-style instruction.
class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                 (INSERT_SUBREG
                  (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                  SPR:$acc, ssub_0),
                 (INSERT_SUBREG
                  (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                  SPR:$a, ssub_0),
                 (INSERT_SUBREG
                  (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                  SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

// Scalar int -> f32 conversion routed through lane 0.
class NVCVTIFPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode GPR:$a)),
              (f32 (EXTRACT_SUBREG
                     (v2f32 (Inst
                       (INSERT_SUBREG
                         (v2f32 (IMPLICIT_DEF)),
                         (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
                     ssub_0))>;
// Scalar f32 -> int conversion routed through lane 0.
class NVCVTFIPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(i32 (OpNode SPR:$a)),
              (i32 (EXTRACT_SUBREG
                     (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                                 SPR:$a, ssub_0))),
                     ssub_0))>;

def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
// Separate MLA/MLS vs. fused FMA/FMS instantiations, gated on the
// corresponding subtarget/tuning predicates.
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPatFP16<fmaximum, VMAXhd>, Requires<[HasFullFP16]>;
def : N3VSPatFP16<fminimum, VMINhd>, Requires<[HasFullFP16]>;
def : N3VSPat<fmaximum, VMAXfd>, Requires<[HasNEON]>;
def : N3VSPat<fminimum, VMINfd>, Requires<[HasNEON]>;
def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;

// NEON doesn't have any f64 conversions, so provide patterns to make
// sure the VFP conversions match when extracting from a vector.
7391def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))), 7392 (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; 7393def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))), 7394 (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>; 7395def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))), 7396 (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; 7397def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))), 7398 (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>; 7399 7400 7401// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers. 7402def : Pat<(f32 (bitconvert GPR:$a)), 7403 (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>, 7404 Requires<[HasNEON, DontUseVMOVSR]>; 7405def : Pat<(arm_vmovsr GPR:$a), 7406 (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>, 7407 Requires<[HasNEON, DontUseVMOVSR]>; 7408 7409//===----------------------------------------------------------------------===// 7410// Non-Instruction Patterns or Endianess - Revert Patterns 7411//===----------------------------------------------------------------------===// 7412 7413// bit_convert 7414// 64 bit conversions 7415let Predicates = [HasNEON] in { 7416def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>; 7417def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>; 7418 7419def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>; 7420def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>; 7421 7422def : Pat<(v4i16 (bitconvert (v4f16 DPR:$src))), (v4i16 DPR:$src)>; 7423def : Pat<(v4f16 (bitconvert (v4i16 DPR:$src))), (v4f16 DPR:$src)>; 7424 7425def : Pat<(v4i16 (bitconvert (v4bf16 DPR:$src))), (v4i16 DPR:$src)>; 7426def : Pat<(v4bf16 (bitconvert (v4i16 DPR:$src))), (v4bf16 DPR:$src)>; 7427 7428// 128 bit conversions 7429def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>; 7430def : Pat<(v2i64 
(bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;

def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;

def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16 QPR:$src)>;

def : Pat<(v8i16 (bitconvert (v8bf16 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8bf16 (bitconvert (v8i16 QPR:$src))), (v8bf16 QPR:$src)>;
}

// On little-endian targets a NEON register bitcast is a pure re-labeling of
// the register contents, so every cross-type bitconvert folds to a no-op
// (the same D/Q register, reinterpreted).
let Predicates = [IsLE,HasNEON] in {
  // 64 bit conversions
  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4f16 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4bf16 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;

  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4bf16 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;

  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4bf16 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;

  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4bf16 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;

  def : Pat<(v4f16 (bitconvert (f64   DPR:$src))), (v4f16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (v4f16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (v4f16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (v4f16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v8i8  DPR:$src))), (v4f16 DPR:$src)>;

  def : Pat<(v4bf16 (bitconvert (f64   DPR:$src))), (v4bf16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v1i64 DPR:$src))), (v4bf16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v2f32 DPR:$src))), (v4bf16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v2i32 DPR:$src))), (v4bf16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v8i8  DPR:$src))), (v4bf16 DPR:$src)>;

  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;

  def : Pat<(v8i8 (bitconvert (f64   DPR:$src))), (v8i8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v4f16 DPR:$src))), (v8i8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v4bf16 DPR:$src))), (v8i8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;

  // 128 bit conversions
  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8bf16 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;

  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8bf16 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;

  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8bf16 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;

  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8bf16 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;

  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>;

  def : Pat<(v8bf16 (bitconvert (v2f64 QPR:$src))), (v8bf16 QPR:$src)>;
  def : Pat<(v8bf16 (bitconvert (v2i64 QPR:$src))), (v8bf16 QPR:$src)>;
  def : Pat<(v8bf16 (bitconvert (v4f32 QPR:$src))), (v8bf16 QPR:$src)>;
  def : Pat<(v8bf16 (bitconvert (v4i32 QPR:$src))), (v8bf16 QPR:$src)>;
  def : Pat<(v8bf16 (bitconvert (v16i8 QPR:$src))), (v8bf16 QPR:$src)>;

  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;

  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8bf16 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
}

// On big-endian targets the in-register lane order differs between element
// sizes, so a bitconvert between element sizes must permute lanes with the
// appropriate VREV{16,32,64} whose suffix is the SMALLER of the two element
// widths involved (same-size casts, e.g. v2f32 <-> v2i32, stay no-ops and are
// handled by the ARMVectorRegCastImpl patterns below).
let Predicates = [IsBE,HasNEON] in {
  // 64 bit conversions
  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4bf16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;

  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4bf16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;

  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4bf16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;

  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4bf16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;

  def : Pat<(v4f16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;

  def : Pat<(v4bf16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4bf16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;

  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;

  def : Pat<(v8i8 (bitconvert (f64   DPR:$src))), (VREV64d8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v4f16 DPR:$src))), (VREV16d8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v4bf16 DPR:$src))), (VREV16d8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>;

  // 128 bit conversions
  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8bf16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;

  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8bf16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;

  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8bf16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;

  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8bf16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;

  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;

  def : Pat<(v8bf16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8bf16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8bf16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v8bf16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v8bf16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;

  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;

  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (VREV16q8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8bf16 QPR:$src))), (VREV16q8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>;
}

let Predicates = [HasNEON] in {
  // Here we match the specific SDNode type 'ARMVectorRegCastImpl'
  // rather than the more general 'ARMVectorRegCast' which would also
  // match some bitconverts. If we use the latter in cases where the
  // input and output types are the same, the bitconvert gets elided
  // and we end up generating a nonsense match of nothing.
  // Register casts never change the bit layout, so every (VT, VT2)
  // combination within the same register class folds to a register reuse.

  foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
    foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
      def : Pat<(VT (ARMVectorRegCastImpl (VT2 QPR:$src))), (VT QPR:$src)>;

  foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in
    foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in
      def : Pat<(VT (ARMVectorRegCastImpl (VT2 DPR:$src))), (VT DPR:$src)>;
}

// Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
          (VREV64q8 (VLD1q8 addrmode6:$addr))>,
      Requires<[IsBE,HasNEON]>;
def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>,
      Requires<[IsBE,HasNEON]>;
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
          (VREV64q16 (VLD1q16 addrmode6:$addr))>,
      Requires<[IsBE,HasNEON]>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>,
      Requires<[IsBE,HasNEON]>;

// Fold extracting an element out of a v2i32 into a vfp register.
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
          (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>,
      Requires<[HasNEON]>;

// Vector lengthening move with load, matching extending loads.

// extload, zextload and sextload for a standard lengthening load.
// Example:
//   Lengthen_Single<"8", "i16", "8"> =
//     Pat<(v8i16 (extloadvi8 addrmode6:$addr)),
//         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr))>;
//
// Full-width lengthening load: all DestLanes lanes are produced by one
// VLD1 of the narrow vector followed by a single VMOVL (unsigned for
// extload/zextload, signed for sextload).
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
  // Bump complexity so these beat the generic load + extend selection.
  let AddedComplexity = 10 in {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
                 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
             Requires<[HasNEON]>;

  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
               (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                  (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
           Requires<[HasNEON]>;

  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
               (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
                  (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
           Requires<[HasNEON]>;
  }
}

// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available.
// Example:
//   Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16"> =
//       Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                   (f64 (IMPLICIT_DEF)), (i32 0))),
//                           dsub_0)>;
//
// Only half of the result lanes are needed: load 32 bits into lane 0 with
// VLD1LN, lengthen the full D register, then keep the low half (dsub_0).
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
                   (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
                   dsub_0)>,
             Requires<[HasNEON]>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
                 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
                 dsub_0)>,
           Requires<[HasNEON]>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
                 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
                 dsub_0)>,
           Requires<[HasNEON]>;
}

// The following class definition is basically a copy of the
// Lengthen_HalfSingle definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
// Big-endian variant of Lengthen_HalfSingle: a VREV32d<RevLanes> is inserted
// between the VLD1LN and the VMOVL to put the loaded 32 bits into the lane
// order the vector operations expect.
multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy, string RevLanes> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
                 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
                   (!cast<Instruction>("VREV32d" # RevLanes)
                     (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
                   dsub_0)>,
             Requires<[HasNEON]>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
                 (!cast<Instruction>("VREV32d" # RevLanes)
                   (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
                 dsub_0)>,
           Requires<[HasNEON]>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
                 (!cast<Instruction>("VREV32d" # RevLanes)
                   (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
                 dsub_0)>,
           Requires<[HasNEON]>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
// Example:
//   Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
//       Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)),
//           (VMOVLuv4i32
//             (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                     (f64 (IMPLICIT_DEF)),
//                                                     (i32 0))),
//                             dsub_0))>;
//
// Two chained VMOVL steps quadruple the element width; the intermediate
// result is narrowed back to a D register (dsub_0) between the two steps.
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
                 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                   (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                     (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
                     dsub_0))>,
             Requires<[HasNEON]>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                   (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
                   dsub_0))>,
           Requires<[HasNEON]>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
                   (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
                   dsub_0))>,
           Requires<[HasNEON]>;
}

// The following class definition is basically a copy of the
// Lengthen_Double definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
// Big-endian variant of Lengthen_Double: a VREV32d<RevLanes> after the
// VLD1LN puts the loaded 32 bits into proper vector lane order before the
// two chained VMOVL steps.
multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty, string RevLanes> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
                 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                   (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                     (!cast<Instruction>("VREV32d" # RevLanes)
                       (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
                     dsub_0))>,
             Requires<[HasNEON]>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                   (!cast<Instruction>("VREV32d" # RevLanes)
                     (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
                   dsub_0))>,
           Requires<[HasNEON]>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
               (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
                   (!cast<Instruction>("VREV32d" # RevLanes)
                     (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
                   dsub_0))>,
           Requires<[HasNEON]>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Example:
//   Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
//       Pat<(v2i32 (extloadvi8 addrmode6:$addr)),
//           (EXTRACT_SUBREG (VMOVLuv4i32
//             (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
//                                                     (f64 (IMPLICIT_DEF)), (i32 0))),
//                             dsub_0)),
//             dsub_0)>;
//
// Like Lengthen_Double, but only a 16-bit lane is loaded and the final
// result is the low D half (dsub_0) of the last VMOVL.
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
                 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                   (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                     (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
                     dsub_0)),
                   dsub_0)>,
             Requires<[HasNEON]>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
               (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                   (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
                   dsub_0)),
                 dsub_0)>,
           Requires<[HasNEON]>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
               (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
                   (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
                   dsub_0)),
                 dsub_0)>,
           Requires<[HasNEON]>;
}

// The following class definition is basically a copy of the
// Lengthen_HalfDouble definition above, however with an additional VREV16d8
// instruction to convert data loaded by VLD1LN into proper vector format
// in big endian mode.
// Big-endian variant of Lengthen_HalfDouble: a VREV16d8 after the VLD1LNd16
// puts the two loaded bytes into proper vector lane order before the two
// chained VMOVL steps.
multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
                 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                   (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                     (!cast<Instruction>("VREV16d8")
                       (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
                     dsub_0)),
                   dsub_0)>,
             Requires<[HasNEON]>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
               (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                   (!cast<Instruction>("VREV16d8")
                     (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
                   dsub_0)),
                 dsub_0)>,
           Requires<[HasNEON]>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
               (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
                 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
                   (!cast<Instruction>("VREV16d8")
                     (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
                   dsub_0)),
                 dsub_0)>,
           Requires<[HasNEON]>;
}

// Full-width lengthening loads are endian-independent (VLD1 handles the
// lane ordering), so these apply to both LE and BE.
defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64

// Partial-width lengthening loads use VLD1LN, whose result needs a VREV on
// big-endian targets; hence the LE/BE split below.
let Predicates = [HasNEON,IsLE] in {
  defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
  defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
}

let Predicates = [HasNEON,IsBE] in {
  defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
  defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
}

// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
let Predicates = [HasNEON,IsLE] in {
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
          (VLD1LNd16 addrmode6:$addr,
                     (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
          (VLD1LNd16 addrmode6:$addr,
                     (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
          (VLD1LNd16 addrmode6:$addr,
                     (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
}
// The following patterns are basically a copy of the patterns above,
// however with an additional VREV16d instruction to convert data
// loaded by VLD1LN into proper vector format in big endian mode.
let Predicates = [HasNEON,IsBE] in {
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
          (!cast<Instruction>("VREV16d8")
            (VLD1LNd16 addrmode6:$addr,
                       (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
          (!cast<Instruction>("VREV16d8")
            (VLD1LNd16 addrmode6:$addr,
                       (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
          (!cast<Instruction>("VREV16d8")
            (VLD1LNd16 addrmode6:$addr,
                       (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
}

// Concatenating two D registers into a Q register is just a register
// sequence: place $Dn in the low half and $Dm in the high half.
let Predicates = [HasNEON] in {
def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v8f16 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v8bf16 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
}

//===----------------------------------------------------------------------===//
// Assembler aliases
//

// Legacy FPA-style mnemonics for moving a GPR into the high/low half of a
// D register; both map onto VSETLNi32 with the appropriate lane index.
def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;

// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                         (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                         (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                         (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                         (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                         (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                         (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                         (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                         (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                         (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                         (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                         (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                         (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                         (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                         (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// ... immediate forms
// VAND with an immediate has no encoding; assemble it as VBIC of the
// bitwise-inverted immediate (the nImmSplatNot* operands carry the
// inversion).
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
                    (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
                    (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
                    (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
                    (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;


// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;

// Writeback forms: "$addr!" is post-increment by the access size, "$addr, $Rm"
// is post-increment by register.
def VLD1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VLD1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VLD1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;


// VST1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;

def VST1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VST1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VST1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;

// VLD2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
// Q-register forms (even/odd D-register pairs); no ".8" form exists for them.
def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;

def VLD2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VLD2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VLD2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VLD2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VLD2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VLD2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;


// VST2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
        (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, pred:$p)>;
def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
        (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
        (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
        (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
        (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;

// Writeback, fixed post-increment ("$addr!") forms.
def VST2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
        (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
        (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
        (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
        (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
        (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
// Writeback, register post-increment ("$addr, $Rm") forms.
def VST2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
        (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
             rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
        (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
             rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
        (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
        (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
             rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
        (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;

// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
        (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
             pred:$p)>;
def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
        (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
             pred:$p)>;
def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
        (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
             pred:$p)>;
def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
        (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
             pred:$p)>;
def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
        (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
             pred:$p)>;
def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
        (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
             pred:$p)>;

// Writeback, fixed post-increment ("$addr!") forms.
def VLD3DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
        (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
             pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
        (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
             pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
        (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
             pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
        (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
             pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
        (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
             pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
        (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
             pred:$p)>;
// Writeback, register post-increment ("$addr, $Rm") forms.
def VLD3DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
        (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
        (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
        (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
        (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
        (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
        (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
             rGPR:$Rm, pred:$p)>;


// VLD3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
        (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
        (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
        (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
        (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
        (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;

// Writeback, fixed post-increment ("$addr!") forms.
def VLD3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
        (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
        (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
        (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
        (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
        (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
// Writeback, register post-increment ("$addr, $Rm") forms.
def VLD3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
        (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
        (ins VecListThreeDHWordIndexed:$list,
             addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
        (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
        (ins VecListThreeQHWordIndexed:$list,
             addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
        (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
             rGPR:$Rm, pred:$p)>;

// VLD3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
        (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
        (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
        (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
        (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
        (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
        (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// Writeback, fixed post-increment ("$addr!") forms.
def VLD3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
        (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
        (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
        (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
        (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
        (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
        (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
// Writeback, register post-increment ("$addr, $Rm") forms.
def VLD3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
        (ins VecListThreeD:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
        (ins VecListThreeD:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
        (ins VecListThreeD:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
        (ins VecListThreeQ:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
        (ins VecListThreeQ:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
        (ins VecListThreeQ:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;

// VST3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
        (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
        (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
        (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
        (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
        (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;

// Writeback, fixed post-increment ("$addr!") forms.
def VST3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
        (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VST3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
        (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VST3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
        (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VST3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
        (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
def VST3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
        (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
             pred:$p)>;
// Writeback, register post-increment ("$addr, $Rm") forms.
def VST3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
        (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
             rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
        (ins VecListThreeDHWordIndexed:$list,
             addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
        (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
             rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
        (ins VecListThreeQHWordIndexed:$list,
             addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
        (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
             rGPR:$Rm, pred:$p)>;


// VST3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
        (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
        (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
        (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
        (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
        (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
        (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// Writeback, fixed post-increment ("$addr!") forms.
def VST3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
        (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
        (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
        (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
        (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
        (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
        (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
// Writeback, register post-increment ("$addr, $Rm") forms.
def VST3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
        (ins VecListThreeD:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
        (ins VecListThreeD:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
        (ins VecListThreeD:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
        (ins VecListThreeQ:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
        (ins VecListThreeQ:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
        (ins VecListThreeQ:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;

// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
             pred:$p)>;
def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
             pred:$p)>;
def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
             pred:$p)>;
def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
             pred:$p)>;
def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
             pred:$p)>;
def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
             pred:$p)>;

// Writeback, fixed post-increment ("$addr!") forms.
def VLD4DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
             pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
             pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
             pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
             pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
             pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
             pred:$p)>;
// Writeback, register post-increment ("$addr, $Rm") forms.
def VLD4DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
        (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
        (ins VecListFourDAllLanes:$list,
             addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
        (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
        (ins VecListFourQAllLanes:$list,
             addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;


// VLD4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
        (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
             pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
        (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
        (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
        (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
        (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;

// Writeback, fixed post-increment ("$addr!") forms.
def VLD4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
        (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
             pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
        (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
        (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
        (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
        (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;
// Writeback, register post-increment ("$addr, $Rm") forms.
def VLD4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
        (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
        (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
        (ins VecListFourDWordIndexed:$list,
             addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
        (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
        (ins VecListFourQWordIndexed:$list,
             addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;



// VLD4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;

// Writeback, fixed post-increment ("$addr!") forms.
def VLD4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VLD4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VLD4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VLD4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VLD4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
def VLD4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             pred:$p)>;
// Writeback, register post-increment ("$addr, $Rm") forms.
def VLD4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
        (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
        (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
             rGPR:$Rm, pred:$p)>;

// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
        (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
             pred:$p)>;
def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
        (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
        (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;
def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
        (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
        (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;

// Writeback, fixed post-increment ("$addr!") forms.
def VST4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
        (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
             pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
        (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
        (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
        (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
             pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
        (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
             pred:$p)>;
// Writeback, register post-increment ("$addr, $Rm") forms.
def VST4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
        (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
             rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
        (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
        (ins VecListFourDWordIndexed:$list,
             addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
        (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
             rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
        (ins VecListFourQWordIndexed:$list,
             addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;


// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// VST4 (multiple 4-element structures) assembler pseudos. All element sizes
// share the same four-register list kinds (D or Q spacing) and a
// 64/128/256-bit-alignable address.
def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

// Fixed (post-increment) writeback variants: "$addr!".
def VST4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
// Register-offset writeback variants: "$addr, $Rm".
def VST4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;

// VMOV/VMVN takes an optional datatype suffix
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                          (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                          (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                          (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                          (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
// D-register versions.
// vcle Vd, Vn, Vm is encoded as the corresponding VCGE with Vn and Vm swapped.
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
                    (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
                    (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
                    (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
                    (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
                    (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                    (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                    (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// The f16 form is only available with the full FP16 extension.
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
                    (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                    (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
                    (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
                    (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
                    (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
                    (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                    (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                    (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
                    (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
// vclt Vd, Vn, Vm is encoded as the corresponding VCGT with Vn and Vm swapped.
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
                    (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
                    (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
                    (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
                    (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
                    (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                    (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                    (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// The f16 form is only available with the full FP16 extension.
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
                    (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                    (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
                    (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
                    (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
                    (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
                    (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                    (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                    (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
                    (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VSWP allows, but does not require, a type suffix.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                          (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                          (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                          (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                          (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                          (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                          (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

// "vmov Rd, #-imm" can be handled via "vmvn".
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;

// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
// these should restrict to just the Q register variants, but the register
// classes are enough to match correctly regardless, so we keep it simple
// and just use MnemonicAlias.
def : NEONMnemonicAlias<"vbicq", "vbic">;
def : NEONMnemonicAlias<"vandq", "vand">;
def : NEONMnemonicAlias<"veorq", "veor">;
def : NEONMnemonicAlias<"vorrq", "vorr">;

def : NEONMnemonicAlias<"vmovq", "vmov">;
def : NEONMnemonicAlias<"vmvnq", "vmvn">;
// Explicit versions for floating point so that the FPImm variants get
// handled early. The parser gets confused otherwise.
def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;

def : NEONMnemonicAlias<"vaddq", "vadd">;
def : NEONMnemonicAlias<"vsubq", "vsub">;

def : NEONMnemonicAlias<"vminq", "vmin">;
def : NEONMnemonicAlias<"vmaxq", "vmax">;

def : NEONMnemonicAlias<"vmulq", "vmul">;

def : NEONMnemonicAlias<"vabsq", "vabs">;

def : NEONMnemonicAlias<"vshlq", "vshl">;
def : NEONMnemonicAlias<"vshrq", "vshr">;

def : NEONMnemonicAlias<"vcvtq", "vcvt">;

def : NEONMnemonicAlias<"vcleq", "vcle">;
def : NEONMnemonicAlias<"vceqq", "vceq">;

def : NEONMnemonicAlias<"vzipq", "vzip">;
def : NEONMnemonicAlias<"vswpq", "vswp">;

def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;


// Alias for loading floating point immediates that aren't representable
// using the vmov.f32 encoding but the bitpattern is representable using
// the .i32 encoding.
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;

// ARMv8.6a BFloat16 instructions.
let Predicates = [HasBF16, HasNEON] in {
// Common base for the BFloat16 dot-product encodings (VDOT / indexed VDOT).
class BF16VDOT<bits<5> op27_23, bits<2> op21_20, bit op6,
               dag oops, dag iops, list<dag> pattern>
   : N3Vnp<op27_23, op21_20, 0b1101, op6, 0, oops, iops,
           N3RegFrm, IIC_VDOTPROD, "", "", pattern>
{
  let DecoderNamespace = "VFPV8";
}

// VDOT (vector): bf16 dot product accumulating into f32 lanes.
class BF16VDOTS<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy, ValueType InputTy>
   : BF16VDOT<0b11000, 0b00, Q, (outs RegTy:$dst),
              (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
              [(set (AccumTy RegTy:$dst),
                    (int_arm_neon_bfdot (AccumTy RegTy:$Vd),
                                        (InputTy RegTy:$Vn),
                                        (InputTy RegTy:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
  let DecoderNamespace = "VFPV8";
}

// VDOT (by element): the indexed form plus a pattern that folds a duplicated
// lane of $Vm into the index operand.
multiclass BF16VDOTI<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy,
                     ValueType InputTy, dag RHS> {

  def "" : BF16VDOT<0b11100, 0b00, Q, (outs RegTy:$dst),
                    (ins RegTy:$Vd, RegTy:$Vn,
                         DPR_VFP2:$Vm, VectorIndex32:$lane), []> {
    bit lane;
    let Inst{5} = lane;
    let Constraints = "$dst = $Vd";
    let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm$lane");
    let DecoderNamespace = "VFPV8";
  }

  def : Pat<
    (AccumTy (int_arm_neon_bfdot (AccumTy RegTy:$Vd),
                                 (InputTy RegTy:$Vn),
                                 (InputTy (bitconvert (AccumTy
                                          (ARMvduplane (AccumTy RegTy:$Vm),
                                                        VectorIndex32:$lane)))))),
    (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
}

def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v4bf16>;
def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v8bf16>;

defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v4bf16, (v2f32 DPR_VFP2:$Vm)>;
defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v8bf16, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;

// VMMLA: bf16 matrix multiply-accumulate into a v4f32 accumulator.
class BF16MM<bit Q, RegisterClass RegTy,
             string opc>
   : N3Vnp<0b11000, 0b00, 0b1100, Q, 0,
           (outs RegTy:$dst), (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
           N3RegFrm, IIC_VDOTPROD, "", "",
           [(set (v4f32 QPR:$dst), (int_arm_neon_bfmmla (v4f32 QPR:$Vd),
                                                        (v8bf16 QPR:$Vn),
                                                        (v8bf16 QPR:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
  let DecoderNamespace = "VFPV8";
}

def VMMLA : BF16MM<1, QPR, "vmmla">;

// VFMA[B|T].BF16 (vector): widening fused multiply-add of the bottom (T=0)
// or top (T=1) bf16 elements into f32.
class VBF16MALQ<bit T, string suffix, SDPatternOperator OpNode>
  : N3VCP8<0b00, 0b11, T, 1,
           (outs QPR:$dst), (ins QPR:$Vd, QPR:$Vn, QPR:$Vm),
           NoItinerary, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm", "",
                [(set (v4f32 QPR:$dst),
                      (OpNode (v4f32 QPR:$Vd),
                              (v8bf16 QPR:$Vn),
                              (v8bf16 QPR:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let DecoderNamespace = "VFPV8";
}

def VBF16MALTQ: VBF16MALQ<1, "t", int_arm_neon_bfmlalt>;
def VBF16MALBQ: VBF16MALQ<0, "b", int_arm_neon_bfmlalb>;

// VFMA[B|T].BF16 (by element), with a pattern folding a duplicated lane of
// $Vm into the index operand.
multiclass VBF16MALQI<bit T, string suffix, SDPatternOperator OpNode> {
  def "" : N3VLaneCP8<0, 0b11, T, 1, (outs QPR:$dst),
            (ins QPR:$Vd, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
            IIC_VMACD, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm$idx", "", []> {
    bits<2> idx;
    let Inst{5} = idx{1};
    let Inst{3} = idx{0};
    let Constraints = "$dst = $Vd";
    let DecoderNamespace = "VFPV8";
  }

  def : Pat<
    (v4f32 (OpNode (v4f32 QPR:$Vd),
                   (v8bf16 QPR:$Vn),
                   (v8bf16 (ARMvduplane (v8bf16 QPR:$Vm),
                                        VectorIndex16:$lane)))),
    (!cast<Instruction>(NAME) QPR:$Vd,
                              QPR:$Vn,
                              (EXTRACT_SUBREG QPR:$Vm,
                                (DSubReg_i16_reg VectorIndex16:$lane)),
                              (SubReg_i16_lane VectorIndex16:$lane))>;
}

defm VBF16MALTQI: VBF16MALQI<1, "t", int_arm_neon_bfmlalt>;
defm VBF16MALBQI: VBF16MALQI<0, "b", int_arm_neon_bfmlalb>;

// VCVT.BF16.F32: narrow four f32 elements to four bf16 elements.
def BF16_VCVT : N2V<0b11, 0b11, 0b01, 0b10, 0b01100, 1, 0,
                    (outs DPR:$Vd), (ins QPR:$Vm),
                    NoItinerary, "vcvt", "bf16.f32", "$Vd, $Vm", "",
                    []>;
}
// End of BFloat16 instructions